diff options
-rw-r--r-- | dev-ml/markup/Manifest | 1 | ||||
-rw-r--r-- | dev-ml/markup/files/test.patch | 273 | ||||
-rw-r--r-- | dev-ml/markup/files/uutf.patch | 1085 | ||||
-rw-r--r-- | dev-ml/markup/markup-0.7.2-r1.ebuild | 44 |
4 files changed, 0 insertions, 1403 deletions
diff --git a/dev-ml/markup/Manifest b/dev-ml/markup/Manifest index 1819e63ec676..29247e73873e 100644 --- a/dev-ml/markup/Manifest +++ b/dev-ml/markup/Manifest @@ -1,2 +1 @@ -DIST markup-0.7.2.tar.gz 275010 SHA256 630a737ab6113e17999aacfd55f73b6671211d7980be86f0c711c0b385887c34 SHA512 72a87f54692a0b751c23e6b52bc4ecaa68334b0c6c067793cbf5b011b7d06ce7563f9aa2daeef3553ab48bb6cb9e592587b5a4f37279eaef7b45e19e5b372f73 WHIRLPOOL 679a01c5d197eadf1a8b74247e276405c182acff4c7781b577fbad9fcdc33be164ff81222e79c4e0e5193d1295ee4896ddda547cce1712bfb5ebda050f5bf5ac DIST markup-0.7.3.tar.gz 275094 SHA256 e1eb3562e0d26ccc33aa5dbe802e4210dbd7c30a8e69b6098b825afb11bb6af1 SHA512 e4577e438241d58c728507c88f14b7f029dbc4aa6b9c5dbf78f03b6c083a430026158c3146a88c14c9cd90a242b1bb4ed838b150bb89433fb6a6f673e5d2bb66 WHIRLPOOL ad967738706d4c017f266ecdef7b0772ce0bc17f9bc7dda228ffabc9cccccc88cde69337e063577d1fcda1e93cf4a7f18bbbf09709ee82f0a4b8382f5e339d2e diff --git a/dev-ml/markup/files/test.patch b/dev-ml/markup/files/test.patch deleted file mode 100644 index f2a525764831..000000000000 --- a/dev-ml/markup/files/test.patch +++ /dev/null @@ -1,273 +0,0 @@ -Index: markup.ml-0.7.2/test/test_encoding.ml -=================================================================== ---- markup.ml-0.7.2.orig/test/test_encoding.ml -+++ markup.ml-0.7.2/test/test_encoding.ml -@@ -15,9 +15,9 @@ let test_ucs_4 (f : Encoding.t) name s1 - expect_error (1, 2) (`Decoding_error (bad_bytes, name)) - begin fun report -> - let chars = s1 |> string |> f ~report in -- next_option chars ok (assert_equal (Some (Char.code 'f'))); -+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'f'))))); - next_option chars ok (assert_equal (Some Uutf.u_rep)); -- next_option chars ok (assert_equal (Some (Char.code 'o'))); -+ next_option chars ok (assert_equal (Some ((Uchar.of_int (Char.code 'o'))))); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None) - end; -@@ -25,9 +25,9 @@ let test_ucs_4 (f : Encoding.t) name s1 - expect_error (2, 2) (`Decoding_error ("\x00\x00\x00", name)) - begin fun report -> - let chars = s2 |> string |> f ~report in -- next_option chars ok (assert_equal (Some (Char.code 'f'))); -- next_option chars ok (assert_equal (Some 0x000A)); -- next_option chars ok (assert_equal (Some (Char.code 'o'))); -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'f')))); -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x000A))); -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'o')))); - next_option chars ok (assert_equal (Some Uutf.u_rep)); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None) -@@ -38,12 +38,12 @@ let tests = [ - let s = "\xef\xbb\xbffoo\xf0\x9f\x90\x99bar\xa0more" in - expect_error (1, 8) (`Decoding_error ("\xa0", "utf-8")) begin fun report -> - let chars = s |> string |> utf_8 ~report in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -- next_option chars ok (assert_equal (Some 0x1F419)); -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); - next_option chars ok (assert_equal (Some Uutf.u_rep)); - next_n 4 chars ok -- (assert_equal (List.map Char.code ['m'; 'o'; 'r'; 'e'])); -+ (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['m'; 'o'; 'r'; 'e'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None) - end); -@@ -53,11 +53,11 @@ let tests = [ - expect_error (1, 6) (`Decoding_error ("\xdc\x19", "utf-16be")) - begin fun report -> - let chars = s |> string |> utf_16be ~report in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -- next_option chars ok (assert_equal (Some 0x1F419)); -- next_option chars ok (assert_equal (Some (Char.code 'b'))); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b')))); - next_option chars ok (assert_equal (Some Uutf.u_rep)); -- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); -+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None) - end); -@@ -67,11 +67,11 @@ let tests = [ - expect_error (1, 6) (`Decoding_error ("\x19\xdc", "utf-16le")) - begin fun report -> - let chars = s |> string |> utf_16le ~report in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -- next_option chars ok (assert_equal (Some 0x1F419)); -- next_option chars ok (assert_equal (Some (Char.code 'b'))); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); -+ next_option chars ok (assert_equal (Some (Uchar.of_int 0x1F419))); -+ next_option chars ok (assert_equal (Some (Uchar.of_int (Char.code 'b')))); - next_option chars ok (assert_equal (Some Uutf.u_rep)); -- next_n 16 chars ok (assert_equal (List.map Char.code ['a'; 'r'])); -+ next_n 16 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['a'; 'r'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None) - end); -@@ -79,7 +79,7 @@ let tests = [ - ("encoding.iso_8859_1" >:: fun _ -> - let chars = string "foo\xa0" |> iso_8859_1 in - next_n 4 chars -- ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'; '\xa0'])); -+ ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'; '\xa0'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None)); - -@@ -88,26 +88,26 @@ let tests = [ - expect_error (1, 4) (`Decoding_error ("\xa0", "us-ascii")) - begin fun report -> - let chars = s |> string |> us_ascii ~report in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); - next_option chars ok (assert_equal (Some Uutf.u_rep)); -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None) - end); - - ("encoding.windows_1251" >:: fun _ -> - let chars = string "foo\xe0\xe1\xe2bar" |> windows_1251 in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -- next_n 3 chars ok (assert_equal [0x0430; 0x0431; 0x0432]); -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); -+ next_n 3 chars ok (assert_equal [Uchar.of_int 0x0430; Uchar.of_int 0x0431; Uchar.of_int 0x0432]); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None)); - - ("encoding.windows_1252" >:: fun _ -> - let chars = string "foo\x80\x83bar" |> windows_1252 in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -- next_n 2 chars ok (assert_equal [0x20AC; 0x0192]); -- next_n 3 chars ok (assert_equal (List.map Char.code ['b'; 'a'; 'r'])); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); -+ next_n 2 chars ok (assert_equal [Uchar.of_int 0x20AC; Uchar.of_int 0x0192]); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['b'; 'a'; 'r'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None)); - -@@ -137,7 +137,7 @@ let tests = [ - - ("encoding.ebcdic" >:: fun _ -> - let chars = string "\x86\x96\x96" |> ebcdic in -- next_n 3 chars ok (assert_equal (List.map Char.code ['f'; 'o'; 'o'])); -+ next_n 3 chars ok (assert_equal (List.map (fun x -> Uchar.of_int (Char.code x)) ['f'; 'o'; 'o'])); - next_option chars ok (assert_equal None); - next_option chars ok (assert_equal None)); - ] -Index: markup.ml-0.7.2/test/test_html_tokenizer.ml -=================================================================== ---- markup.ml-0.7.2.orig/test/test_html_tokenizer.ml -+++ markup.ml-0.7.2/test/test_html_tokenizer.ml -@@ -134,7 +134,7 @@ let tests = [ - expect "�" - [ 1, 1, E (`Bad_token ("�", - reference, "out of range")); -- 1, 1, S (`Char Uutf.u_rep); -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 35, S `EOF]; - - expect "�" -@@ -142,22 +142,22 @@ let tests = [ - reference, "missing ';' at end")); - 1, 1, E (`Bad_token ("�", - reference, "out of range")); -- 1, 1, S (`Char Uutf.u_rep); -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 34, S `EOF]; - - expect "�" - [ 1, 1, E (`Bad_token ("�", reference, "out of range")); -- 1, 1, S (`Char Uutf.u_rep); -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 9, S `EOF]; - - expect "�" - [ 1, 1, E (`Bad_token ("�", reference, "out of range")); -- 1, 1, S (`Char Uutf.u_rep); -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 11, S `EOF]; - - expect "�" - [ 1, 1, E (`Bad_token ("�", reference, "out of range")); -- 1, 1, S (`Char Uutf.u_rep); -+ 1, 1, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 5, S `EOF]; - - expect "" -@@ -264,7 +264,7 @@ let tests = [ - expect ~state:`RCDATA "f\x00</foo>" - ([ 1, 1, S (`Char 0x66); - 1, 2, E (`Bad_token ("U+0000", "content", "null")); -- 1, 2, S (`Char Uutf.u_rep)] @ -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ - (char_sequence ~start:3 "</foo>")); - - expect ~state:`RCDATA "<title>f</title >" -@@ -302,7 +302,7 @@ let tests = [ - expect ~state:`RAWTEXT "f\x00</foo>" - ([ 1, 1, S (`Char 0x66); - 1, 2, E (`Bad_token ("U+0000", "content", "null")); -- 1, 2, S (`Char Uutf.u_rep)] @ -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ - (char_sequence ~start:3 "</foo>"))); - - ("html.tokenizer.script-data" >:: fun _ -> -@@ -330,7 +330,7 @@ let tests = [ - expect ~state:`Script_data "f<!--o\x00o" - ((char_sequence ~no_eof:true "f<!--o") @ - [1, 7, E (`Bad_token ("U+0000", "script", "null")); -- 1, 7, S (`Char Uutf.u_rep); -+ 1, 7, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 8, S (`Char 0x6F); - 1, 9, E (`Unexpected_eoi "script"); - 1, 9, S `EOF]); -@@ -363,7 +363,7 @@ let tests = [ - expect ~state:`Script_data "f<!--a-\x00-" - ((char_sequence ~no_eof:true "f<!--a-") @ - [ 1, 8, E (`Bad_token ("U+0000", "script", "null")); -- 1, 8, S (`Char Uutf.u_rep); -+ 1, 8, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 9, S (`Char 0x02D); - 1, 10, E (`Unexpected_eoi "script"); - 1, 10, S `EOF]); -@@ -371,7 +371,7 @@ let tests = [ - expect ~state:`Script_data "f<!--a--\x00--" - ((char_sequence ~no_eof:true "f<!--a--") @ - [ 1, 9, E (`Bad_token ("U+0000", "script", "null")); -- 1, 9, S (`Char Uutf.u_rep); -+ 1, 9, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 10, S (`Char 0x02D); - 1, 11, S (`Char 0x02D); - 1, 12, E (`Unexpected_eoi "script"); -@@ -380,14 +380,14 @@ let tests = [ - expect ~state:`Script_data "f<!--<script>\x00" - ((char_sequence ~no_eof:true "f<!--<script>") @ - [ 1, 14, E (`Bad_token ("U+0000", "script", "null")); -- 1, 14, S (`Char Uutf.u_rep); -+ 1, 14, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 15, E (`Unexpected_eoi "script"); - 1, 15, S `EOF]); - - expect ~state:`Script_data "f<!--<script>-\x00-" - ((char_sequence ~no_eof:true "f<!--<script>-") @ - [ 1, 15, E (`Bad_token ("U+0000", "script", "null")); -- 1, 15, S (`Char Uutf.u_rep); -+ 1, 15, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 16, S (`Char 0x2D); - 1, 17, E (`Unexpected_eoi "script"); - 1, 17, S `EOF]); -@@ -395,7 +395,7 @@ let tests = [ - expect ~state:`Script_data "f<!--<script>--\x00--" - ((char_sequence ~no_eof:true "f<!--<script>--") @ - [ 1, 16, E (`Bad_token ("U+0000", "script", "null")); -- 1, 16, S (`Char Uutf.u_rep); -+ 1, 16, S (`Char (Uchar.to_int Uutf.u_rep)); - 1, 17, S (`Char 0x2D); - 1, 18, S (`Char 0x2D); - 1, 19, E (`Unexpected_eoi "script"); -@@ -413,7 +413,7 @@ let tests = [ - expect ~state:`Script_data "f\x00</foo>" - ([ 1, 1, S (`Char 0x66); - 1, 2, E (`Bad_token ("U+0000", "content", "null")); -- 1, 2, S (`Char Uutf.u_rep)] @ -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ - (char_sequence ~start:3 "</foo>"))); - - ("html.tokenizer.plaintext" >:: fun _ -> -@@ -424,7 +424,7 @@ let tests = [ - expect ~state:`PLAINTEXT "f\x00</foo>" - ([ 1, 1, S (`Char 0x66); - 1, 2, E (`Bad_token ("U+0000", "content", "null")); -- 1, 2, S (`Char Uutf.u_rep)] @ -+ 1, 2, S (`Char (Uchar.to_int Uutf.u_rep))] @ - (char_sequence ~start:3 "</foo>"))); - - ("html.tokenizer.comment" >:: fun _ -> -Index: markup.ml-0.7.2/test/test_input.ml -=================================================================== ---- markup.ml-0.7.2.orig/test/test_input.ml -+++ markup.ml-0.7.2/test/test_input.ml -@@ -71,7 +71,7 @@ let tests = [ - end); - - ("input.bom" >:: fun _ -> -- [0xFEFF; 0x66] -+ [Uchar.of_int 0xFEFF; Uchar.of_int 0x66] - |> of_list - |> preprocess is_valid_xml_char Error.ignore_errors - |> fst diff --git a/dev-ml/markup/files/uutf.patch b/dev-ml/markup/files/uutf.patch deleted file mode 100644 index f561084ee454..000000000000 --- a/dev-ml/markup/files/uutf.patch +++ /dev/null @@ -1,1085 +0,0 @@ -Index: markup.ml-0.7.2/src/common.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/common.ml -+++ markup.ml-0.7.2/src/common.ml -@@ -134,7 +134,7 @@ let is_printable = is_in_range 0x0020 0x - let char c = - if is_printable c then begin - let buffer = Buffer.create 4 in -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - Buffer.contents buffer - end - else -Index: markup.ml-0.7.2/src/detect.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/detect.ml -+++ markup.ml-0.7.2/src/detect.ml -@@ -222,7 +222,7 @@ let meta_tag_prescan = - let rec iterate () = - next source throw (fun () -> k "") (function - | c when c = quote -> k (Buffer.contents buffer) -- | c -> add_utf_8 buffer (Char.code (Char.lowercase c)); iterate ()) -+ | c -> add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); iterate ()) - in - iterate () - in -@@ -236,7 +236,7 @@ let meta_tag_prescan = - push source c; - k (Buffer.contents buffer) - | c -> -- add_utf_8 buffer (Char.code (Char.lowercase c)); -+ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); - iterate ()) - in - iterate () -@@ -315,7 +315,7 @@ let meta_tag_prescan = - k (Buffer.contents buffer) - - | Some c -> -- add_utf_8 buffer (Char.code (Char.lowercase c)); -+ add_utf_8 buffer (Uchar.of_int (Char.code (Char.lowercase c))); - iterate () - end - in -Index: markup.ml-0.7.2/src/encoding.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/encoding.ml -+++ markup.ml-0.7.2/src/encoding.ml -@@ -4,7 +4,7 @@ - open Common - open Kstream - --type t = ?report:Error.parse_handler -> char Kstream.t -> int Kstream.t -+type t = ?report:Error.parse_handler -> char Kstream.t -> Uchar.t Kstream.t - - let wrap f = fun ?(report = Error.ignore_errors) s -> f report s - -@@ -24,8 +24,8 @@ let _uutf_decoder encoding name = - k Uutf.u_rep) - | `Await -> - next bytes throw -- (fun () -> Uutf.Manual.src decoder "" 0 0; run ()) -- (fun c -> Uutf.Manual.src decoder (String.make 1 c) 0 1; run ()) -+ (fun () -> Uutf.Manual.src decoder Bytes.empty 0 0; run ()) -+ (fun c -> Uutf.Manual.src decoder (Bytes.make 1 c) 0 1; run ()) - in - run ()) - |> make) -@@ -87,7 +87,7 @@ let _ucs_4_decoder arrange name = - let skip = - if !first then begin - first := false; -- scalar = Uutf.u_bom -+ scalar = Uchar.to_int Uutf.u_bom - end - else - false -@@ -96,9 +96,9 @@ let _ucs_4_decoder arrange name = - if skip then run () - else - if scalar = 0x000A then -- newline k scalar -+ newline k (Uchar.of_int scalar) - else -- char k scalar -+ char k (Uchar.of_int scalar) - - | [] -> empty () - -@@ -130,7 +130,7 @@ let code_page table = - - (fun _ bytes -> - (fun throw empty k -> -- next bytes throw empty (fun c -> k table.(Char.code c))) -+ next bytes throw empty (fun c -> k (Uchar.of_int table.(Char.code c)))) - |> make) - |> wrap - -Index: markup.ml-0.7.2/src/html_parser.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/html_parser.ml -+++ markup.ml-0.7.2/src/html_parser.ml -@@ -1022,7 +1022,7 @@ let parse requested_context report (toke - let frameset_ok = ref true in - let head_seen = ref false in - -- let add_character = Text.add text in -+ let add_character = (fun x y -> Text.add text x (Uchar.of_int y)) in - - set_foreign (fun () -> - Stack.current_element_is_foreign context open_elements); -@@ -2717,7 +2717,7 @@ let parse requested_context report (toke - | l, `Char 0 -> - report l (`Bad_token ("U+0000", "foreign content", "null")) !throw - (fun () -> -- add_character l Uutf.u_rep; -+ add_character l (Uchar.to_int Uutf.u_rep); - mode ()) - - | l, `Char (0x0009 | 0x000A | 0x000C | 0x000D | 0x0020 as c) -> -Index: markup.ml-0.7.2/src/html_tokenizer.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/html_tokenizer.ml -+++ markup.ml-0.7.2/src/html_tokenizer.ml -@@ -252,7 +252,7 @@ let tokenize report (input, get_location - report location - (`Bad_token (prefix ^ text ^ semicolon, "character reference", - "Windows-1252 character")) !throw (fun () -> -- k (Some (`One n))) -+ k (Some (`One (Uchar.of_int n)))) - - else - match n with -@@ -268,9 +268,9 @@ let tokenize report (input, get_location - (`Bad_token (prefix ^ text ^ semicolon, - "character reference", - "invalid HTML character")) !throw (fun () -> -- k (Some (`One n))) -+ k (Some (`One (Uchar.of_int n)))) - -- | n -> k (Some (`One n)) -+ | n -> k (Some (`One (Uchar.of_int n))) - end - end - in -@@ -366,6 +366,10 @@ let tokenize report (input, get_location - | _ -> unterminated ()) - in - -+ let ma = function -+ a, `One x -> (a, `One (Uchar.of_int x)) -+ | a, `Two (x,y) -> (a, `Two (Uchar.of_int x, Uchar.of_int y)) in -+ - let rec match_named best matched replace candidate = - next_option input !throw (function - | None -> finish best matched replace -@@ -377,8 +381,8 @@ let tokenize report (input, get_location - | `None -> finish best matched (v::replace) - | `Continue -> match_named best matched (v::replace) candidate - | `Match_and_continue m -> -- match_named (Some m) (v::(replace @ matched)) [] candidate -- | `Match m -> finish (Some m) (v::matched) []) -+ match_named (Some (ma m)) (v::(replace @ matched)) [] candidate -+ | `Match m -> finish (Some (ma m)) (v::matched) []) - in - match_named None [] [] "") - -@@ -409,11 +413,11 @@ let tokenize report (input, get_location - emit (l, `Char 0x0026) state - - | Some (`One c) -> -- emit (l, `Char c) state -+ emit (l, `Char (Uchar.to_int c)) state - - | Some (`Two (c, c')) -> -- emit (l, `Char c) (fun () -> -- emit (l, `Char c') state) -+ emit (l, `Char (Uchar.to_int c)) (fun () -> -+ emit (l, `Char (Uchar.to_int c')) state) - end - - (* 8.2.4.3. *) -@@ -427,7 +431,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> -- emit (l, `Char Uutf.u_rep) rcdata_state) -+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rcdata_state) - - | None -> - emit_eof () -@@ -444,7 +448,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> -- emit (l, `Char Uutf.u_rep) rawtext_state) -+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) rawtext_state) - - | None -> - emit_eof () -@@ -461,7 +465,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep script_data_state) -+ emit_character l (Uchar.to_int Uutf.u_rep) script_data_state) - - | None -> - emit_eof () -@@ -475,7 +479,7 @@ let tokenize report (input, get_location - next_option input !throw begin function - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "content", "null")) !throw (fun () -> -- emit (l, `Char Uutf.u_rep) plaintext_state) -+ emit (l, `Char (Uchar.to_int Uutf.u_rep)) plaintext_state) - - | None -> - emit_eof () -@@ -501,7 +505,7 @@ let tokenize report (input, get_location - end_tag_open_state l' tag - - | Some (_, c) when is_alphabetic c -> -- add_utf_8 tag._tag_name (to_lowercase c); -+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); - tag_name_state l' tag - - | Some (_, 0x003F) -> -@@ -529,7 +533,7 @@ let tokenize report (input, get_location - - next_option input !throw begin function - | Some (_, c) when is_alphabetic c -> -- add_utf_8 tag._tag_name (to_lowercase c); -+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); - tag_name_state l' tag - - | Some (_, 0x003E) -> -@@ -569,7 +573,7 @@ let tokenize report (input, get_location - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state - - | Some (_, c) -> -- add_utf_8 tag._tag_name (to_lowercase c); -+ add_utf_8 tag._tag_name (Uchar.of_int (to_lowercase c)); - tag_name_state l' tag - end - -@@ -589,7 +593,7 @@ let tokenize report (input, get_location - next_option input !throw begin function - | Some (_, c as v) when is_alphabetic c -> - let name_buffer = Buffer.create 32 in -- add_utf_8 name_buffer (to_lowercase c); -+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); - text_end_tag_name_state state l' (v::cs) name_buffer - - | maybe_v -> -@@ -618,7 +622,7 @@ let tokenize report (input, get_location - emit_tag l' (create_tag ()) - - | Some ((_, c) as v) when is_alphabetic c -> -- add_utf_8 name_buffer (to_lowercase c); -+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); - text_end_tag_name_state state l' (v::cs) name_buffer - - | maybe_v -> -@@ -676,7 +680,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep (fun () -> -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> - script_data_escaped_state l')) - - | None -> -@@ -699,7 +703,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep (fun () -> -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> - script_data_escaped_state l')) - - | None -> -@@ -725,7 +729,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep (fun () -> -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> - script_data_escaped_state l')) - - | None -> -@@ -745,7 +749,7 @@ let tokenize report (input, get_location - - | Some (_, c as v) when is_alphabetic c -> - let tag_buffer = Buffer.create 32 in -- add_utf_8 tag_buffer (to_lowercase c); -+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); - emit_characters (List.rev (v::cs)) (fun () -> - script_data_double_escape_start_state l' tag_buffer) - -@@ -765,7 +769,7 @@ let tokenize report (input, get_location - else script_data_escaped_state l') - - | Some (l, c) when is_alphabetic c -> -- add_utf_8 tag_buffer (to_lowercase c); -+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); - emit_character l c (fun () -> - script_data_double_escape_start_state l' tag_buffer) - -@@ -787,7 +791,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep (fun () -> -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> - script_data_double_escaped_state l')) - - | None -> -@@ -811,7 +815,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep (fun () -> -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> - script_data_double_escaped_state l')) - - | None -> -@@ -838,7 +842,7 @@ let tokenize report (input, get_location - - | Some (l, 0) -> - report l (`Bad_token ("U+0000", "script", "null")) !throw (fun () -> -- emit_character l Uutf.u_rep (fun () -> -+ emit_character l (Uchar.to_int Uutf.u_rep) (fun () -> - script_data_double_escaped_state l')) - - | None -> -@@ -872,7 +876,7 @@ let tokenize report (input, get_location - else script_data_double_escaped_state l') - - | Some (l, c) when is_alphabetic c -> -- add_utf_8 tag_buffer (to_lowercase c); -+ add_utf_8 tag_buffer (Uchar.of_int (to_lowercase c)); - emit_character l c (fun () -> - script_data_double_escape_end_state l' tag_buffer) - -@@ -910,10 +914,10 @@ let tokenize report (input, get_location - | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D as c)) -> - report l (`Bad_token (char c, "attribute name", - "invalid start character")) !throw (fun () -> -- start_attribute c) -+ start_attribute (Uchar.of_int c)) - - | Some (_, c) -> -- start_attribute (to_lowercase c) -+ start_attribute (Uchar.of_int (to_lowercase c)) - end - - (* 8.2.4.35. *) -@@ -942,14 +946,14 @@ let tokenize report (input, get_location - | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> - report l (`Bad_token (char c, "attribute name", - "invalid name character")) !throw (fun () -> -- add_utf_8 name_buffer c; -+ add_utf_8 name_buffer (Uchar.of_int c); - attribute_name_state l' tag name_buffer) - - | None -> - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state - - | Some (_, c) -> -- add_utf_8 name_buffer (to_lowercase c); -+ add_utf_8 name_buffer (Uchar.of_int (to_lowercase c)); - attribute_name_state l' tag name_buffer - end - -@@ -985,13 +989,13 @@ let tokenize report (input, get_location - | Some (l, (0x0022 | 0x0027 | 0x003C as c)) -> - report l (`Bad_token (char c, "attribute name", - "invalid start character")) !throw (fun () -> -- start_next_attribute c) -+ start_next_attribute (Uchar.of_int c)) - - | None -> - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state - - | Some (_, c) -> -- start_next_attribute (to_lowercase c) -+ start_next_attribute (Uchar.of_int (to_lowercase c)) - end - - (* 8.2.4.37. *) -@@ -1030,13 +1034,13 @@ let tokenize report (input, get_location - | Some (l, (0x003C | 0x003D | 0x0060 as c)) -> - report l (`Bad_token (char c, "attribute value", - "invalid start character")) !throw (fun () -> -- start_value attribute_value_unquoted_state (Some c)) -+ start_value attribute_value_unquoted_state (Some (Uchar.of_int c))) - - | None -> - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state - - | Some (_, c) -> -- start_value attribute_value_unquoted_state (Some c) -+ start_value attribute_value_unquoted_state (Some (Uchar.of_int c)) - end - - (* 8.2.4.38 and 8.2.4.39. *) -@@ -1062,7 +1066,7 @@ let tokenize report (input, get_location - data_state - - | Some (_, c) -> -- add_utf_8 value_buffer c; -+ add_utf_8 value_buffer (Uchar.of_int c); - attribute_value_quoted_state quote l' tag name value_buffer - end - -@@ -1092,14 +1096,14 @@ let tokenize report (input, get_location - | Some (l, (0x0022 | 0x0027 | 0x003C | 0x003D | 0x0060 as c)) -> - report l (`Bad_token (char c, "attribute value", - "invalid character")) !throw (fun () -> -- add_utf_8 value_buffer c; -+ add_utf_8 value_buffer (Uchar.of_int c); - attribute_value_unquoted_state l' tag name value_buffer) - - | None -> - report (get_location ()) (`Unexpected_eoi "tag") !throw data_state - - | Some (_, c) -> -- add_utf_8 value_buffer c; -+ add_utf_8 value_buffer (Uchar.of_int c); - attribute_value_unquoted_state l' tag name value_buffer - end - -@@ -1107,7 +1111,7 @@ let tokenize report (input, get_location - and character_reference_in_attribute allowed l value_buffer k = - consume_character_reference true (Some allowed) l begin function - | None -> -- add_utf_8 value_buffer 0x0026; -+ add_utf_8 value_buffer (Uchar.of_int 0x0026); - k () - - | Some (`One c) -> -@@ -1176,7 +1180,7 @@ let tokenize report (input, get_location - emit_comment l' buffer - - | Some (_, c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - consume () - end - in -@@ -1239,7 +1243,7 @@ let tokenize report (input, get_location - emit_comment l' buffer) - - | Some (_, c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -1266,7 +1270,7 @@ let tokenize report (input, get_location - - | Some (_, c) -> - Buffer.add_char buffer '-'; -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -1286,7 +1290,7 @@ let tokenize report (input, get_location - emit_comment l' buffer) - - | Some (_, c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -1308,7 +1312,7 @@ let tokenize report (input, get_location - - | Some (_, c) -> - Buffer.add_char buffer '-'; -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -1343,7 +1347,7 @@ let tokenize report (input, get_location - report l (`Bad_token ("--" ^ (char c), "comment", - "'--' should be in '-->'")) !throw (fun () -> - Buffer.add_string buffer "--"; -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer) - end - -@@ -1369,7 +1373,7 @@ let tokenize report (input, get_location - - | Some (_, c) -> - Buffer.add_string buffer "--!"; -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -1420,7 +1424,7 @@ let tokenize report (input, get_location - - | Some (_, c) -> - doctype._doctype_name <- -- add_doctype_char doctype._doctype_name (to_lowercase c); -+ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c)); - doctype_name_state l' doctype - end - -@@ -1445,7 +1449,7 @@ let tokenize report (input, get_location - - | Some (_, c) -> - doctype._doctype_name <- -- add_doctype_char doctype._doctype_name (to_lowercase c); -+ add_doctype_char doctype._doctype_name (Uchar.of_int (to_lowercase c)); - doctype_name_state l' doctype - end - -@@ -1574,7 +1578,7 @@ let tokenize report (input, get_location - emit_doctype ~quirks:true l' doctype) - - | Some (_, c) -> -- add doctype c; -+ add doctype (Uchar.of_int c); - doctype_identifier_quoted_state add quote next_state l' doctype - end - -Index: markup.ml-0.7.2/src/html_writer.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/html_writer.ml -+++ markup.ml-0.7.2/src/html_writer.ml -@@ -8,7 +8,7 @@ let _escape_attribute s = - Uutf.String.fold_utf_8 (fun () _ -> function - | `Malformed _ -> () - | `Uchar c -> -- match c with -+ match (Uchar.to_int c) with - | 0x0026 -> Buffer.add_string buffer "&" - | 0x00A0 -> Buffer.add_string buffer " " - | 0x0022 -> Buffer.add_string buffer """ -@@ -21,7 +21,7 @@ let _escape_text s = - Uutf.String.fold_utf_8 (fun () _ -> function - | `Malformed _ -> () - | `Uchar c -> -- match c with -+ match (Uchar.to_int c) with - | 0x0026 -> Buffer.add_string buffer "&" - | 0x00A0 -> Buffer.add_string buffer " " - | 0x003C -> Buffer.add_string buffer "<" -Index: markup.ml-0.7.2/src/input.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/input.ml -+++ markup.ml-0.7.2/src/input.ml -@@ -27,13 +27,13 @@ let preprocess is_valid_char report sour - in - - let rec iterate () = -- next source throw empty (function -+ next source throw empty (fun x -> match Uchar.to_int x with - | 0xFEFF when !first_char -> first_char := false; iterate () - - | 0x0D -> -- next source throw newline (function -+ next source throw newline (fun y -> match Uchar.to_int y with - | 0x0A -> newline () -- | c -> push source c; newline ()) -+ | c -> push source (Uchar.of_int c); newline ()) - - | 0x0A -> newline () - -Index: markup.ml-0.7.2/src/input.mli -=================================================================== ---- markup.ml-0.7.2.orig/src/input.mli -+++ markup.ml-0.7.2/src/input.mli -@@ -4,5 +4,5 @@ - open Common - - val preprocess : -- (int -> bool) -> Error.parse_handler -> int Kstream.t -> -+ (int -> bool) -> Error.parse_handler -> Uchar.t Kstream.t -> - (location * int) Kstream.t * (unit -> location) -Index: markup.ml-0.7.2/src/markup.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/markup.ml -+++ markup.ml-0.7.2/src/markup.ml -@@ -187,7 +187,7 @@ sig - - val decode : - ?report:(location -> Error.t -> unit io) -> t -> -- (char, _) stream -> (int, async) stream -+ (char, _) stream -> (Uchar.t, async) stream - end - - val parse_xml : -Index: markup.ml-0.7.2/src/markup.mli -=================================================================== ---- markup.ml-0.7.2.orig/src/markup.mli -+++ markup.ml-0.7.2/src/markup.mli -@@ -194,7 +194,7 @@ sig - - val decode : - ?report:(location -> Error.t -> unit) -> t -> -- (char, 's) stream -> (int, 's) stream -+ (char, 's) stream -> (Uchar.t, 's) stream - (** Applies a decoder to a byte stream. Illegal input byte sequences result in - calls to the error handler [~report] with error kind [`Decoding_error]. - The illegal bytes are then skipped, and zero or more U+FFFD replacement -@@ -764,7 +764,7 @@ sig - - val decode : - ?report:(location -> Error.t -> unit io) -> Encoding.t -> -- (char, _) stream -> (int, async) stream -+ (char, _) stream -> (Uchar.t, async) stream - end - - (** {2 XML} *) -@@ -838,7 +838,7 @@ val kstream : ('a, _) stream -> 'a Kstre - val of_kstream : 'a Kstream.t -> ('a, _) stream - - val preprocess_input_stream : -- (int, 's) stream -> (location * int, 's) stream * (unit -> location) -+ (Uchar.t, 's) stream -> (location * int, 's) stream * (unit -> location) - - (**/**) - -Index: markup.ml-0.7.2/src/utility.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/utility.ml -+++ markup.ml-0.7.2/src/utility.ml -@@ -346,11 +346,11 @@ let xhtml_entity name = - - match lookup 0 with - | `One c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - Some (Buffer.contents buffer) - | `Two (c, c') -> -- add_utf_8 buffer c; -- add_utf_8 buffer c'; -+ add_utf_8 buffer (Uchar.of_int c); -+ add_utf_8 buffer (Uchar.of_int c'); - Some (Buffer.contents buffer) - - with Exit -> None -Index: markup.ml-0.7.2/src/xml_tokenizer.ml -=================================================================== ---- markup.ml-0.7.2.orig/src/xml_tokenizer.ml -+++ markup.ml-0.7.2/src/xml_tokenizer.ml -@@ -101,7 +101,7 @@ let tokenize report resolve_reference (i - end - - | _, c when filter c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - read () - - | l, c -> -@@ -133,7 +133,7 @@ let tokenize report resolve_reference (i - - | _, c when is_name_start_char c -> - let buffer = Buffer.create 32 in -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - let rec read () = - next input !throw unexpected_eoi begin function - | _, 0x003B -> -@@ -146,7 +146,7 @@ let tokenize report resolve_reference (i - end - - | _, c when is_name_char c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - read () - - | l, c -> -@@ -218,7 +218,7 @@ let tokenize report resolve_reference (i - report_if (not @@ is_name_start_char c) l (fun () -> - `Bad_token (char c, "attribute", "invalid start character")) - !throw (fun () -> -- add_utf_8 name_buffer c; -+ add_utf_8 name_buffer (Uchar.of_int c); - name_state ()) - end - -@@ -235,7 +235,7 @@ let tokenize report resolve_reference (i - report_if (not @@ is_name_start_char c) l (fun () -> - `Bad_token (char c, "attribute", "invalid name character")) - !throw (fun () -> -- add_utf_8 name_buffer c; -+ add_utf_8 name_buffer (Uchar.of_int c); - name_state ()) - end - -@@ -275,14 +275,14 @@ let tokenize report resolve_reference (i - report l - (`Bad_token ("&", "attribute", "replace with '&'")) - !throw (fun () -> -- add_utf_8 value_buffer 0x0026; -+ add_utf_8 value_buffer (Uchar.of_int 0x0026); - state ()) - end - - and handle_lt l state = - report l (`Bad_token ("<", "attribute", "replace with '<'")) !throw - (fun () -> -- add_utf_8 value_buffer 0x003C; -+ add_utf_8 value_buffer (Uchar.of_int 0x003C); - state ()) - - and quoted_value_state quote = -@@ -300,7 +300,7 @@ let tokenize report resolve_reference (i - quoted_value_state quote) - - | _, c -> -- add_utf_8 value_buffer c; -+ add_utf_8 value_buffer (Uchar.of_int c); - quoted_value_state quote - end - -@@ -317,7 +317,7 @@ let tokenize report resolve_reference (i - handle_lt l unquoted_value_state - - | _, c -> -- add_utf_8 value_buffer c; -+ add_utf_8 value_buffer (Uchar.of_int c); - unquoted_value_state () - end - -@@ -372,7 +372,7 @@ let tokenize report resolve_reference (i - report_if (not @@ is_name_start_char c) l (fun () -> - `Bad_token (char c, pi, "invalid start character")) !throw - (fun () -> -- add_utf_8 target_buffer c; -+ add_utf_8 target_buffer (Uchar.of_int c); - target_state ()) - end - -@@ -388,13 +388,13 @@ let tokenize report resolve_reference (i - report_if (not @@ is_name_char c) l (fun () -> - `Bad_token (char c, pi, "invalid name character")) !throw - (fun () -> -- add_utf_8 target_buffer c; -+ add_utf_8 target_buffer (Uchar.of_int c); - target_state ()) - end - - and text_state () = - next' pi finish_pi (fun (_, c) -> -- add_utf_8 text_buffer c; -+ add_utf_8 text_buffer (Uchar.of_int c); - text_state ()) - - and xml_declaration_state () = -@@ -572,7 +572,7 @@ let tokenize report resolve_reference (i - and initial_state () = - next input !throw (fun () -> emit_eoi ()) begin function - | l, (0x005D as c) -> -- add_character l c; -+ add_character l (Uchar.of_int c); - one_bracket_state l - - | l, 0x003C -> -@@ -583,7 +583,7 @@ let tokenize report resolve_reference (i - | None -> - report l (`Bad_token (char c, "text", "replace with '&'")) - !throw (fun () -> -- add_character l c; -+ add_character l (Uchar.of_int c); - initial_state ()) - - | Some s -> -@@ -591,14 +591,14 @@ let tokenize report resolve_reference (i - initial_state ()) - - | l, c -> -- add_character l c; -+ add_character l (Uchar.of_int c); - initial_state () - end - - and one_bracket_state l' = - next_option input !throw begin function - | Some (l, (0x005D as c)) -> -- add_character l c; -+ add_character l (Uchar.of_int c); - two_brackets_state l' l - - | v -> -@@ -611,11 +611,11 @@ let tokenize report resolve_reference (i - | Some (l, (0x003E as c)) -> - report l' (`Bad_token ("]]>", "text", "must end a CDATA section")) - !throw (fun () -> -- add_character l c; -+ add_character l (Uchar.of_int c); - initial_state ()) - - | Some (l, (0x005D as c)) -> -- add_character l c; -+ add_character l (Uchar.of_int c); - two_brackets_state l'' l - - | v -> -@@ -626,7 +626,7 @@ let tokenize report resolve_reference (i - and begin_markup_state l' = - let recover v = - lt_in_text l' (fun () -> -- add_character l' 0x003C; -+ add_character l' (Uchar.of_int 0x003C); - push_option input v; - initial_state ()) - in -@@ -648,7 +648,7 @@ let tokenize report resolve_reference (i - - | _, c when is_name_start_char c -> - let tag_name_buffer = Buffer.create 32 in -- add_utf_8 tag_name_buffer c; -+ add_utf_8 tag_name_buffer (Uchar.of_int c); - start_tag_state l' tag_name_buffer - - | l, c as v -> -@@ -660,7 +660,7 @@ let tokenize report resolve_reference (i - and start_tag_state l' buffer = - let recover v = - lt_in_text l' (fun () -> -- add_character l' 0x003C; -+ add_character l' (Uchar.of_int 0x003C); - add_string l' (Buffer.contents buffer); - push_option input v; - initial_state ()) -@@ -680,7 +680,7 @@ let tokenize report resolve_reference (i - attributes_state l' (Buffer.contents buffer) [] - - | _, c when is_name_char c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - start_tag_state l' buffer - - | l, c as v -> -@@ -731,8 +731,8 @@ let tokenize report resolve_reference (i - and end_tag_state l' = - let recover v = - lt_in_text l' (fun () -> -- add_character l' 0x003C; -- add_character l' 0x002F; -+ add_character l' (Uchar.of_int 0x003C); -+ add_character l' (Uchar.of_int 0x002F); - push_option input v; - initial_state ()) - in -@@ -743,7 +743,7 @@ let tokenize report resolve_reference (i - begin function - | _, c when is_name_start_char c -> - let name_buffer = Buffer.create 32 in -- add_utf_8 name_buffer c; -+ add_utf_8 name_buffer (Uchar.of_int c); - end_tag_name_state l' name_buffer - - | l, c as v -> -@@ -755,8 +755,8 @@ let tokenize report resolve_reference (i - and end_tag_name_state l' buffer = - let recover v = - lt_in_text l' (fun () -> -- add_character l' 0x003C; -- add_character l' 0x002F; -+ add_character l' (Uchar.of_int 0x003C); -+ add_character l' (Uchar.of_int 0x002F); - add_string l' (Buffer.contents buffer); - push_option input v; - initial_state ()) -@@ -773,7 +773,7 @@ let tokenize report resolve_reference (i - end_tag_whitespace_state false l' (Buffer.contents buffer) - - | _, c when is_name_char c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - end_tag_name_state l' buffer - - | l, c as v -> -@@ -821,8 +821,8 @@ let tokenize report resolve_reference (i - - | v -> - bad_comment_start "<!" l' (fun () -> -- add_character l' 0x003C; -- add_character l' 0x0021; -+ add_character l' (Uchar.of_int 0x003C); -+ add_character l' (Uchar.of_int 0x0021); - push_option input v; - initial_state ()) - end -@@ -834,9 +834,9 @@ let tokenize report resolve_reference (i - - | v -> - bad_comment_start "<!-" l' (fun () -> -- add_character l' 0x003C; -- add_character l' 0x0021; -- add_character l' 0x002D; -+ add_character l' (Uchar.of_int 0x003C); -+ add_character l' (Uchar.of_int 0x0021); -+ add_character l' (Uchar.of_int 0x002D); - push_option input v; - initial_state ()) - end -@@ -852,7 +852,7 @@ let tokenize report resolve_reference (i - comment_one_dash_state l' l buffer - - | _, c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -863,8 +863,8 @@ let tokenize report resolve_reference (i - comment_two_dashes_state false l' l'' buffer - - | _, c -> -- add_utf_8 buffer 0x002D; -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int 0x002D); -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer - end - -@@ -883,14 +883,14 @@ let tokenize report resolve_reference (i - - | _, 0x002D -> - recover (fun () -> -- add_utf_8 buffer 0x002D; -+ add_utf_8 buffer (Uchar.of_int 0x002D); - comment_two_dashes_state true l' l'' buffer) - - | _, c -> - recover (fun () -> -- add_utf_8 buffer 0x002D; -- add_utf_8 buffer 0x002D; -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int 0x002D); -+ add_utf_8 buffer (Uchar.of_int 0x002D); -+ add_utf_8 buffer (Uchar.of_int c); - comment_state l' buffer) - end - -@@ -905,9 +905,9 @@ let tokenize report resolve_reference (i - !throw (fun () -> - lt_in_text l' (fun () -> - push_list input cs; -- add_character l' 0x003C; -- add_character l' 0x0021; -- add_character l' 0x005B; -+ add_character l' (Uchar.of_int 0x003C); -+ add_character l' (Uchar.of_int 0x0021); -+ add_character l' (Uchar.of_int 0x005B); - initial_state ())) - end - -@@ -918,7 +918,7 @@ let tokenize report resolve_reference (i - cdata_one_bracket_state l' l - - | l, c -> -- add_character l c; -+ add_character l (Uchar.of_int c); - cdata_state l' - end - -@@ -929,8 +929,8 @@ let tokenize report resolve_reference (i - cdata_two_brackets_state l' l'' l - - | l, c -> -- add_character l'' 0x005D; -- add_character l c; -+ add_character l'' (Uchar.of_int 0x005D); -+ add_character l (Uchar.of_int c); - cdata_state l' - end - -@@ -941,13 +941,13 @@ let tokenize report resolve_reference (i - initial_state () - - | l, 0x005D -> -- add_character l'' 0x005D; -+ add_character l'' (Uchar.of_int 0x005D); - cdata_two_brackets_state l' l''' l - - | l, c -> -- add_character l'' 0x005D; -- add_character l''' 0x005D; -- add_character l c; -+ add_character l'' (Uchar.of_int 0x005D); -+ add_character l''' (Uchar.of_int 0x005D); -+ add_character l (Uchar.of_int c); - cdata_state l' - end - -@@ -963,9 +963,9 @@ let tokenize report resolve_reference (i - !throw (fun () -> - lt_in_text l' (fun () -> - push_list input cs; -- add_character l' 0x003C; -- add_character l' 0x0021; -- add_character l' 0x0044; -+ add_character l' (Uchar.of_int 0x003C); -+ add_character l' (Uchar.of_int 0x0021); -+ add_character l' (Uchar.of_int 0x0044); - initial_state ())) - end - -@@ -980,15 +980,15 @@ let tokenize report resolve_reference (i - emit_doctype l' buffer initial_state - - | _, (0x0022 | 0x0027 as c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_quoted_state (fun () -> doctype_state l' buffer) c l' buffer - - | _, (0x003C as c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_item_state (fun () -> doctype_state l' buffer) l' buffer - - | _, c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_state l' buffer - end - -@@ -996,11 +996,11 @@ let tokenize report resolve_reference (i - next input !throw (fun () -> unterminated_doctype l' buffer) - begin function - | _, c when c = quote -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - state () - - | _, c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_quoted_state state quote l' buffer - end - -@@ -1008,18 +1008,18 @@ let tokenize report resolve_reference (i - next input !throw (fun () -> unterminated_doctype l' buffer) - begin function - | _, (0x0021 as c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_declaration_state state l' buffer - - | l, (0x003F as c) -> -- add_utf_8 buffer c; -- let undo = tap (fun (_, c) -> add_utf_8 buffer c) input in -+ add_utf_8 buffer (Uchar.of_int c); -+ let undo = tap (fun (_, c) -> add_utf_8 buffer (Uchar.of_int c)) input in - parse_declaration_or_processing_instruction l (fun _ -> - undo (); - state ()) - - | _, c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - state () - end - -@@ -1027,16 +1027,16 @@ let tokenize report resolve_reference (i - next input !throw (fun () -> unterminated_doctype l' buffer) - begin function - | _, (0x003E as c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - state () - - | _, (0x0022 | 0x0027 as c) -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_quoted_state - (fun () -> doctype_declaration_state state l' buffer) c l' buffer - - | _, c -> -- add_utf_8 buffer c; -+ add_utf_8 buffer (Uchar.of_int c); - doctype_declaration_state state l' buffer - end - diff --git a/dev-ml/markup/markup-0.7.2-r1.ebuild b/dev-ml/markup/markup-0.7.2-r1.ebuild deleted file mode 100644 index f70ac55cd716..000000000000 --- a/dev-ml/markup/markup-0.7.2-r1.ebuild +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 1999-2016 Gentoo Foundation -# Distributed under the terms of the GNU General Public License v2 -# $Id$ - -EAPI=5 - -inherit findlib eutils - -DESCRIPTION="Error-recovering streaming HTML5 and XML parsers" -HOMEPAGE="https://github.com/aantron/markup.ml" -SRC_URI="https://github.com/aantron/markup.ml/archive/${PV}.tar.gz -> ${P}.tar.gz" - -LICENSE="BSD" -SLOT="0/${PV}p1" -KEYWORDS="~amd64" -IUSE="doc test" - -DEPEND=" - dev-lang/ocaml:=[ocamlopt] - dev-ml/lwt:=[ocamlopt] - >=dev-ml/uutf-1.0:=[ocamlopt] -" -RDEPEND="${DEPEND}" -DEPEND="${DEPEND} - test? ( dev-ml/ounit ) - dev-ml/ocamlbuild" -S="${WORKDIR}/${PN}.ml-${PV}" - -src_prepare() { - epatch "${FILESDIR}/uutf.patch" \ - "${FILESDIR}/test.patch" -} - -src_compile() { - emake - use doc && emake docs -} - -src_install() { - findlib_src_preinst - emake ocamlfind-install - dodoc README.md - use doc && dohtml doc/html/* -} |