

(* [make_iso enc] concatenates, for every integer from 0 to 255 in
 * ascending order, the string returned by [Netconversion.makechar] for
 * that integer in the encoding [enc].
 *
 * Integers for which [makechar] raises [Not_found] contribute nothing to
 * the result (presumably these are code points the encoding cannot
 * represent - confirm against the Netconversion documentation).
 *
 * The result is accumulated in a [Buffer] so that the loop does not copy
 * the whole intermediate string on every iteration.
 *)
let make_iso enc =
  (* The coercion does not depend on the loop index, so do it once. *)
  let net_enc = (enc :> Netconversion.encoding) in
  let acc = Buffer.create 256 in
  for code = 0 to 255 do
    let piece =
      try Netconversion.makechar net_enc code
      with Not_found -> "" in
    Buffer.add_string acc piece
  done;
  Buffer.contents acc
;;

(* [make_ucs2 start stop] returns a string holding every integer from
 * [start] (inclusive) to [stop] (exclusive) as two bytes each, most
 * significant byte first. The result has length 2 * (stop - start).
 *
 * If [stop <= start] the result is the empty string.
 *
 * Raises [Invalid_argument] (from [Char.chr]) if a value in the range
 * is negative or larger than 0xFFFF, because its upper byte then does not
 * fit into a character.
 *
 * The string is assembled in a [Buffer] instead of being mutated in place,
 * so the function does not depend on the mutable-string API
 * (String.create / String.set).
 *)
let make_ucs2 start stop =
  let count = max 0 (stop - start) in
  (* Two output bytes per value; ask for at least one byte of capacity. *)
  let acc = Buffer.create (max 1 (2 * count)) in
  for c = start to stop - 1 do
    Buffer.add_char acc (Char.chr (c lsr 8));
    Buffer.add_char acc (Char.chr (c land 0xff))
  done;
  Buffer.contents acc
;;

(* [make_ucs4 start stop] returns a string holding every integer from
 * [start] (inclusive) to [stop] (exclusive) as four bytes each, most
 * significant byte first. The result has length 4 * (stop - start).
 *
 * If [stop <= start] the result is the empty string.
 *
 * Raises [Invalid_argument] (from [Char.chr]) if [c lsr 24] is outside
 * 0..255 for some value [c] in the range.
 *
 * The string is assembled in a [Buffer] instead of being mutated in place,
 * so the function does not depend on the mutable-string API
 * (String.create / String.set).
 *)
let make_ucs4 start stop =
  let count = max 0 (stop - start) in
  (* Four output bytes per value; ask for at least one byte of capacity. *)
  let acc = Buffer.create (max 1 (4 * count)) in
  for c = start to stop - 1 do
    Buffer.add_char acc (Char.chr (c lsr 24));
    Buffer.add_char acc (Char.chr ((c lsr 16) land 0xff));
    Buffer.add_char acc (Char.chr ((c lsr 8) land 0xff));
    Buffer.add_char acc (Char.chr (c land 0xff))
  done;
  Buffer.contents acc
;;

(* Translates an encoding tag into the character set name that is put on
 * the iconv command line. Only the tags listed here are accepted.
 *)
let name_of_encoding = function
  (* The ISO-8859 family *)
  | `Enc_iso88591  -> "ISO_8859-1"
  | `Enc_iso88592  -> "ISO_8859-2"
  | `Enc_iso88593  -> "ISO_8859-3"
  | `Enc_iso88594  -> "ISO_8859-4"
  | `Enc_iso88595  -> "ISO_8859-5"
  | `Enc_iso88596  -> "ISO_8859-6"
  | `Enc_iso88597  -> "ISO_8859-7"
  | `Enc_iso88598  -> "ISO_8859-8"
  | `Enc_iso88599  -> "ISO_8859-9"
  | `Enc_iso885910 -> "ISO_8859-10"
  | `Enc_iso885913 -> "ISO_8859-13"
  | `Enc_iso885914 -> "ISO_8859-14"
  | `Enc_iso885915 -> "ISO_8859-15"
  (* Unicode encodings.
   * Note: GNU-iconv assumes big endian byte order
   *)
  | `Enc_utf8      -> "UTF-8"
  | `Enc_ucs4      -> "UCS-4"
  | `Enc_ucs2      -> "UCS-2"
  | `Enc_utf16     -> "UTF-16"
;;

(* [iconv_recode_string in_enc out_enc in_s] converts [in_s] from [in_enc]
 * to [out_enc] by running the external command
 *   iconv -f <name of in_enc> -t <name of out_enc>
 * and returns everything the command writes to its standard output.
 * The encoding names come from [name_of_encoding].
 *
 * Raises [Failure] if the command does not terminate with exit code 0
 * (for example because iconv is missing or rejects the input); previously
 * the exit status was discarded and such a failure was indistinguishable
 * from an empty conversion result.
 *)
let iconv_recode_string in_enc out_enc in_s =
  let in_enc_name  = name_of_encoding in_enc in
  let out_enc_name = name_of_encoding out_enc in
  let command = "iconv -f " ^ in_enc_name ^ " -t " ^ out_enc_name in

  let out_ch, in_ch = Unix.open_process command in
  (* Write in_s to in_ch in a new thread, so that this thread can drain
   * the output of the command at the same time:
   *)
  let writer =
    Thread.create
      (fun () ->
         output_string in_ch in_s;
         close_out in_ch
      )
      () in
  (* Read the result in the current thread until end of file. The bytes
   * are collected in a Buffer, which avoids both the repeated string
   * concatenation and the need for a mutable string as read buffer.
   *)
  let result = Buffer.create 1024 in
  begin try
    while true do
      Buffer.add_char result (input_char out_ch)
    done
  with End_of_file -> ()
  end;
  (* Make sure the writer is done with in_ch before the channels are
   * closed by close_process:
   *)
  Thread.join writer;
  begin match Unix.close_process (out_ch, in_ch) with
    | Unix.WEXITED 0 -> ()
    | Unix.WEXITED code ->
        failwith (Printf.sprintf "Command failed with exit code %d: %s"
                    code command)
    | Unix.WSIGNALED signo ->
        failwith (Printf.sprintf "Command killed by signal %d: %s"
                    signo command)
    | Unix.WSTOPPED signo ->
        failwith (Printf.sprintf "Command stopped by signal %d: %s"
                    signo command)
  end;
  Buffer.contents result
;;

(* Checks one encoding against UTF-8 in both directions.
 *
 * The sample produced by [make_iso enc] is recoded to UTF-8 once with
 * Netconversion and once with iconv, and both results must be equal.
 * The UTF-8 text is then recoded back with both implementations; the two
 * results must be equal to each other and to the original sample.
 * A failed comparison raises [Assert_failure]. Progress is printed to
 * stdout.
 *)
let test_iso_and_utf8 enc  =
  let label = name_of_encoding enc in
  let net_enc = (enc :> Netconversion.encoding) in
  print_string ("Recode: " ^ label ^ " and UTF-8... ");
  flush stdout;
  let sample = make_iso enc in

  (* Direction enc -> UTF-8 *)
  let utf8_by_net   = Netconversion.recode_string net_enc `Enc_utf8 sample in
  let utf8_by_iconv = iconv_recode_string enc `Enc_utf8 sample in
  assert (utf8_by_net = utf8_by_iconv);

  (* Direction UTF-8 -> enc *)
  let back_by_net   =
    Netconversion.recode_string `Enc_utf8 net_enc utf8_by_net in
  let back_by_iconv = iconv_recode_string `Enc_utf8 enc utf8_by_net in
  assert (back_by_net = back_by_iconv && back_by_net = sample);

  print_endline "OK";
  flush stdout
;;

(* Round-trip check between big-endian UTF-16 and UTF-8 for the values
 * 0x0000 to 0xD7FF.
 *
 * The sample comes from [make_ucs2 0 0xd800]. Netconversion is called with
 * the tag `Enc_utf16_be, iconv with the tag `Enc_utf16. Both must produce
 * the same UTF-8 text, and recoding that text back must give the sample
 * again with both implementations. A failed comparison raises
 * [Assert_failure]. Progress is printed to stdout.
 *)
let test_utf16_and_utf8_0000_d7ff () =
  print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... ";
  flush stdout;
  let sample = make_ucs2 0 0xd800 in

  (* Direction UTF-16 -> UTF-8 *)
  let utf8_by_net   =
    Netconversion.recode_string `Enc_utf16_be `Enc_utf8 sample in
  let utf8_by_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 sample in
  assert (utf8_by_net = utf8_by_iconv);

  (* Direction UTF-8 -> UTF-16 *)
  let back_by_net   =
    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_by_net in
  let back_by_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_by_net in
  assert (back_by_net = back_by_iconv && back_by_net = sample);

  print_endline "OK";
  flush stdout
;;

(* Round-trip check between big-endian UTF-16 and UTF-8 for the values
 * 0xE000 to 0xFFFD.
 *
 * The sample comes from [make_ucs2 0xe000 0xfffe] (the upper bound is
 * exclusive). Netconversion is called with the tag `Enc_utf16_be, iconv
 * with the tag `Enc_utf16. Both must produce the same UTF-8 text, and
 * recoding that text back must give the sample again with both
 * implementations. A failed comparison raises [Assert_failure]. Progress
 * is printed to stdout.
 *)
let test_utf16_and_utf8_e000_fffd () =
  print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... ";
  flush stdout;
  let sample = make_ucs2 0xe000 0xfffe in

  (* Direction UTF-16 -> UTF-8 *)
  let utf8_by_net   =
    Netconversion.recode_string `Enc_utf16_be `Enc_utf8 sample in
  let utf8_by_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 sample in
  assert (utf8_by_net = utf8_by_iconv);

  (* Direction UTF-8 -> UTF-16 *)
  let back_by_net   =
    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_by_net in
  let back_by_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_by_net in
  assert (back_by_net = back_by_iconv && back_by_net = sample);

  print_endline "OK";
  flush stdout
;;

(* Round-trip check between big-endian UTF-16 and UTF-8 for the values
 * 0x10000 to 0x10FFFF, processed in 16 slices of 0x10000 values each.
 *
 * For every slice the values are first generated with [make_ucs4] and
 * converted to UTF-16 by iconv; the result is the sample. The sample is
 * then recoded to UTF-8 and back with both Netconversion (`Enc_utf16_be)
 * and iconv (`Enc_utf16), and the results are compared. A failed
 * comparison raises [Assert_failure]. A plus sign is printed after each
 * slice.
 *)
let test_utf16_and_utf8_10000_10FFFF () =
  print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... ";
  flush stdout;
  for slice = 1 to 16 do
    let first = slice * 0x10000 in
    let ucs4_text = make_ucs4 first (first + 0x10000) in
    let sample = iconv_recode_string `Enc_ucs4 `Enc_utf16 ucs4_text in

    (* Direction UTF-16 -> UTF-8 *)
    let utf8_by_net   =
      Netconversion.recode_string `Enc_utf16_be `Enc_utf8 sample in
    let utf8_by_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 sample in
    assert (utf8_by_net = utf8_by_iconv);

    (* Direction UTF-8 -> UTF-16 *)
    let back_by_net   =
      Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_by_net in
    let back_by_iconv =
      iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_by_net in
    assert (back_by_net = back_by_iconv && back_by_net = sample);

    print_string "+";
    flush stdout
  done;
  print_endline "OK";
  flush stdout
;;


(* Test driver: prints a notice about the iconv requirement and then runs
 * the enabled tests in order. The first failing assertion aborts the run.
 *)
let () =
  print_endline "Warning: You need the command 'iconv' to run this test!";
  flush stdout;
  (* The ISO-8859 tests. The entries in comments are currently disabled. *)
  List.iter
    test_iso_and_utf8
    [ `Enc_iso88591;
      `Enc_iso88592;
      `Enc_iso88593;
      `Enc_iso88594;
      `Enc_iso88595;
      `Enc_iso88596;
      `Enc_iso88597;
      (* `Enc_iso88598; *)
      `Enc_iso88599;
      `Enc_iso885910;
      (* `Enc_iso885913; *)
      (* `Enc_iso885914; *)
      (* `Enc_iso885915; *)
    ];
  (* The UTF-16 tests. *)
  test_utf16_and_utf8_0000_d7ff();
  test_utf16_and_utf8_e000_fffd();
  (* This test does not work because iconv does not support the surrogate
   * representation of UTF-16:
   * test_utf16_and_utf8_10000_10FFFF();
   *)
  ()
;;
