123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- open Seppo_lib
(* Exercises [Tag.diff] with [String.compare]: two empty lists must yield
   three empty lists; for two string lists the expectations below are, in
   result order, the elements found in both lists, those found only in the
   second list and those found only in the first list. *)
let test_diff () =
  let joined = String.concat " " in
  (match Tag.diff String.compare [] [] with
   | [], [], [] -> ()
   | _ -> assert false);
  let both, right_only, left_only =
    Tag.diff String.compare [ "b"; "d"; "f" ] [ "a"; "d"; "e" ]
  in
  Assrt.equals_string __LOC__ "d" (joined both);
  Assrt.equals_string __LOC__ "a e" (joined right_only);
  Assrt.equals_string __LOC__ "b f" (joined left_only);
  assert true
(* [assert_cat msg exp res] compares the tag list [res] with the expected
   plain strings [exp]: first the lengths, then both lists joined with single
   spaces. [msg] labels both assertions. *)
let assert_cat msg exp res =
  (* unwrap the tags once; mapping does not change the length *)
  let names = List.map (fun (Tag.Tag name) -> name) res in
  Assrt.equals_int msg (List.length exp) (List.length names);
  Assrt.equals_string msg (String.concat " " exp) (String.concat " " names)
(* Adds two tags that differ only in case and accent to an empty [Tag.Tmap]
   and expects a single binding, reachable under the key "#a" and holding the
   first tag that was listed. *)
let test_map () =
  (* https://v2.ocaml.org/api/Map.Make.html *)
  let tags = Tag.(Tmap.(add_tag_list [ Tag "#Ä"; Tag "#à" ] empty)) in
  Assrt.equals_int __LOC__ 1 (Tag.Tmap.cardinal tags);
  let (Tag.Tag kept) = Option.get (Tag.Tmap.find_opt "#a" tags) in
  Assrt.equals_string __LOC__ "#Ä" kept;
  assert true
- (* Example from Uuseg
- let test_segment_word () =
- let ret = Tag.utf_8_segments `Word "Uhu" in
- ret |> List.length |> Assrt.equals_int __LOC__ 1;
- let ret = Tag.utf_8_segments `Word "Hello World!" in
- ret |> List.length |> Assrt.equals_int __LOC__ 4;
- let ret = Tag.utf_8_segments `Word "Hello, #World!" in
- ret |> List.length |> Assrt.equals_int __LOC__ 6;
- let ret = Tag.utf_8_segments `Word "Hello, #🌍World!" in
- ret |> List.length |> Assrt.equals_int __LOC__ 7;
- let ret = Tag.utf_8_segments `Word "Hello, #🌍🎉World!" in
- ret |> List.length |> Assrt.equals_int __LOC__ 8;
- assert true
- *)
(* Runs [Tag.of_string] over short phrases and two longer postings and
   compares the extracted tags with the expected ones via [assert_cat]. *)
let test_of_string () =
  (* [check label expected text]: extract the tags of [text], then compare. *)
  let check label expected text =
    assert_cat label expected (Tag.of_string text)
  in
  check "of_string 1" [ "#World" ] "Hello, #World!";
  (* a hash glued to a preceding letter is expected to yield no tag *)
  check "of_string 1.1" [] "abc#def";
  check "of_string 1.2" [ "#def" ] "abc #def";
  check "of_string 1.3" [ "#def" ] "abc.#def";
  check "of_string 1.4" [ "#@uu"; "#gh" ] "ab #@uu @cd@ef #gh ij";
  (* an emoji directly after the hash is not expected among the tags *)
  check "of_string 2" [ "#Uhu"; "#So" ] "H #Uhu un #🐫 d #So ja";
  check "of_string 3" [ "#Uhu"; "#So" ] "H #Uhu un 🐫 d #So ja";
  (* TODO the flag: *)
  check "of_string 4"
    [ "#†"; "#RIP"; "#AD2021" ]
    "#† #RIP #AD2021 📻 🇦🇹 Hannes Leopoldseder ist tot - ooe.ORF.at\n\n\
     via https://twitter.com/wasbuxton/status/1361797119871508485\n\
     via https://twitter.com/jnd1er\n\
     auch https://ooe.orf.at/stories/3090120/";
  check "of_string 5"
    [ "#†"; "#RIP"; "#AD2021"; "#objc" ]
    "#† #RIP #AD2021 Dr. Brad J. Cox Ph.D. Obituary - Manassas, VA | SCNow\n\n\
     \"… Dr. Brad J. Cox Ph. DDr. Brad Cox, Ph. D of Manassas, Virginia, died \
     on January\n\
     2, 2021 at his residence. Dr. Cox was a computer scientist known mostly for\n\
     creating the Objective – C programming language …\"\n\n\
     via https://news.ycombinator.com/item?id=25876767 #objc";
  assert true
(* A zero width space directly after [#World] is expected to end the tag, so
   the trailing letter must not become part of it. *)
let test_zero_width_space () =
  (* The separator is written as an explicit escape: an invisible literal
     character is easily lost by editors or copy and paste, which would
     silently turn the input into a different word.
     NOTE(review): U+200B is assumed from the test name - confirm. *)
  Tag.of_string "Hello, #World\u{200B}s!"
  |> assert_cat "zero_width_space 1" [ "#World" ];
  assert true
(* Feeds the fixture file [t_tag.1.txt] (relative to the current directory)
   through [Tag.sift_channel]. An [Ok] result is compared with the single
   expected tag, an [Error] result with the string "#Hello". *)
let test_sift_channel () =
  let ic = open_in "t_tag.1.txt" in
  (* close the channel even when sifting raises, so the handle never leaks *)
  let ret =
    Fun.protect
      ~finally:(fun () -> close_in ic)
      (fun () -> Tag.sift_channel ic)
  in
  (match ret with
   | Ok v -> v |> assert_cat "comp" [ "#World" ]
   | Error v -> v |> Assrt.equals_string __LOC__ "#Hello");
  assert true
- (* https://codeberg.org/mro/ShaarliGo/src/branch/master/t_tags.go#L57 *)
(* Checks [Tag.fold] on accented input. Per the expectations below the result
   is lower case with accents removed, while the letter ł and the exclamation
   mark pass through unchanged. *)
let test_fold () =
  (* [tst msg exp src] folds the tag [src] and compares it with [exp]. *)
  let tst msg exp src =
    Tag.Tag src |> Tag.fold |> Assrt.equals_string msg exp
  in
  tst "fold 1" "hallo wyrld!" "Hälló wÿrld!";
  tst "fold 2" "demaiziere" "DeMaizière";
  tst "fold 3" "cegłowski!" "Cegłowski!";
  (* distinct label, so a failure here is not mistaken for the case above *)
  tst "fold 4" "iieeean" "íìéèêäñ";
  assert true
(* Calls [Tag.normalise] twice with two texts, the same two given tags and an
   empty [Tag.Tmap], and checks the returned triple each time: the first text
   comes back unchanged, the second text has given tags appended on a new
   line, and the tag list is what the expectations below spell out. *)
let test_normalize () =
  let given = [ Tag.Tag "#Uhu"; Tag.Tag "#less" ] in
  (* case 1: only "#less" is expected to be appended to the second text *)
  let short_txt, long_txt, tags =
    Tag.normalise "#Uhu" "Aha\n#more" given Tag.Tmap.empty
  in
  Assrt.equals_string __LOC__ "#Uhu" short_txt;
  Assrt.equals_string __LOC__ "Aha\n#more\n#less" long_txt;
  assert_cat "comp" [ "#Uhu"; "#less"; "#more" ] tags;
  (* case 2: slash and dot cut the tags short, both given tags are appended *)
  let short_txt, long_txt, tags =
    Tag.normalise "#Uh/u" "Aha\n#mo.re" given Tag.Tmap.empty
  in
  Assrt.equals_string __LOC__ "#Uh/u" short_txt;
  Assrt.equals_string __LOC__ "Aha\n#mo.re\n#Uhu #less" long_txt;
  assert_cat "comp" [ "#Uh"; "#Uhu"; "#less"; "#mo" ] tags;
  assert true
(* Exercises [Tag.cdb_normalise] against a cdb file at [tmp/tag.cdb]
   (relative to the current directory). The file is recreated empty, seeded
   with one mapping, and after normalising the db is expected to hold the
   seeded entry plus one for the newly seen tag. *)
let test_cdb () =
  let fn = "tmp/tag.cdb" in
  (* start from scratch; a file that does not exist yet is fine *)
  Unix.(try unlink fn with Unix_error (ENOENT, "unlink", _) -> ());
  File.touch fn;
  (* reuse [fn] so the touched file and the opened db cannot drift apart *)
  let db = Mapcdb.Cdb fn |> Mapcdb.add_string "#a" "#Ä" in
  let (Mapcdb.Cdb path) = db in
  Assrt.equals_string __LOC__ fn path;
  let s, l, ts = Tag.cdb_normalise "uhu #á" "aha #B" [] db in
  (* both texts are expected back unchanged *)
  Assrt.equals_string __LOC__ "uhu #á" s;
  Assrt.equals_string __LOC__ "aha #B" l;
  Assrt.equals_int __LOC__ 2 (List.length ts);
  Mapcdb.find_string_opt "#a" db
  |> Option.get
  |> Assrt.equals_string __LOC__ "#Ä";
  Mapcdb.find_string_opt "#b" db
  |> Option.get
  |> Assrt.equals_string __LOC__ "#B";
  assert true
(* Entry point: change into the test directory, which the relative fixture
   and tmp paths of the tests refer to, then run every test in order. *)
let () =
  Unix.chdir "../../../test/";
  (* test_segment_word is currently commented out and therefore not run *)
  [
    test_map;
    test_diff;
    test_of_string;
    test_zero_width_space;
    test_sift_channel;
    test_fold;
    test_normalize;
    test_cdb;
  ]
  |> List.iter (fun run -> run ());
  assert true
|