(* t_tag.ml: tests for the Tag module of Seppo_lib. *)

  1. open Seppo_lib
  (* Exercises [Tag.diff] with [String.compare].
     Two empty lists must yield three empty lists. For [b; d; f] against
     [a; d; e] the three result lists are expected to read "d", "a e" and
     "b f", in that order. *)
  let test_diff () =
    let joined = String.concat " " in
    (match Tag.diff String.compare [] [] with
     | [], [], [] -> ()
     | _ -> assert false);
    let first, second, third =
      Tag.diff String.compare [ "b"; "d"; "f" ] [ "a"; "d"; "e" ]
    in
    Assrt.equals_string __LOC__ "d" (joined first);
    Assrt.equals_string __LOC__ "a e" (joined second);
    Assrt.equals_string __LOC__ "b f" (joined third);
    assert true
  (* [assert_cat msg exp res] checks a list of [Tag.Tag] values against the
     expected tag strings: first that both lists have the same length, then
     that their space-joined texts are equal. [msg] labels both checks. *)
  let assert_cat msg exp res =
    let names = List.map (fun (Tag.Tag name) -> name) res in
    Assrt.equals_int msg (List.length exp) (List.length names);
    Assrt.equals_string msg (String.concat " " exp) (String.concat " " names)
  (* Adds the tags "#Ä" and "#à" to an empty [Tag.Tmap] and expects a single
     binding, reachable under the key "#a", that holds "#Ä". *)
  let test_map () =
    (* https://v2.ocaml.org/api/Map.Make.html *)
    let m =
      let open Tag in
      let open Tmap in
      add_tag_list [ Tag "#Ä"; Tag "#à" ] empty
    in
    Assrt.equals_int __LOC__ 1 (Tag.Tmap.cardinal m);
    let (Tag.Tag found) = Option.get (Tag.Tmap.find_opt "#a" m) in
    Assrt.equals_string __LOC__ "#Ä" found;
    assert true
  23. (* Example from Uuseg
  24. let test_segment_word () =
  25. let ret = Tag.utf_8_segments `Word "Uhu" in
  26. ret |> List.length |> Assrt.equals_int __LOC__ 1;
  27. let ret = Tag.utf_8_segments `Word "Hello World!" in
  28. ret |> List.length |> Assrt.equals_int __LOC__ 4;
  29. let ret = Tag.utf_8_segments `Word "Hello, #World!" in
  30. ret |> List.length |> Assrt.equals_int __LOC__ 6;
  31. let ret = Tag.utf_8_segments `Word "Hello, #🌍World!" in
  32. ret |> List.length |> Assrt.equals_int __LOC__ 7;
  33. let ret = Tag.utf_8_segments `Word "Hello, #🌍🎉World!" in
  34. ret |> List.length |> Assrt.equals_int __LOC__ 8;
  35. assert true
  36. *)
  (* Runs [Tag.of_string] over a set of sample texts and compares the tags it
     returns with the expected ones via [assert_cat]. *)
  let test_of_string () =
    (* [check label expected text]: tags found in [text] must be [expected]. *)
    let check label expected text =
      assert_cat label expected (Tag.of_string text)
    in
    check "of_string 1" [ "#World" ] "Hello, #World!";
    check "of_string 1.1" [] "abc#def";
    check "of_string 1.2" [ "#def" ] "abc #def";
    check "of_string 1.3" [ "#def" ] "abc.#def";
    check "of_string 1.4" [ "#@uu"; "#gh" ] "ab #@uu @cd@ef #gh ij";
    check "of_string 2" [ "#Uhu"; "#So" ] "H #Uhu un #🐫 d #So ja";
    check "of_string 3" [ "#Uhu"; "#So" ] "H #Uhu un 🐫 d #So ja";
    (* TODO the flag: *)
    check "of_string 4" [ "#†"; "#RIP"; "#AD2021" ]
      "#† #RIP #AD2021 📻 🇦🇹 Hannes Leopoldseder ist tot - ooe.ORF.at\n\n\
       via https://twitter.com/wasbuxton/status/1361797119871508485\n\
       via https://twitter.com/jnd1er\n\
       auch https://ooe.orf.at/stories/3090120/";
    check "of_string 5" [ "#†"; "#RIP"; "#AD2021"; "#objc" ]
      "#† #RIP #AD2021 Dr. Brad J. Cox Ph.D. Obituary - Manassas, VA | SCNow\n\n\
       \"… Dr. Brad J. Cox Ph. DDr. Brad Cox, Ph. D of Manassas, Virginia, died \
       on January\n\
       2, 2021 at his residence. Dr. Cox was a computer scientist known mostly for\n\
       creating the Objective – C programming language …\"\n\n\
       via https://news.ycombinator.com/item?id=25876767 #objc";
    assert true
  (* A zero width space inside a word must end the tag: only "#World" is
     expected from "#World<ZWSP>s".
     The character is written as the escape \u{200B} (U+200B ZERO WIDTH SPACE)
     rather than as a raw, invisible character, so that editors, formatters
     and copy/paste cannot silently drop it and change what is tested. *)
  let test_zero_width_space () =
    Tag.of_string "Hello, #World\u{200B}s!"
    |> assert_cat "zero_width_space 1" [ "#World" ];
    assert true
  (* Feeds the fixture file "t_tag.1.txt" to [Tag.sift_channel].
     On [Ok] the resulting tags must be exactly "#World"; on [Error] the
     returned string is compared with "#Hello".
     The channel is closed through [Fun.protect], so it is released even when
     [Tag.sift_channel] raises. *)
  let test_sift_channel () =
    let ic = open_in "t_tag.1.txt" in
    let ret =
      Fun.protect
        ~finally:(fun () -> close_in ic)
        (fun () -> Tag.sift_channel ic)
    in
    (match ret with
     | Ok v -> v |> assert_cat "comp" [ "#World" ]
     | Error v -> v |> Assrt.equals_string __LOC__ "#Hello");
    assert true
  75. (* https://codeberg.org/mro/ShaarliGo/src/branch/master/t_tags.go#L57 *)
  (* Checks [Tag.fold] on several accented inputs.
     Each case has its own label so a failure points at exactly one case. *)
  let test_fold () =
    (* [tst msg exp src]: folding [Tag.Tag src] must give the string [exp]. *)
    let tst msg exp src =
      Tag.Tag src |> Tag.fold |> Assrt.equals_string msg exp
    in
    tst "fold 1" "hallo wyrld!" "Hälló wÿrld!";
    tst "fold 2" "demaiziere" "DeMaizière";
    tst "fold 3" "cegłowski!" "Cegłowski!";
    tst "fold 4" "iieeean" "íìéèêäñ";
    assert true
  (* Calls [Tag.normalise] twice with the same given tags ("#Uhu", "#less")
     and an empty [Tag.Tmap], and checks the three results of each call: the
     first string, the second string and the resulting tag list. *)
  let test_normalize () =
    let given = [ Tag.Tag "#Uhu"; Tag.Tag "#less" ] in
    let short, long, tags =
      Tag.normalise "#Uhu" "Aha\n#more" given Tag.Tmap.empty
    in
    Assrt.equals_string __LOC__ "#Uhu" short;
    Assrt.equals_string __LOC__ "Aha\n#more\n#less" long;
    assert_cat "comp" [ "#Uhu"; "#less"; "#more" ] tags;
    let short, long, tags =
      Tag.normalise "#Uh/u" "Aha\n#mo.re" given Tag.Tmap.empty
    in
    Assrt.equals_string __LOC__ "#Uh/u" short;
    Assrt.equals_string __LOC__ "Aha\n#mo.re\n#Uhu #less" long;
    assert_cat "comp" [ "#Uh"; "#Uhu"; "#less"; "#mo" ] tags;
    assert true
  (* Exercises [Tag.cdb_normalise] against a fresh cdb file "tmp/tag.cdb".
     The file is removed if present, recreated with [File.touch] and seeded
     with "#a" -> "#Ä". After normalising, both input strings are expected
     back unchanged together with two tags, and lookups must yield
     "#a" -> "#Ä" and "#b" -> "#B" (the latter presumably stored by
     [Tag.cdb_normalise]; its implementation is not visible here). *)
  let test_cdb () =
    let fn = "tmp/tag.cdb" in
    (* A missing file is fine; any other unlink failure propagates. *)
    (try Unix.unlink fn with Unix.Unix_error (Unix.ENOENT, "unlink", _) -> ());
    File.touch fn;
    let cdb = Mapcdb.add_string "#a" "#Ä" (Mapcdb.Cdb fn) in
    let (Mapcdb.Cdb path) = cdb in
    Assrt.equals_string __LOC__ fn path;
    let short, long, tags = Tag.cdb_normalise "uhu #á" "aha #B" [] cdb in
    Assrt.equals_string __LOC__ "uhu #á" short;
    Assrt.equals_string __LOC__ "aha #B" long;
    Assrt.equals_int __LOC__ 2 (List.length tags);
    let lookup key = Option.get (Mapcdb.find_string_opt key cdb) in
    Assrt.equals_string __LOC__ "#Ä" (lookup "#a");
    Assrt.equals_string __LOC__ "#B" (lookup "#b");
    assert true
  (* Test driver: changes into "../../../test/" first, then runs every test
     in order. The tests use relative paths such as "t_tag.1.txt" and
     "tmp/tag.cdb". *)
  let () =
    Unix.chdir "../../../test/";
    (* test_segment_word is disabled; its definition is commented out. *)
    List.iter
      (fun run -> run ())
      [
        test_map;
        test_diff;
        test_of_string;
        test_zero_width_space;
        test_sift_channel;
        test_fold;
        test_normalize;
        test_cdb;
      ];
    assert true