# tunicode.nim -- assertion-based tests for the std/unicode module.

  1. import std/unicode
  2. proc asRune(s: static[string]): Rune =
  3. ## Compile-time conversion proc for converting string literals to a Rune
  4. ## value. Returns the first Rune of the specified string.
  5. ##
  6. ## Shortcuts code like ``"å".runeAt(0)`` to ``"å".asRune`` and returns a
  7. ## compile-time constant.
  8. if s.len == 0: Rune(0)
  9. else: s.runeAt(0)
  10. let
  11. someString = "öÑ"
  12. someRunes = toRunes(someString)
  13. compared = (someString == $someRunes)
  14. doAssert compared == true
  15. proc testReplacements(word: string): string =
  16. case word
  17. of "two":
  18. return "2"
  19. of "foo":
  20. return "BAR"
  21. of "βeta":
  22. return "beta"
  23. of "alpha":
  24. return "αlpha"
  25. else:
  26. return "12345"
  27. doAssert translate("two not alpha foo βeta", testReplacements) == "2 12345 αlpha BAR beta"
  28. doAssert translate(" two not foo βeta ", testReplacements) == " 2 12345 BAR beta "
  29. doAssert title("foo bar") == "Foo Bar"
  30. doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
  31. doAssert title("") == ""
  32. doAssert capitalize("βeta") == "Βeta"
  33. doAssert capitalize("foo") == "Foo"
  34. doAssert capitalize("") == ""
  35. doAssert swapCase("FooBar") == "fOObAR"
  36. doAssert swapCase(" ") == " "
  37. doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
  38. doAssert swapCase("a✓B") == "A✓b"
  39. doAssert swapCase("Јамогујестистаклоитоминештети") == "јАМОГУЈЕСТИСТАКЛОИТОМИНЕШТЕТИ"
  40. doAssert swapCase("ὕαλονϕαγεῖνδύναμαιτοῦτοοὔμεβλάπτει") == "ὝΑΛΟΝΦΑΓΕῖΝΔΎΝΑΜΑΙΤΟῦΤΟΟὔΜΕΒΛΆΠΤΕΙ"
  41. doAssert swapCase("Կրնամապակիուտեևինծիանհանգիստչըներ") == "կՐՆԱՄԱՊԱԿԻՈՒՏԵևԻՆԾԻԱՆՀԱՆԳԻՍՏՉԸՆԵՐ"
  42. doAssert swapCase("") == ""
  43. doAssert isAlpha("r")
  44. doAssert isAlpha("α")
  45. doAssert isAlpha("ϙ")
  46. doAssert isAlpha("ஶ")
  47. doAssert(not isAlpha("$"))
  48. doAssert(not isAlpha(""))
  49. doAssert isAlpha("Βeta")
  50. doAssert isAlpha("Args")
  51. doAssert isAlpha("𐌼𐌰𐌲𐌲𐌻𐌴𐍃𐍄𐌰𐌽")
  52. doAssert isAlpha("ὕαλονϕαγεῖνδύναμαιτοῦτοοὔμεβλάπτει")
  53. doAssert isAlpha("Јамогујестистаклоитоминештети")
  54. doAssert isAlpha("Կրնամապակիուտեևինծիանհանգիստչըներ")
  55. doAssert(not isAlpha("$Foo✓"))
  56. doAssert(not isAlpha("⠙⠕⠑⠎⠝⠞"))
  57. doAssert isSpace("\t")
  58. doAssert isSpace("\l")
  59. doAssert(not isSpace("Β"))
  60. doAssert(not isSpace("Βeta"))
  61. doAssert isSpace("\t\l \v\r\f")
  62. doAssert isSpace(" ")
  63. doAssert(not isSpace(""))
  64. doAssert(not isSpace("ΑΓc \td"))
  65. doAssert(not isLower(' '.Rune))
  66. doAssert(not isUpper(' '.Rune))
  67. doAssert toUpper("Γ") == "Γ"
  68. doAssert toUpper("b") == "B"
  69. doAssert toUpper("α") == "Α"
  70. doAssert toUpper("✓") == "✓"
  71. doAssert toUpper("ϙ") == "Ϙ"
  72. doAssert toUpper("") == ""
  73. doAssert toUpper("ΑΒΓ") == "ΑΒΓ"
  74. doAssert toUpper("AAccβ") == "AACCΒ"
  75. doAssert toUpper("A✓$β") == "A✓$Β"
  76. doAssert toLower("a") == "a"
  77. doAssert toLower("γ") == "γ"
  78. doAssert toLower("Γ") == "γ"
  79. doAssert toLower("4") == "4"
  80. doAssert toLower("Ϙ") == "ϙ"
  81. doAssert toLower("") == ""
  82. doAssert toLower("abcdγ") == "abcdγ"
  83. doAssert toLower("abCDΓ") == "abcdγ"
  84. doAssert toLower("33aaΓ") == "33aaγ"
  85. doAssert reversed("Reverse this!") == "!siht esreveR"
  86. doAssert reversed("先秦兩漢") == "漢兩秦先"
  87. doAssert reversed("as⃝df̅") == "f̅ds⃝a"
  88. doAssert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  89. doAssert reversed("ὕαλονϕαγεῖνδύναμαιτοῦτοοὔμεβλάπτει") == "ιετπάλβεμὔοοτῦοτιαμανύδνῖεγαϕνολαὕ"
  90. doAssert reversed("Јамогујестистаклоитоминештети") == "итетшенимотиолкатситсејугомаЈ"
  91. doAssert reversed("Կրնամապակիուտեևինծիանհանգիստչըներ") == "րենըչտսիգնահնաիծնիևետւոիկապամանրԿ"
  92. doAssert len(toRunes("as⃝df̅")) == runeLen("as⃝df̅")
  93. const test = "as⃝"
  94. doAssert lastRune(test, test.len-1)[1] == 3
  95. doAssert graphemeLen("è", 0) == 2
  96. # test for rune positioning and runeSubStr()
  97. let s = "Hänsel ««: 10,00€"
  98. var t = ""
  99. for c in s.utf8:
  100. t.add c
  101. doAssert(s == t)
  102. doAssert(runeReverseOffset(s, 1) == (20, 18))
  103. doAssert(runeReverseOffset(s, 19) == (-1, 18))
  104. doAssert(runeStrAtPos(s, 0) == "H")
  105. doAssert(runeSubStr(s, 0, 1) == "H")
  106. doAssert(runeStrAtPos(s, 10) == ":")
  107. doAssert(runeSubStr(s, 10, 1) == ":")
  108. doAssert(runeStrAtPos(s, 9) == "«")
  109. doAssert(runeSubStr(s, 9, 1) == "«")
  110. doAssert(runeStrAtPos(s, 17) == "€")
  111. doAssert(runeSubStr(s, 17, 1) == "€")
  112. # echo runeStrAtPos(s, 18) # index error
  113. doAssert(runeSubStr(s, 0) == "Hänsel ««: 10,00€")
  114. doAssert(runeSubStr(s, -18) == "Hänsel ««: 10,00€")
  115. doAssert(runeSubStr(s, 10) == ": 10,00€")
  116. doAssert(runeSubStr(s, 18) == "")
  117. doAssert(runeSubStr(s, 0, 10) == "Hänsel ««")
  118. doAssert(runeSubStr(s, 12) == "10,00€")
  119. doAssert(runeSubStr(s, -6) == "10,00€")
  120. doAssert(runeSubStr(s, 12, 5) == "10,00")
  121. doAssert(runeSubStr(s, 12, -1) == "10,00")
  122. doAssert(runeSubStr(s, -6, 5) == "10,00")
  123. doAssert(runeSubStr(s, -6, -1) == "10,00")
  124. doAssert(runeSubStr(s, 0, 100) == "Hänsel ««: 10,00€")
  125. doAssert(runeSubStr(s, -100, 100) == "Hänsel ««: 10,00€")
  126. doAssert(runeSubStr(s, 0, -100) == "")
  127. doAssert(runeSubStr(s, 100, -100) == "")
  128. block splitTests:
  129. let s = " this is an example "
  130. let s2 = ":this;is;an:example;;"
  131. let s3 = ":this×is×an:example××"
  132. doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
  133. doAssert s2.split(seps = [':'.Rune, ';'.Rune]) == @["", "this", "is", "an",
  134. "example", "", ""]
  135. doAssert s3.split(seps = [':'.Rune, "×".asRune]) == @["", "this", "is",
  136. "an", "example", "", ""]
  137. doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example "]
  138. doAssert s.split(' '.Rune, maxsplit = 1) == @["", "this is an example "]
  139. doAssert s3.split("×".runeAt(0)) == @[":this", "is", "an:example", "", ""]
  140. block stripTests:
  141. doAssert(strip("") == "")
  142. doAssert(strip(" ") == "")
  143. doAssert(strip("y") == "y")
  144. doAssert(strip(" foofoofoo ") == "foofoofoo")
  145. doAssert(strip("sfoofoofoos", runes = ['s'.Rune]) == "foofoofoo")
  146. block:
  147. let stripTestRunes = ['b'.Rune, 'a'.Rune, 'r'.Rune]
  148. doAssert(strip("barfoofoofoobar", runes = stripTestRunes) == "foofoofoo")
  149. doAssert(strip("sfoofoofoos", leading = false, runes = ['s'.Rune]) == "sfoofoofoo")
  150. doAssert(strip("sfoofoofoos", trailing = false, runes = ['s'.Rune]) == "foofoofoos")
  151. block:
  152. let stripTestRunes = ["«".asRune, "»".asRune]
  153. doAssert(strip("«TEXT»", runes = stripTestRunes) == "TEXT")
  154. doAssert(strip("copyright©", leading = false, runes = ["©".asRune]) == "copyright")
  155. doAssert(strip("¿Question?", trailing = false, runes = ["¿".asRune]) == "Question?")
  156. doAssert(strip("×text×", leading = false, runes = ["×".asRune]) == "×text")
  157. doAssert(strip("×text×", trailing = false, runes = ["×".asRune]) == "text×")
  158. block repeatTests:
  159. doAssert repeat('c'.Rune, 5) == "ccccc"
  160. doAssert repeat("×".asRune, 5) == "×××××"
  161. block alignTests:
  162. doAssert align("abc", 4) == " abc"
  163. doAssert align("a", 0) == "a"
  164. doAssert align("1232", 6) == " 1232"
  165. doAssert align("1232", 6, '#'.Rune) == "##1232"
  166. doAssert align("1232", 6, "×".asRune) == "××1232"
  167. doAssert alignLeft("abc", 4) == "abc "
  168. doAssert alignLeft("a", 0) == "a"
  169. doAssert alignLeft("1232", 6) == "1232 "
  170. doAssert alignLeft("1232", 6, '#'.Rune) == "1232##"
  171. doAssert alignLeft("1232", 6, "×".asRune) == "1232××"
  172. block differentSizes:
  173. # upper and lower variants have different number of bytes
  174. doAssert toLower("AẞC") == "aßc"
  175. doAssert toLower("ȺẞCD") == "ⱥßcd"
  176. doAssert toUpper("ⱥbc") == "ȺBC"
  177. doAssert toUpper("rsⱦuv") == "RSȾUV"
  178. doAssert swapCase("ⱥbCd") == "ȺBcD"
  179. doAssert swapCase("XyꟆaB") == "xYᶎAb"
  180. doAssert swapCase("aᵹcᲈd") == "AꝽCꙊD"
  181. block: # bug #17768
  182. let s1 = "abcdef"
  183. let s2 = "abcdéf"
  184. doAssert s1.runeSubstr(0, -1) == "abcde"
  185. doAssert s2.runeSubstr(0, -1) == "abcdé"