unicode.nim 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module provides support to handle the Unicode UTF-8 encoding.
  10. ##
  11. ## There are no specialized ``insert``, ``delete``, ``add`` and ``contains``
  12. ## procedures for ``seq[Rune]`` in this module because the generic variants
  13. ## of these procedures in the system module already work with it.
  14. ##
  15. ## The current version is compatible with Unicode v12.0.0.
  16. ##
  17. ## **See also:**
  18. ## * `strutils module <strutils.html>`_
  19. ## * `unidecode module <unidecode.html>`_
  20. ## * `encodings module <encodings.html>`_
  21. include "system/inclrtl"
type
  RuneImpl = int32 # underlying type of Rune
  Rune* = distinct RuneImpl ## \
    ## Type that can hold a single Unicode code point.
    ##
    ## A Rune may be composed with other Runes to a character on the screen.
    ## `RuneImpl` is the underlying type used to store Runes, currently `int32`.
    ##
    ## Because `Rune` is a `distinct` type, convert it explicitly
    ## (e.g. `RuneImpl(r)` or `int(r)`) before doing integer arithmetic.
# Bit mask with the lowest `n` bits set, e.g. `ones(6) == 0b111111`.
template ones(n: untyped): untyped = ((1 shl n)-1)
  30. proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} =
  31. ## Returns the number of runes of the string ``s``.
  32. runnableExamples:
  33. let a = "añyóng"
  34. doAssert a.runeLen == 6
  35. ## note: a.len == 8
  36. result = 0
  37. var i = 0
  38. while i < len(s):
  39. if uint(s[i]) <= 127: inc(i)
  40. elif uint(s[i]) shr 5 == 0b110: inc(i, 2)
  41. elif uint(s[i]) shr 4 == 0b1110: inc(i, 3)
  42. elif uint(s[i]) shr 3 == 0b11110: inc(i, 4)
  43. elif uint(s[i]) shr 2 == 0b111110: inc(i, 5)
  44. elif uint(s[i]) shr 1 == 0b1111110: inc(i, 6)
  45. else: inc i
  46. inc(result)
  47. proc runeLenAt*(s: string, i: Natural): int =
  48. ## Returns the number of bytes the rune starting at ``s[i]`` takes.
  49. ##
  50. ## See also:
  51. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  52. runnableExamples:
  53. let a = "añyóng"
  54. doAssert a.runeLenAt(0) == 1
  55. doAssert a.runeLenAt(1) == 2
  56. if uint(s[i]) <= 127: result = 1
  57. elif uint(s[i]) shr 5 == 0b110: result = 2
  58. elif uint(s[i]) shr 4 == 0b1110: result = 3
  59. elif uint(s[i]) shr 3 == 0b11110: result = 4
  60. elif uint(s[i]) shr 2 == 0b111110: result = 5
  61. elif uint(s[i]) shr 1 == 0b1111110: result = 6
  62. else: result = 1
# U+FFFD; `fastRuneAt` yields it when a multi-byte sequence is cut short.
const replRune = Rune(0xFFFD)
template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
  ## Returns the rune ``s[i]`` in ``result``.
  ##
  ## If ``doInc == true`` (default), ``i`` is incremented by the number
  ## of bytes that have been processed.
  ##
  ## The sequence length is taken from the lead byte ``s[i]``. If the string
  ## is too short to hold the announced sequence, ``result`` is set to
  ## U+FFFD and only one byte is consumed. Continuation bytes are not
  ## checked (the corresponding asserts are commented out).
  ##
  ## This is a template: ``s`` and ``i`` are substituted at every use, so
  ## pass plain variables rather than expressions that are costly to evaluate.
  bind ones
  if uint(s[i]) <= 127:
    # Single byte (ASCII).
    result = Rune(uint(s[i]))
    when doInc: inc(i)
  elif uint(s[i]) shr 5 == 0b110:
    # Two-byte sequence: 5 payload bits in the lead byte, 6 in the next.
    # assert(uint(s[i+1]) shr 6 == 0b10)
    if i <= s.len - 2:
      result = Rune((uint(s[i]) and (ones(5))) shl 6 or
                    (uint(s[i+1]) and ones(6)))
      when doInc: inc(i, 2)
    else:
      result = replRune
      when doInc: inc(i)
  elif uint(s[i]) shr 4 == 0b1110:
    # Three-byte sequence.
    # assert(uint(s[i+1]) shr 6 == 0b10)
    # assert(uint(s[i+2]) shr 6 == 0b10)
    if i <= s.len - 3:
      result = Rune((uint(s[i]) and ones(4)) shl 12 or
                    (uint(s[i+1]) and ones(6)) shl 6 or
                    (uint(s[i+2]) and ones(6)))
      when doInc: inc(i, 3)
    else:
      result = replRune
      when doInc: inc(i)
  elif uint(s[i]) shr 3 == 0b11110:
    # Four-byte sequence.
    # assert(uint(s[i+1]) shr 6 == 0b10)
    # assert(uint(s[i+2]) shr 6 == 0b10)
    # assert(uint(s[i+3]) shr 6 == 0b10)
    if i <= s.len - 4:
      result = Rune((uint(s[i]) and ones(3)) shl 18 or
                    (uint(s[i+1]) and ones(6)) shl 12 or
                    (uint(s[i+2]) and ones(6)) shl 6 or
                    (uint(s[i+3]) and ones(6)))
      when doInc: inc(i, 4)
    else:
      result = replRune
      when doInc: inc(i)
  elif uint(s[i]) shr 2 == 0b111110:
    # Five-byte sequence.
    # assert(uint(s[i+1]) shr 6 == 0b10)
    # assert(uint(s[i+2]) shr 6 == 0b10)
    # assert(uint(s[i+3]) shr 6 == 0b10)
    # assert(uint(s[i+4]) shr 6 == 0b10)
    if i <= s.len - 5:
      result = Rune((uint(s[i]) and ones(2)) shl 24 or
                    (uint(s[i+1]) and ones(6)) shl 18 or
                    (uint(s[i+2]) and ones(6)) shl 12 or
                    (uint(s[i+3]) and ones(6)) shl 6 or
                    (uint(s[i+4]) and ones(6)))
      when doInc: inc(i, 5)
    else:
      result = replRune
      when doInc: inc(i)
  elif uint(s[i]) shr 1 == 0b1111110:
    # Six-byte sequence.
    # assert(uint(s[i+1]) shr 6 == 0b10)
    # assert(uint(s[i+2]) shr 6 == 0b10)
    # assert(uint(s[i+3]) shr 6 == 0b10)
    # assert(uint(s[i+4]) shr 6 == 0b10)
    # assert(uint(s[i+5]) shr 6 == 0b10)
    if i <= s.len - 6:
      result = Rune((uint(s[i]) and ones(1)) shl 30 or
                    (uint(s[i+1]) and ones(6)) shl 24 or
                    (uint(s[i+2]) and ones(6)) shl 18 or
                    (uint(s[i+3]) and ones(6)) shl 12 or
                    (uint(s[i+4]) and ones(6)) shl 6 or
                    (uint(s[i+5]) and ones(6)))
      when doInc: inc(i, 6)
    else:
      result = replRune
      when doInc: inc(i)
  else:
    # Not a recognised lead byte: pass the raw byte value through.
    result = Rune(uint(s[i]))
    when doInc: inc(i)
  141. proc runeAt*(s: string, i: Natural): Rune =
  142. ## Returns the rune in ``s`` at **byte index** ``i``.
  143. ##
  144. ## See also:
  145. ## * `runeAtPos proc <#runeAtPos,string,int>`_
  146. ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
  147. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  148. runnableExamples:
  149. let a = "añyóng"
  150. doAssert a.runeAt(1) == "ñ".runeAt(0)
  151. doAssert a.runeAt(2) == "ñ".runeAt(1)
  152. doAssert a.runeAt(3) == "y".runeAt(0)
  153. fastRuneAt(s, i, result, false)
  154. proc validateUtf8*(s: string): int =
  155. ## Returns the position of the invalid byte in ``s`` if the string ``s`` does
  156. ## not hold valid UTF-8 data. Otherwise ``-1`` is returned.
  157. ##
  158. ## See also:
  159. ## * `toUTF8 proc <#toUTF8,Rune>`_
  160. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  161. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  162. var i = 0
  163. let L = s.len
  164. while i < L:
  165. if uint(s[i]) <= 127:
  166. inc(i)
  167. elif uint(s[i]) shr 5 == 0b110:
  168. if uint(s[i]) < 0xc2: return i # Catch overlong ascii representations.
  169. if i+1 < L and uint(s[i+1]) shr 6 == 0b10: inc(i, 2)
  170. else: return i
  171. elif uint(s[i]) shr 4 == 0b1110:
  172. if i+2 < L and uint(s[i+1]) shr 6 == 0b10 and uint(s[i+2]) shr 6 == 0b10:
  173. inc i, 3
  174. else: return i
  175. elif uint(s[i]) shr 3 == 0b11110:
  176. if i+3 < L and uint(s[i+1]) shr 6 == 0b10 and
  177. uint(s[i+2]) shr 6 == 0b10 and
  178. uint(s[i+3]) shr 6 == 0b10:
  179. inc i, 4
  180. else: return i
  181. else:
  182. return i
  183. return -1
template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) =
  ## Copies UTF-8 representation of ``c`` into the preallocated string ``s``
  ## starting at position ``pos``.
  ##
  ## If ``doInc == true`` (default), ``pos`` is incremented
  ## by the number of bytes that have been processed.
  ##
  ## To be the most efficient, make sure ``s`` is preallocated
  ## with an additional amount equal to the byte length of ``c``.
  ##
  ## Note that ``s`` is resized with ``setLen`` so that it ends right after
  ## the bytes written. ``pos`` is substituted at every use, both before and
  ## after that ``setLen``, so pass a variable or constant rather than an
  ## expression such as ``s.len``.
  ##
  ## See also:
  ## * `validateUtf8 proc <#validateUtf8,string>`_
  ## * `toUTF8 proc <#toUTF8,Rune>`_
  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  var i = RuneImpl(c)
  if i <=% 127:
    # 1 byte
    s.setLen(pos+1)
    s[pos+0] = chr(i)
    when doInc: inc(pos)
  elif i <=% 0x07FF:
    # 2 bytes
    s.setLen(pos+2)
    s[pos+0] = chr((i shr 6) or 0b110_00000)
    s[pos+1] = chr((i and ones(6)) or 0b10_0000_00)
    when doInc: inc(pos, 2)
  elif i <=% 0xFFFF:
    # 3 bytes
    s.setLen(pos+3)
    s[pos+0] = chr(i shr 12 or 0b1110_0000)
    s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 3)
  elif i <=% 0x001FFFFF:
    # 4 bytes
    s.setLen(pos+4)
    s[pos+0] = chr(i shr 18 or 0b1111_0000)
    s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+3] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 4)
  elif i <=% 0x03FFFFFF:
    # 5 bytes
    s.setLen(pos+5)
    s[pos+0] = chr(i shr 24 or 0b111110_00)
    s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00)
    s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+4] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 5)
  elif i <=% 0x7FFFFFFF:
    # 6 bytes
    s.setLen(pos+6)
    s[pos+0] = chr(i shr 30 or 0b1111110_0)
    s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00)
    s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00)
    s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00)
    s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    s[pos+5] = chr(i and ones(6) or 0b10_0000_00)
    when doInc: inc(pos, 6)
  else:
    # Nothing is written and `pos` is left unchanged.
    discard # error, exception?
  240. proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} =
  241. ## Converts a rune into its UTF-8 representation.
  242. ##
  243. ## See also:
  244. ## * `validateUtf8 proc <#validateUtf8,string>`_
  245. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  246. ## * `utf8 iterator <#utf8.i,string>`_
  247. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  248. runnableExamples:
  249. let a = "añyóng"
  250. doAssert a.runeAt(1).toUTF8 == "ñ"
  251. result = ""
  252. fastToUTF8Copy(c, result, 0, false)
  253. proc add*(s: var string; c: Rune) =
  254. ## Adds a rune ``c`` to a string ``s``.
  255. runnableExamples:
  256. var s = "abc"
  257. let c = "ä".runeAt(0)
  258. s.add(c)
  259. doAssert s == "abcä"
  260. let pos = s.len
  261. fastToUTF8Copy(c, s, pos, false)
  262. proc `$`*(rune: Rune): string =
  263. ## An alias for `toUTF8 <#toUTF8,Rune>`_.
  264. ##
  265. ## See also:
  266. ## * `validateUtf8 proc <#validateUtf8,string>`_
  267. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  268. rune.toUTF8
  269. proc `$`*(runes: seq[Rune]): string =
  270. ## Converts a sequence of Runes to a string.
  271. ##
  272. ## See also:
  273. ## * `toRunes <#toRunes,string>`_ for a reverse operation
  274. runnableExamples:
  275. let
  276. someString = "öÑ"
  277. someRunes = toRunes(someString)
  278. doAssert $someRunes == someString
  279. result = ""
  280. for rune in runes:
  281. result.add rune
  282. proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int =
  283. ## Returns the byte position of rune
  284. ## at position ``pos`` in ``s`` with an optional start byte position.
  285. ## Returns the special value -1 if it runs out of the string.
  286. ##
  287. ## **Beware:** This can lead to unoptimized code and slow execution!
  288. ## Most problems can be solved more efficiently by using an iterator
  289. ## or conversion to a seq of Rune.
  290. ##
  291. ## See also:
  292. ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_
  293. runnableExamples:
  294. let a = "añyóng"
  295. doAssert a.runeOffset(1) == 1
  296. doAssert a.runeOffset(3) == 4
  297. doAssert a.runeOffset(4) == 6
  298. var
  299. i = 0
  300. o = start
  301. while i < pos:
  302. o += runeLenAt(s, o)
  303. if o >= s.len:
  304. return -1
  305. inc i
  306. return o
  307. proc runeReverseOffset*(s: string, rev: Positive): (int, int) =
  308. ## Returns a tuple with the byte offset of the
  309. ## rune at position ``rev`` in ``s``, counting
  310. ## from the end (starting with 1) and the total
  311. ## number of runes in the string.
  312. ##
  313. ## Returns a negative value for offset if there are too few runes in
  314. ## the string to satisfy the request.
  315. ##
  316. ## **Beware:** This can lead to unoptimized code and slow execution!
  317. ## Most problems can be solved more efficiently by using an iterator
  318. ## or conversion to a seq of Rune.
  319. ##
  320. ## See also:
  321. ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_
  322. var
  323. a = rev.int
  324. o = 0
  325. x = 0
  326. while o < s.len:
  327. let r = runeLenAt(s, o)
  328. o += r
  329. if a < 0:
  330. x += r
  331. dec a
  332. result = if a > 0: (-a, rev.int-a) else: (x, -a+rev.int)
  333. proc runeAtPos*(s: string, pos: int): Rune =
  334. ## Returns the rune at position ``pos``.
  335. ##
  336. ## **Beware:** This can lead to unoptimized code and slow execution!
  337. ## Most problems can be solved more efficiently by using an iterator
  338. ## or conversion to a seq of Rune.
  339. ##
  340. ## See also:
  341. ## * `runeAt proc <#runeAt,string,Natural>`_
  342. ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
  343. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  344. fastRuneAt(s, runeOffset(s, pos), result, false)
  345. proc runeStrAtPos*(s: string, pos: Natural): string =
  346. ## Returns the rune at position ``pos`` as UTF8 String.
  347. ##
  348. ## **Beware:** This can lead to unoptimized code and slow execution!
  349. ## Most problems can be solved more efficiently by using an iterator
  350. ## or conversion to a seq of Rune.
  351. ##
  352. ## See also:
  353. ## * `runeAt proc <#runeAt,string,Natural>`_
  354. ## * `runeAtPos proc <#runeAtPos,string,int>`_
  355. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  356. let o = runeOffset(s, pos)
  357. s[o .. (o+runeLenAt(s, o)-1)]
  358. proc runeSubStr*(s: string, pos: int, len: int = int.high): string =
  359. ## Returns the UTF-8 substring starting at code point ``pos``
  360. ## with ``len`` code points.
  361. ##
  362. ## If ``pos`` or ``len`` is negative they count from
  363. ## the end of the string. If ``len`` is not given it means the longest
  364. ## possible string.
  365. runnableExamples:
  366. let s = "Hänsel ««: 10,00€"
  367. doAssert(runeSubStr(s, 0, 2) == "Hä")
  368. doAssert(runeSubStr(s, 10, 1) == ":")
  369. doAssert(runeSubStr(s, -6) == "10,00€")
  370. doAssert(runeSubStr(s, 10) == ": 10,00€")
  371. doAssert(runeSubStr(s, 12, 5) == "10,00")
  372. doAssert(runeSubStr(s, -6, 3) == "10,")
  373. if pos < 0:
  374. let (o, rl) = runeReverseOffset(s, -pos)
  375. if len >= rl:
  376. result = s.substr(o, s.len-1)
  377. elif len < 0:
  378. let e = rl + len
  379. if e < 0:
  380. result = ""
  381. else:
  382. result = s.substr(o, runeOffset(s, e-(rl+pos), o)-1)
  383. else:
  384. result = s.substr(o, runeOffset(s, len, o)-1)
  385. else:
  386. let o = runeOffset(s, pos)
  387. if o < 0:
  388. result = ""
  389. elif len == int.high:
  390. result = s.substr(o, s.len-1)
  391. elif len < 0:
  392. result = runeSubStr(s, pos, s.runeLen+len)
  393. else:
  394. var e = runeOffset(s, len, o)
  395. if e < 0:
  396. e = s.len
  397. result = s.substr(o, e-1)
  398. proc `<=%`*(a, b: Rune): bool =
  399. ## Checks if code point of `a` is smaller or equal to code point of `b`.
  400. runnableExamples:
  401. let
  402. a = "ú".runeAt(0)
  403. b = "ü".runeAt(0)
  404. doAssert a <=% b
  405. return int(a) <=% int(b)
  406. proc `<%`*(a, b: Rune): bool =
  407. ## Checks if code point of `a` is smaller than code point of `b`.
  408. runnableExamples:
  409. let
  410. a = "ú".runeAt(0)
  411. b = "ü".runeAt(0)
  412. doAssert a <% b
  413. return int(a) <% int(b)
  414. proc `==`*(a, b: Rune): bool =
  415. ## Checks if two runes are equal.
  416. return int(a) == int(b)
  417. include "includes/unicode_ranges"
  418. proc binarySearch(c: RuneImpl, tab: openArray[int], len, stride: int): int =
  419. var n = len
  420. var t = 0
  421. while n > 1:
  422. var m = n div 2
  423. var p = t + m*stride
  424. if c >= tab[p]:
  425. t = p
  426. n = n-m
  427. else:
  428. n = m
  429. if n != 0 and c >= tab[t]:
  430. return t
  431. return -1
  432. proc toLower*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
  433. ## Converts ``c`` into lower case. This works for any rune.
  434. ##
  435. ## If possible, prefer ``toLower`` over ``toUpper``.
  436. ##
  437. ## See also:
  438. ## * `toUpper proc <#toUpper,Rune>`_
  439. ## * `toTitle proc <#toTitle,Rune>`_
  440. ## * `isLower proc <#isLower,Rune>`_
  441. var c = RuneImpl(c)
  442. var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
  443. if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
  444. return Rune(c + toLowerRanges[p+2] - 500)
  445. p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2)
  446. if p >= 0 and c == toLowerSinglets[p]:
  447. return Rune(c + toLowerSinglets[p+1] - 500)
  448. return Rune(c)
  449. proc toUpper*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
  450. ## Converts ``c`` into upper case. This works for any rune.
  451. ##
  452. ## If possible, prefer ``toLower`` over ``toUpper``.
  453. ##
  454. ## See also:
  455. ## * `toLower proc <#toLower,Rune>`_
  456. ## * `toTitle proc <#toTitle,Rune>`_
  457. ## * `isUpper proc <#isUpper,Rune>`_
  458. var c = RuneImpl(c)
  459. var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3)
  460. if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]:
  461. return Rune(c + toUpperRanges[p+2] - 500)
  462. p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2)
  463. if p >= 0 and c == toUpperSinglets[p]:
  464. return Rune(c + toUpperSinglets[p+1] - 500)
  465. return Rune(c)
  466. proc toTitle*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
  467. ## Converts ``c`` to title case.
  468. ##
  469. ## See also:
  470. ## * `toLower proc <#toLower,Rune>`_
  471. ## * `toUpper proc <#toUpper,Rune>`_
  472. ## * `isTitle proc <#isTitle,Rune>`_
  473. var c = RuneImpl(c)
  474. var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2)
  475. if p >= 0 and c == toTitleSinglets[p]:
  476. return Rune(c + toTitleSinglets[p+1] - 500)
  477. return Rune(c)
  478. proc isLower*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  479. ## Returns true if ``c`` is a lower case rune.
  480. ##
  481. ## If possible, prefer ``isLower`` over ``isUpper``.
  482. ##
  483. ## See also:
  484. ## * `toLower proc <#toLower,Rune>`_
  485. ## * `isUpper proc <#isUpper,Rune>`_
  486. ## * `isTitle proc <#isTitle,Rune>`_
  487. var c = RuneImpl(c)
  488. # Note: toUpperRanges is correct here!
  489. var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3)
  490. if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]:
  491. return true
  492. p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2)
  493. if p >= 0 and c == toUpperSinglets[p]:
  494. return true
  495. proc isUpper*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  496. ## Returns true if ``c`` is a upper case rune.
  497. ##
  498. ## If possible, prefer ``isLower`` over ``isUpper``.
  499. ##
  500. ## See also:
  501. ## * `toUpper proc <#toUpper,Rune>`_
  502. ## * `isLower proc <#isLower,Rune>`_
  503. ## * `isTitle proc <#isTitle,Rune>`_
  504. ## * `isAlpha proc <#isAlpha,Rune>`_
  505. ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
  506. var c = RuneImpl(c)
  507. # Note: toLowerRanges is correct here!
  508. var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
  509. if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
  510. return true
  511. p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2)
  512. if p >= 0 and c == toLowerSinglets[p]:
  513. return true
  514. proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  515. ## Returns true if ``c`` is an *alpha* rune (i.e., a letter).
  516. ##
  517. ## See also:
  518. ## * `isLower proc <#isLower,Rune>`_
  519. ## * `isTitle proc <#isTitle,Rune>`_
  520. ## * `isAlpha proc <#isAlpha,Rune>`_
  521. ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
  522. ## * `isCombining proc <#isCombining,Rune>`_
  523. if isUpper(c) or isLower(c):
  524. return true
  525. var c = RuneImpl(c)
  526. var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2)
  527. if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]:
  528. return true
  529. p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1)
  530. if p >= 0 and c == alphaSinglets[p]:
  531. return true
  532. proc isTitle*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  533. ## Returns true if ``c`` is a Unicode titlecase code point.
  534. ##
  535. ## See also:
  536. ## * `toTitle proc <#toTitle,Rune>`_
  537. ## * `isLower proc <#isLower,Rune>`_
  538. ## * `isUpper proc <#isUpper,Rune>`_
  539. ## * `isAlpha proc <#isAlpha,Rune>`_
  540. ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
  541. return isUpper(c) and isLower(c)
  542. proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  543. ## Returns true if ``c`` is a Unicode whitespace code point.
  544. ##
  545. ## See also:
  546. ## * `isLower proc <#isLower,Rune>`_
  547. ## * `isUpper proc <#isUpper,Rune>`_
  548. ## * `isTitle proc <#isTitle,Rune>`_
  549. ## * `isAlpha proc <#isAlpha,Rune>`_
  550. var c = RuneImpl(c)
  551. var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
  552. if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]:
  553. return true
  554. proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  555. ## Returns true if ``c`` is a Unicode combining code unit.
  556. ##
  557. ## See also:
  558. ## * `isLower proc <#isLower,Rune>`_
  559. ## * `isUpper proc <#isUpper,Rune>`_
  560. ## * `isTitle proc <#isTitle,Rune>`_
  561. ## * `isAlpha proc <#isAlpha,Rune>`_
  562. var c = RuneImpl(c)
  563. # Optimized to return false immediately for ASCII
  564. return c >= 0x0300 and (c <= 0x036f or
  565. (c >= 0x1ab0 and c <= 0x1aff) or
  566. (c >= 0x1dc0 and c <= 0x1dff) or
  567. (c >= 0x20d0 and c <= 0x20ff) or
  568. (c >= 0xfe20 and c <= 0xfe2f))
  569. template runeCheck(s, runeProc) =
  570. ## Common code for isAlpha and isSpace.
  571. result = if len(s) == 0: false else: true
  572. var
  573. i = 0
  574. rune: Rune
  575. while i < len(s) and result:
  576. fastRuneAt(s, i, rune, doInc = true)
  577. result = runeProc(rune) and result
  578. proc isAlpha*(s: string): bool {.noSideEffect,
  579. rtl, extern: "nuc$1Str".} =
  580. ## Returns true if ``s`` contains all alphabetic runes.
  581. runnableExamples:
  582. let a = "añyóng"
  583. doAssert a.isAlpha
  584. runeCheck(s, isAlpha)
  585. proc isSpace*(s: string): bool {.noSideEffect,
  586. rtl, extern: "nuc$1Str".} =
  587. ## Returns true if ``s`` contains all whitespace runes.
  588. runnableExamples:
  589. let a = "\t\l \v\r\f"
  590. doAssert a.isSpace
  591. runeCheck(s, isWhiteSpace)
  592. template convertRune(s, runeProc) =
  593. ## Convert runes in ``s`` using ``runeProc`` as the converter.
  594. result = newString(len(s))
  595. var
  596. i = 0
  597. resultIndex = 0
  598. rune: Rune
  599. while i < len(s):
  600. fastRuneAt(s, i, rune, doInc = true)
  601. rune = runeProc(rune)
  602. fastToUTF8Copy(rune, result, resultIndex, doInc = true)
  603. proc toUpper*(s: string): string {.noSideEffect,
  604. rtl, extern: "nuc$1Str".} =
  605. ## Converts ``s`` into upper-case runes.
  606. runnableExamples:
  607. doAssert toUpper("abγ") == "ABΓ"
  608. convertRune(s, toUpper)
  609. proc toLower*(s: string): string {.noSideEffect,
  610. rtl, extern: "nuc$1Str".} =
  611. ## Converts ``s`` into lower-case runes.
  612. runnableExamples:
  613. doAssert toLower("ABΓ") == "abγ"
  614. convertRune(s, toLower)
  615. proc swapCase*(s: string): string {.noSideEffect,
  616. rtl, extern: "nuc$1".} =
  617. ## Swaps the case of runes in ``s``.
  618. ##
  619. ## Returns a new string such that the cases of all runes
  620. ## are swapped if possible.
  621. runnableExamples:
  622. doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
  623. var
  624. i = 0
  625. resultIndex = 0
  626. rune: Rune
  627. result = newString(len(s))
  628. while i < len(s):
  629. fastRuneAt(s, i, rune)
  630. if rune.isUpper():
  631. rune = rune.toLower()
  632. elif rune.isLower():
  633. rune = rune.toUpper()
  634. fastToUTF8Copy(rune, result, resultIndex, doInc = true)
  635. proc capitalize*(s: string): string {.noSideEffect,
  636. rtl, extern: "nuc$1".} =
  637. ## Converts the first character of ``s`` into an upper-case rune.
  638. runnableExamples:
  639. doAssert capitalize("βeta") == "Βeta"
  640. if len(s) == 0:
  641. return ""
  642. var
  643. rune: Rune
  644. i = 0
  645. fastRuneAt(s, i, rune, doInc = true)
  646. result = $toUpper(rune) & substr(s, i)
  647. proc translate*(s: string, replacements: proc(key: string): string): string {.
  648. rtl, extern: "nuc$1".} =
  649. ## Translates words in a string using the ``replacements`` proc to substitute
  650. ## words inside ``s`` with their replacements.
  651. ##
  652. ## ``replacements`` is any proc that takes a word and returns
  653. ## a new word to fill it's place.
  654. runnableExamples:
  655. proc wordToNumber(s: string): string =
  656. case s
  657. of "one": "1"
  658. of "two": "2"
  659. else: s
  660. let a = "one two three four"
  661. doAssert a.translate(wordToNumber) == "1 2 three four"
  662. # Allocate memory for the new string based on the old one.
  663. # If the new string length is less than the old, no allocations
  664. # will be needed. If the new string length is greater than the
  665. # old, then maybe only one allocation is needed
  666. result = newStringOfCap(s.len)
  667. var
  668. index = 0
  669. lastIndex = 0
  670. wordStart = 0
  671. inWord = false
  672. rune: Rune
  673. while index < len(s):
  674. lastIndex = index
  675. fastRuneAt(s, index, rune)
  676. let whiteSpace = rune.isWhiteSpace()
  677. if whiteSpace and inWord:
  678. # If we've reached the end of a word
  679. let word = s[wordStart ..< lastIndex]
  680. result.add(replacements(word))
  681. result.add($rune)
  682. inWord = false
  683. elif not whiteSpace and not inWord:
  684. # If we've hit a non space character and
  685. # are not currently in a word, track
  686. # the starting index of the word
  687. inWord = true
  688. wordStart = lastIndex
  689. elif whiteSpace:
  690. result.add($rune)
  691. if wordStart < len(s) and inWord:
  692. # Get the trailing word at the end
  693. let word = s[wordStart .. ^1]
  694. result.add(replacements(word))
  695. proc title*(s: string): string {.noSideEffect,
  696. rtl, extern: "nuc$1".} =
  697. ## Converts ``s`` to a unicode title.
  698. ##
  699. ## Returns a new string such that the first character
  700. ## in each word inside ``s`` is capitalized.
  701. runnableExamples:
  702. doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
  703. var
  704. i = 0
  705. resultIndex = 0
  706. rune: Rune
  707. result = newString(len(s))
  708. var firstRune = true
  709. while i < len(s):
  710. fastRuneAt(s, i, rune)
  711. if not rune.isWhiteSpace() and firstRune:
  712. rune = rune.toUpper()
  713. firstRune = false
  714. elif rune.isWhiteSpace():
  715. firstRune = true
  716. fastToUTF8Copy(rune, result, resultIndex, doInc = true)
  717. iterator runes*(s: string): Rune =
  718. ## Iterates over any rune of the string ``s`` returning runes.
  719. var
  720. i = 0
  721. result: Rune
  722. while i < len(s):
  723. fastRuneAt(s, i, result, true)
  724. yield result
  725. iterator utf8*(s: string): string =
  726. ## Iterates over any rune of the string ``s`` returning utf8 values.
  727. ##
  728. ## See also:
  729. ## * `validateUtf8 proc <#validateUtf8,string>`_
  730. ## * `toUTF8 proc <#toUTF8,Rune>`_
  731. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  732. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  733. var o = 0
  734. while o < s.len:
  735. let n = runeLenAt(s, o)
  736. yield s[o .. (o+n-1)]
  737. o += n
  738. proc toRunes*(s: string): seq[Rune] =
  739. ## Obtains a sequence containing the Runes in ``s``.
  740. ##
  741. ## See also:
  742. ## * `$ proc <#$,Rune>`_ for a reverse operation
  743. runnableExamples:
  744. let a = toRunes("aáä")
  745. doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)]
  746. result = newSeq[Rune]()
  747. for r in s.runes:
  748. result.add(r)
  749. proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} =
  750. ## Compares two UTF-8 strings and ignores the case. Returns:
  751. ##
  752. ## | 0 if a == b
  753. ## | < 0 if a < b
  754. ## | > 0 if a > b
  755. var i = 0
  756. var j = 0
  757. var ar, br: Rune
  758. while i < a.len and j < b.len:
  759. # slow path:
  760. fastRuneAt(a, i, ar)
  761. fastRuneAt(b, j, br)
  762. result = RuneImpl(toLower(ar)) - RuneImpl(toLower(br))
  763. if result != 0: return
  764. result = a.len - b.len
  765. proc reversed*(s: string): string =
  766. ## Returns the reverse of ``s``, interpreting it as runes.
  767. ##
  768. ## Unicode combining characters are correctly interpreted as well.
  769. runnableExamples:
  770. assert reversed("Reverse this!") == "!siht esreveR"
  771. assert reversed("先秦兩漢") == "漢兩秦先"
  772. assert reversed("as⃝df̅") == "f̅ds⃝a"
  773. assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  774. var
  775. i = 0
  776. lastI = 0
  777. newPos = len(s) - 1
  778. blockPos = 0
  779. r: Rune
  780. template reverseUntil(pos) =
  781. var j = pos - 1
  782. while j > blockPos:
  783. result[newPos] = s[j]
  784. dec j
  785. dec newPos
  786. blockPos = pos - 1
  787. result = newString(len(s))
  788. while i < len(s):
  789. lastI = i
  790. fastRuneAt(s, i, r, true)
  791. if not isCombining(r):
  792. reverseUntil(lastI)
  793. reverseUntil(len(s))
  794. proc graphemeLen*(s: string; i: Natural): Natural =
  795. ## The number of bytes belonging to byte index ``s[i]``,
  796. ## including following combining code unit.
  797. runnableExamples:
  798. let a = "añyóng"
  799. doAssert a.graphemeLen(1) == 2 ## ñ
  800. doAssert a.graphemeLen(2) == 1
  801. doAssert a.graphemeLen(4) == 2 ## ó
  802. var j = i.int
  803. var r, r2: Rune
  804. if j < s.len:
  805. fastRuneAt(s, j, r, true)
  806. result = j-i
  807. while j < s.len:
  808. fastRuneAt(s, j, r2, true)
  809. if not isCombining(r2): break
  810. result = j-i
  811. proc lastRune*(s: string; last: int): (Rune, int) =
  812. ## Length of the last rune in ``s[0..last]``. Returns the rune and its length
  813. ## in bytes.
  814. if s[last] <= chr(127):
  815. result = (Rune(s[last]), 1)
  816. else:
  817. var L = 0
  818. while last-L >= 0 and uint(s[last-L]) shr 6 == 0b10: inc(L)
  819. var r: Rune
  820. fastRuneAt(s, last-L, r, false)
  821. result = (r, L+1)
  822. proc size*(r: Rune): int {.noSideEffect.} =
  823. ## Returns the number of bytes the rune ``r`` takes.
  824. runnableExamples:
  825. let a = toRunes "aá"
  826. doAssert size(a[0]) == 1
  827. doAssert size(a[1]) == 2
  828. let v = r.uint32
  829. if v <= 0x007F'u32: result = 1
  830. elif v <= 0x07FF'u32: result = 2
  831. elif v <= 0xFFFF'u32: result = 3
  832. elif v <= 0x1FFFFF'u32: result = 4
  833. elif v <= 0x3FFFFFF'u32: result = 5
  834. elif v <= 0x7FFFFFFF'u32: result = 6
  835. else: result = 1
  836. # --------- Private templates for different split separators -----------
  837. proc stringHasSep(s: string, index: int, seps: openArray[Rune]): bool =
  838. var rune: Rune
  839. fastRuneAt(s, index, rune, false)
  840. return seps.contains(rune)
  841. proc stringHasSep(s: string, index: int, sep: Rune): bool =
  842. var rune: Rune
  843. fastRuneAt(s, index, rune, false)
  844. return sep == rune
  845. template splitCommon(s, sep, maxsplit: untyped) =
  846. ## Common code for split procedures.
  847. let
  848. sLen = len(s)
  849. var
  850. last = 0
  851. splits = maxsplit
  852. if sLen > 0:
  853. while last <= sLen:
  854. var first = last
  855. while last < sLen and not stringHasSep(s, last, sep):
  856. inc(last, runeLenAt(s, last))
  857. if splits == 0: last = sLen
  858. yield s[first .. (last - 1)]
  859. if splits == 0: break
  860. dec(splits)
  861. inc(last, if last < sLen: runeLenAt(s, last) else: 1)
  862. iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces,
  863. maxsplit: int = -1): string =
  864. ## Splits the unicode string ``s`` into substrings using a group of separators.
  865. ##
  866. ## Substrings are separated by a substring containing only ``seps``.
  867. runnableExamples:
  868. import std/sequtils
  869. assert toSeq("hÃllo\lthis\lis an\texample\l是".split) ==
  870. @["hÃllo", "this", "is", "an", "example", "是"]
  871. # And the following code splits the same string using a sequence of Runes.
  872. assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) ==
  873. @["añyóng", "hÃllo", "是", "example"]
  874. # example with a `Rune` separator and unused one `;`:
  875. assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""]
  876. # Another example that splits a string containing a date.
  877. let date = "2012-11-20T22:08:08.398990"
  878. assert toSeq(split(date, " -:T".toRunes)) ==
  879. @["2012", "11", "20", "22", "08", "08.398990"]
  880. splitCommon(s, seps, maxsplit)
  881. iterator splitWhitespace*(s: string): string =
  882. ## Splits a unicode string at whitespace runes.
  883. splitCommon(s, unicodeSpaces, -1)
  884. template accResult(iter: untyped) =
  885. result = @[]
  886. for x in iter: add(result, x)
  887. proc splitWhitespace*(s: string): seq[string] {.noSideEffect,
  888. rtl, extern: "ncuSplitWhitespace".} =
  889. ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
  890. ## iterator, but is a proc that returns a sequence of substrings.
  891. accResult(splitWhitespace(s))
  892. iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
  893. ## Splits the unicode string ``s`` into substrings using a single separator.
  894. ## Substrings are separated by the rune ``sep``.
  895. runnableExamples:
  896. import std/sequtils
  897. assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) ==
  898. @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"]
  899. splitCommon(s, sep, maxsplit)
  900. proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
  901. seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} =
  902. ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
  903. ## but is a proc that returns a sequence of substrings.
  904. accResult(split(s, seps, maxsplit))
  905. proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect,
  906. rtl, extern: "nucSplitRune".} =
  907. ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc
  908. ## that returns a sequence of substrings.
  909. accResult(split(s, sep, maxsplit))
  910. proc strip*(s: string, leading = true, trailing = true,
  911. runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
  912. rtl, extern: "nucStrip".} =
  913. ## Strips leading or trailing ``runes`` from ``s`` and returns
  914. ## the resulting string.
  915. ##
  916. ## If ``leading`` is true (default), leading ``runes`` are stripped.
  917. ## If ``trailing`` is true (default), trailing ``runes`` are stripped.
  918. ## If both are false, the string is returned unchanged.
  919. runnableExamples:
  920. let a = "\táñyóng "
  921. doAssert a.strip == "áñyóng"
  922. doAssert a.strip(leading = false) == "\táñyóng"
  923. doAssert a.strip(trailing = false) == "áñyóng "
  924. var
  925. sI = 0 ## starting index into string ``s``
  926. eI = len(s) - 1 ## ending index into ``s``, where the last ``Rune`` starts
  927. if leading:
  928. var
  929. i = 0
  930. xI: int ## value of ``sI`` at the beginning of the iteration
  931. rune: Rune
  932. while i < len(s):
  933. xI = i
  934. fastRuneAt(s, i, rune)
  935. sI = i # Assume to start from next rune
  936. if not runes.contains(rune):
  937. sI = xI # Go back to where the current rune starts
  938. break
  939. if trailing:
  940. var
  941. i = eI
  942. xI: int
  943. rune: Rune
  944. while i >= 0:
  945. xI = i
  946. fastRuneAt(s, xI, rune)
  947. var yI = i - 1
  948. while yI >= 0:
  949. var
  950. yIend = yI
  951. pRune: Rune
  952. fastRuneAt(s, yIend, pRune)
  953. if yIend < xI: break
  954. i = yI
  955. rune = pRune
  956. dec(yI)
  957. if not runes.contains(rune):
  958. eI = xI - 1
  959. break
  960. dec(i)
  961. let newLen = eI - sI + 1
  962. result = newStringOfCap(newLen)
  963. if newLen > 0:
  964. result.add s[sI .. eI]
  965. proc repeat*(c: Rune, count: Natural): string {.noSideEffect,
  966. rtl, extern: "nucRepeatRune".} =
  967. ## Returns a string of ``count`` Runes ``c``.
  968. ##
  969. ## The returned string will have a rune-length of ``count``.
  970. runnableExamples:
  971. let a = "ñ".runeAt(0)
  972. doAssert a.repeat(5) == "ñññññ"
  973. let s = $c
  974. result = newStringOfCap(count * s.len)
  975. for i in 0 ..< count:
  976. result.add s
  977. proc align*(s: string, count: Natural, padding = ' '.Rune): string {.
  978. noSideEffect, rtl, extern: "nucAlignString".} =
  979. ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length
  980. ## of ``count``.
  981. ##
  982. ## ``padding`` characters (by default spaces) are added before ``s`` resulting in
  983. ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
  984. ## returned unchanged. If you need to left align a string use the `alignLeft
  985. ## proc <#alignLeft,string,Natural>`_.
  986. runnableExamples:
  987. assert align("abc", 4) == " abc"
  988. assert align("a", 0) == "a"
  989. assert align("1232", 6) == " 1232"
  990. assert align("1232", 6, '#'.Rune) == "##1232"
  991. assert align("Åge", 5) == " Åge"
  992. assert align("×", 4, '_'.Rune) == "___×"
  993. let sLen = s.runeLen
  994. if sLen < count:
  995. let padStr = $padding
  996. result = newStringOfCap(padStr.len * count)
  997. let spaces = count - sLen
  998. for i in 0 ..< spaces: result.add padStr
  999. result.add s
  1000. else:
  1001. result = s
  1002. proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.
  1003. noSideEffect.} =
  1004. ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a
  1005. ## rune-length of ``count``.
  1006. ##
  1007. ## ``padding`` characters (by default spaces) are added after ``s`` resulting in
  1008. ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
  1009. ## returned unchanged. If you need to right align a string use the `align
  1010. ## proc <#align,string,Natural>`_.
  1011. runnableExamples:
  1012. assert alignLeft("abc", 4) == "abc "
  1013. assert alignLeft("a", 0) == "a"
  1014. assert alignLeft("1232", 6) == "1232 "
  1015. assert alignLeft("1232", 6, '#'.Rune) == "1232##"
  1016. assert alignLeft("Åge", 5) == "Åge "
  1017. assert alignLeft("×", 4, '_'.Rune) == "×___"
  1018. let sLen = s.runeLen
  1019. if sLen < count:
  1020. let padStr = $padding
  1021. result = newStringOfCap(s.len + (count - sLen) * padStr.len)
  1022. result.add s
  1023. for i in sLen ..< count:
  1024. result.add padStr
  1025. else:
  1026. result = s