unicode.nim 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module provides support to handle the Unicode UTF-8 encoding.
  10. ##
  11. ## There are no specialized ``insert``, ``delete``, ``add`` and ``contains``
  12. ## procedures for ``seq[Rune]`` in this module because the generic variants
  13. ## of these procedures in the system module already work with it.
  14. ##
  15. ## The current version is compatible with Unicode v12.0.0.
  16. ##
  17. ## **See also:**
  18. ## * `strutils module <strutils.html>`_
  19. ## * `unidecode module <unidecode.html>`_
  20. ## * `encodings module <encodings.html>`_
  21. include "system/inclrtl"
  22. type
  23. RuneImpl = int32 # underlying type of Rune
  24. Rune* = distinct RuneImpl ## \
  25. ## Type that can hold a single Unicode code point.
  26. ##
  27. ## A Rune may be composed with other Runes to a character on the screen.
  28. ## `RuneImpl` is the underlying type used to store Runes, currently `int32`.
  29. template ones(n: untyped): untyped = ((1 shl n)-1)
  30. proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} =
  31. ## Returns the number of runes of the string ``s``.
  32. runnableExamples:
  33. let a = "añyóng"
  34. doAssert a.runeLen == 6
  35. ## note: a.len == 8
  36. result = 0
  37. var i = 0
  38. while i < len(s):
  39. if uint(s[i]) <= 127: inc(i)
  40. elif uint(s[i]) shr 5 == 0b110: inc(i, 2)
  41. elif uint(s[i]) shr 4 == 0b1110: inc(i, 3)
  42. elif uint(s[i]) shr 3 == 0b11110: inc(i, 4)
  43. elif uint(s[i]) shr 2 == 0b111110: inc(i, 5)
  44. elif uint(s[i]) shr 1 == 0b1111110: inc(i, 6)
  45. else: inc i
  46. inc(result)
  47. proc runeLenAt*(s: string, i: Natural): int =
  48. ## Returns the number of bytes the rune starting at ``s[i]`` takes.
  49. ##
  50. ## See also:
  51. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  52. runnableExamples:
  53. let a = "añyóng"
  54. doAssert a.runeLenAt(0) == 1
  55. doAssert a.runeLenAt(1) == 2
  56. if uint(s[i]) <= 127: result = 1
  57. elif uint(s[i]) shr 5 == 0b110: result = 2
  58. elif uint(s[i]) shr 4 == 0b1110: result = 3
  59. elif uint(s[i]) shr 3 == 0b11110: result = 4
  60. elif uint(s[i]) shr 2 == 0b111110: result = 5
  61. elif uint(s[i]) shr 1 == 0b1111110: result = 6
  62. else: result = 1
  63. const replRune = Rune(0xFFFD)
  64. template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
  65. ## Returns the rune ``s[i]`` in ``result``.
  66. ##
  67. ## If ``doInc == true`` (default), ``i`` is incremented by the number
  68. ## of bytes that have been processed.
  69. bind ones
  70. if uint(s[i]) <= 127:
  71. result = Rune(uint(s[i]))
  72. when doInc: inc(i)
  73. elif uint(s[i]) shr 5 == 0b110:
  74. # assert(uint(s[i+1]) shr 6 == 0b10)
  75. if i <= s.len - 2:
  76. result = Rune((uint(s[i]) and (ones(5))) shl 6 or
  77. (uint(s[i+1]) and ones(6)))
  78. when doInc: inc(i, 2)
  79. else:
  80. result = replRune
  81. when doInc: inc(i)
  82. elif uint(s[i]) shr 4 == 0b1110:
  83. # assert(uint(s[i+1]) shr 6 == 0b10)
  84. # assert(uint(s[i+2]) shr 6 == 0b10)
  85. if i <= s.len - 3:
  86. result = Rune((uint(s[i]) and ones(4)) shl 12 or
  87. (uint(s[i+1]) and ones(6)) shl 6 or
  88. (uint(s[i+2]) and ones(6)))
  89. when doInc: inc(i, 3)
  90. else:
  91. result = replRune
  92. when doInc: inc(i)
  93. elif uint(s[i]) shr 3 == 0b11110:
  94. # assert(uint(s[i+1]) shr 6 == 0b10)
  95. # assert(uint(s[i+2]) shr 6 == 0b10)
  96. # assert(uint(s[i+3]) shr 6 == 0b10)
  97. if i <= s.len - 4:
  98. result = Rune((uint(s[i]) and ones(3)) shl 18 or
  99. (uint(s[i+1]) and ones(6)) shl 12 or
  100. (uint(s[i+2]) and ones(6)) shl 6 or
  101. (uint(s[i+3]) and ones(6)))
  102. when doInc: inc(i, 4)
  103. else:
  104. result = replRune
  105. when doInc: inc(i)
  106. elif uint(s[i]) shr 2 == 0b111110:
  107. # assert(uint(s[i+1]) shr 6 == 0b10)
  108. # assert(uint(s[i+2]) shr 6 == 0b10)
  109. # assert(uint(s[i+3]) shr 6 == 0b10)
  110. # assert(uint(s[i+4]) shr 6 == 0b10)
  111. if i <= s.len - 5:
  112. result = Rune((uint(s[i]) and ones(2)) shl 24 or
  113. (uint(s[i+1]) and ones(6)) shl 18 or
  114. (uint(s[i+2]) and ones(6)) shl 12 or
  115. (uint(s[i+3]) and ones(6)) shl 6 or
  116. (uint(s[i+4]) and ones(6)))
  117. when doInc: inc(i, 5)
  118. else:
  119. result = replRune
  120. when doInc: inc(i)
  121. elif uint(s[i]) shr 1 == 0b1111110:
  122. # assert(uint(s[i+1]) shr 6 == 0b10)
  123. # assert(uint(s[i+2]) shr 6 == 0b10)
  124. # assert(uint(s[i+3]) shr 6 == 0b10)
  125. # assert(uint(s[i+4]) shr 6 == 0b10)
  126. # assert(uint(s[i+5]) shr 6 == 0b10)
  127. if i <= s.len - 6:
  128. result = Rune((uint(s[i]) and ones(1)) shl 30 or
  129. (uint(s[i+1]) and ones(6)) shl 24 or
  130. (uint(s[i+2]) and ones(6)) shl 18 or
  131. (uint(s[i+3]) and ones(6)) shl 12 or
  132. (uint(s[i+4]) and ones(6)) shl 6 or
  133. (uint(s[i+5]) and ones(6)))
  134. when doInc: inc(i, 6)
  135. else:
  136. result = replRune
  137. when doInc: inc(i)
  138. else:
  139. result = Rune(uint(s[i]))
  140. when doInc: inc(i)
  141. proc runeAt*(s: string, i: Natural): Rune =
  142. ## Returns the rune in ``s`` at **byte index** ``i``.
  143. ##
  144. ## See also:
  145. ## * `runeAtPos proc <#runeAtPos,string,int>`_
  146. ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
  147. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  148. runnableExamples:
  149. let a = "añyóng"
  150. doAssert a.runeAt(1) == "ñ".runeAt(0)
  151. doAssert a.runeAt(2) == "ñ".runeAt(1)
  152. doAssert a.runeAt(3) == "y".runeAt(0)
  153. fastRuneAt(s, i, result, false)
  154. proc validateUtf8*(s: string): int =
  155. ## Returns the position of the invalid byte in ``s`` if the string ``s`` does
  156. ## not hold valid UTF-8 data. Otherwise ``-1`` is returned.
  157. ##
  158. ## See also:
  159. ## * `toUTF8 proc <#toUTF8,Rune>`_
  160. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  161. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  162. var i = 0
  163. let L = s.len
  164. while i < L:
  165. if uint(s[i]) <= 127:
  166. inc(i)
  167. elif uint(s[i]) shr 5 == 0b110:
  168. if uint(s[i]) < 0xc2: return i # Catch overlong ascii representations.
  169. if i+1 < L and uint(s[i+1]) shr 6 == 0b10: inc(i, 2)
  170. else: return i
  171. elif uint(s[i]) shr 4 == 0b1110:
  172. if i+2 < L and uint(s[i+1]) shr 6 == 0b10 and uint(s[i+2]) shr 6 == 0b10:
  173. inc i, 3
  174. else: return i
  175. elif uint(s[i]) shr 3 == 0b11110:
  176. if i+3 < L and uint(s[i+1]) shr 6 == 0b10 and
  177. uint(s[i+2]) shr 6 == 0b10 and
  178. uint(s[i+3]) shr 6 == 0b10:
  179. inc i, 4
  180. else: return i
  181. else:
  182. return i
  183. return -1
  184. template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) =
  185. ## Copies UTF-8 representation of ``c`` into the preallocated string ``s``
  186. ## starting at position ``pos``.
  187. ##
  188. ## If ``doInc == true`` (default), ``pos`` is incremented
  189. ## by the number of bytes that have been processed.
  190. ##
  191. ## To be the most efficient, make sure ``s`` is preallocated
  192. ## with an additional amount equal to the byte length of ``c``.
  193. ##
  194. ## See also:
  195. ## * `validateUtf8 proc <#validateUtf8,string>`_
  196. ## * `toUTF8 proc <#toUTF8,Rune>`_
  197. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  198. var i = RuneImpl(c)
  199. if i <=% 127:
  200. s.setLen(pos+1)
  201. s[pos+0] = chr(i)
  202. when doInc: inc(pos)
  203. elif i <=% 0x07FF:
  204. s.setLen(pos+2)
  205. s[pos+0] = chr((i shr 6) or 0b110_00000)
  206. s[pos+1] = chr((i and ones(6)) or 0b10_0000_00)
  207. when doInc: inc(pos, 2)
  208. elif i <=% 0xFFFF:
  209. s.setLen(pos+3)
  210. s[pos+0] = chr(i shr 12 or 0b1110_0000)
  211. s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
  212. s[pos+2] = chr(i and ones(6) or 0b10_0000_00)
  213. when doInc: inc(pos, 3)
  214. elif i <=% 0x001FFFFF:
  215. s.setLen(pos+4)
  216. s[pos+0] = chr(i shr 18 or 0b1111_0000)
  217. s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00)
  218. s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00)
  219. s[pos+3] = chr(i and ones(6) or 0b10_0000_00)
  220. when doInc: inc(pos, 4)
  221. elif i <=% 0x03FFFFFF:
  222. s.setLen(pos+5)
  223. s[pos+0] = chr(i shr 24 or 0b111110_00)
  224. s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00)
  225. s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00)
  226. s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00)
  227. s[pos+4] = chr(i and ones(6) or 0b10_0000_00)
  228. when doInc: inc(pos, 5)
  229. elif i <=% 0x7FFFFFFF:
  230. s.setLen(pos+6)
  231. s[pos+0] = chr(i shr 30 or 0b1111110_0)
  232. s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00)
  233. s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00)
  234. s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00)
  235. s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00)
  236. s[pos+5] = chr(i and ones(6) or 0b10_0000_00)
  237. when doInc: inc(pos, 6)
  238. else:
  239. discard # error, exception?
  240. proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} =
  241. ## Converts a rune into its UTF-8 representation.
  242. ##
  243. ## See also:
  244. ## * `validateUtf8 proc <#validateUtf8,string>`_
  245. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  246. ## * `utf8 iterator <#utf8.i,string>`_
  247. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  248. runnableExamples:
  249. let a = "añyóng"
  250. doAssert a.runeAt(1).toUTF8 == "ñ"
  251. result = ""
  252. fastToUTF8Copy(c, result, 0, false)
  253. proc add*(s: var string; c: Rune) =
  254. ## Adds a rune ``c`` to a string ``s``.
  255. runnableExamples:
  256. var s = "abc"
  257. let c = "ä".runeAt(0)
  258. s.add(c)
  259. doAssert s == "abcä"
  260. let pos = s.len
  261. fastToUTF8Copy(c, s, pos, false)
  262. proc `$`*(rune: Rune): string =
  263. ## An alias for `toUTF8 <#toUTF8,Rune>`_.
  264. ##
  265. ## See also:
  266. ## * `validateUtf8 proc <#validateUtf8,string>`_
  267. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  268. rune.toUTF8
  269. proc `$`*(runes: seq[Rune]): string =
  270. ## Converts a sequence of Runes to a string.
  271. ##
  272. ## See also:
  273. ## * `toRunes <#toRunes,string>`_ for a reverse operation
  274. runnableExamples:
  275. let
  276. someString = "öÑ"
  277. someRunes = toRunes(someString)
  278. doAssert $someRunes == someString
  279. result = ""
  280. for rune in runes:
  281. result.add rune
  282. proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int =
  283. ## Returns the byte position of rune
  284. ## at position ``pos`` in ``s`` with an optional start byte position.
  285. ## Returns the special value -1 if it runs out of the string.
  286. ##
  287. ## **Beware:** This can lead to unoptimized code and slow execution!
  288. ## Most problems can be solved more efficiently by using an iterator
  289. ## or conversion to a seq of Rune.
  290. ##
  291. ## See also:
  292. ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_
  293. runnableExamples:
  294. let a = "añyóng"
  295. doAssert a.runeOffset(1) == 1
  296. doAssert a.runeOffset(3) == 4
  297. doAssert a.runeOffset(4) == 6
  298. var
  299. i = 0
  300. o = start
  301. while i < pos:
  302. o += runeLenAt(s, o)
  303. if o >= s.len:
  304. return -1
  305. inc i
  306. return o
  307. proc runeReverseOffset*(s: string, rev: Positive): (int, int) =
  308. ## Returns a tuple with the byte offset of the
  309. ## rune at position ``rev`` in ``s``, counting
  310. ## from the end (starting with 1) and the total
  311. ## number of runes in the string.
  312. ##
  313. ## Returns a negative value for offset if there are too few runes in
  314. ## the string to satisfy the request.
  315. ##
  316. ## **Beware:** This can lead to unoptimized code and slow execution!
  317. ## Most problems can be solved more efficiently by using an iterator
  318. ## or conversion to a seq of Rune.
  319. ##
  320. ## See also:
  321. ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_
  322. var
  323. a = rev.int
  324. o = 0
  325. x = 0
  326. let times = 2*rev.int-s.runeLen # transformed from rev.int - a < s.runeLen - rev.int
  327. while o < s.len:
  328. let r = runeLenAt(s, o)
  329. o += r
  330. if a > times:
  331. x += r
  332. dec a
  333. result = if a > 0: (-a, rev.int-a) else: (x, -a+rev.int)
  334. proc runeAtPos*(s: string, pos: int): Rune =
  335. ## Returns the rune at position ``pos``.
  336. ##
  337. ## **Beware:** This can lead to unoptimized code and slow execution!
  338. ## Most problems can be solved more efficiently by using an iterator
  339. ## or conversion to a seq of Rune.
  340. ##
  341. ## See also:
  342. ## * `runeAt proc <#runeAt,string,Natural>`_
  343. ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
  344. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  345. fastRuneAt(s, runeOffset(s, pos), result, false)
  346. proc runeStrAtPos*(s: string, pos: Natural): string =
  347. ## Returns the rune at position ``pos`` as UTF8 String.
  348. ##
  349. ## **Beware:** This can lead to unoptimized code and slow execution!
  350. ## Most problems can be solved more efficiently by using an iterator
  351. ## or conversion to a seq of Rune.
  352. ##
  353. ## See also:
  354. ## * `runeAt proc <#runeAt,string,Natural>`_
  355. ## * `runeAtPos proc <#runeAtPos,string,int>`_
  356. ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  357. let o = runeOffset(s, pos)
  358. s[o .. (o+runeLenAt(s, o)-1)]
  359. proc runeSubStr*(s: string, pos: int, len: int = int.high): string =
  360. ## Returns the UTF-8 substring starting at code point ``pos``
  361. ## with ``len`` code points.
  362. ##
  363. ## If ``pos`` or ``len`` is negative they count from
  364. ## the end of the string. If ``len`` is not given it means the longest
  365. ## possible string.
  366. runnableExamples:
  367. let s = "Hänsel ««: 10,00€"
  368. doAssert(runeSubStr(s, 0, 2) == "Hä")
  369. doAssert(runeSubStr(s, 10, 1) == ":")
  370. doAssert(runeSubStr(s, -6) == "10,00€")
  371. doAssert(runeSubStr(s, 10) == ": 10,00€")
  372. doAssert(runeSubStr(s, 12, 5) == "10,00")
  373. doAssert(runeSubStr(s, -6, 3) == "10,")
  374. if pos < 0:
  375. let (o, rl) = runeReverseOffset(s, -pos)
  376. if len >= rl:
  377. result = s.substr(o, s.len-1)
  378. elif len < 0:
  379. let e = rl + len
  380. if e < 0:
  381. result = ""
  382. else:
  383. result = s.substr(o, runeOffset(s, e-(rl+pos), o)-1)
  384. else:
  385. result = s.substr(o, runeOffset(s, len, o)-1)
  386. else:
  387. let o = runeOffset(s, pos)
  388. if o < 0:
  389. result = ""
  390. elif len == int.high:
  391. result = s.substr(o, s.len-1)
  392. elif len < 0:
  393. let (e, rl) = runeReverseOffset(s, -len)
  394. discard rl
  395. if e <= 0:
  396. result = ""
  397. else:
  398. result = s.substr(o, e-1)
  399. else:
  400. var e = runeOffset(s, len, o)
  401. if e < 0:
  402. e = s.len
  403. result = s.substr(o, e-1)
  404. proc `<=%`*(a, b: Rune): bool =
  405. ## Checks if code point of `a` is smaller or equal to code point of `b`.
  406. runnableExamples:
  407. let
  408. a = "ú".runeAt(0)
  409. b = "ü".runeAt(0)
  410. doAssert a <=% b
  411. return int(a) <=% int(b)
  412. proc `<%`*(a, b: Rune): bool =
  413. ## Checks if code point of `a` is smaller than code point of `b`.
  414. runnableExamples:
  415. let
  416. a = "ú".runeAt(0)
  417. b = "ü".runeAt(0)
  418. doAssert a <% b
  419. return int(a) <% int(b)
  420. proc `==`*(a, b: Rune): bool =
  421. ## Checks if two runes are equal.
  422. return int(a) == int(b)
  423. include "includes/unicode_ranges"
  424. proc binarySearch(c: RuneImpl, tab: openArray[int], len, stride: int): int =
  425. var n = len
  426. var t = 0
  427. while n > 1:
  428. var m = n div 2
  429. var p = t + m*stride
  430. if c >= tab[p]:
  431. t = p
  432. n = n-m
  433. else:
  434. n = m
  435. if n != 0 and c >= tab[t]:
  436. return t
  437. return -1
  438. proc toLower*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
  439. ## Converts ``c`` into lower case. This works for any rune.
  440. ##
  441. ## If possible, prefer ``toLower`` over ``toUpper``.
  442. ##
  443. ## See also:
  444. ## * `toUpper proc <#toUpper,Rune>`_
  445. ## * `toTitle proc <#toTitle,Rune>`_
  446. ## * `isLower proc <#isLower,Rune>`_
  447. var c = RuneImpl(c)
  448. var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
  449. if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
  450. return Rune(c + toLowerRanges[p+2] - 500)
  451. p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2)
  452. if p >= 0 and c == toLowerSinglets[p]:
  453. return Rune(c + toLowerSinglets[p+1] - 500)
  454. return Rune(c)
  455. proc toUpper*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
  456. ## Converts ``c`` into upper case. This works for any rune.
  457. ##
  458. ## If possible, prefer ``toLower`` over ``toUpper``.
  459. ##
  460. ## See also:
  461. ## * `toLower proc <#toLower,Rune>`_
  462. ## * `toTitle proc <#toTitle,Rune>`_
  463. ## * `isUpper proc <#isUpper,Rune>`_
  464. var c = RuneImpl(c)
  465. var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3)
  466. if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]:
  467. return Rune(c + toUpperRanges[p+2] - 500)
  468. p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2)
  469. if p >= 0 and c == toUpperSinglets[p]:
  470. return Rune(c + toUpperSinglets[p+1] - 500)
  471. return Rune(c)
  472. proc toTitle*(c: Rune): Rune {.rtl, extern: "nuc$1".} =
  473. ## Converts ``c`` to title case.
  474. ##
  475. ## See also:
  476. ## * `toLower proc <#toLower,Rune>`_
  477. ## * `toUpper proc <#toUpper,Rune>`_
  478. ## * `isTitle proc <#isTitle,Rune>`_
  479. var c = RuneImpl(c)
  480. var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2)
  481. if p >= 0 and c == toTitleSinglets[p]:
  482. return Rune(c + toTitleSinglets[p+1] - 500)
  483. return Rune(c)
  484. proc isLower*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  485. ## Returns true if ``c`` is a lower case rune.
  486. ##
  487. ## If possible, prefer ``isLower`` over ``isUpper``.
  488. ##
  489. ## See also:
  490. ## * `toLower proc <#toLower,Rune>`_
  491. ## * `isUpper proc <#isUpper,Rune>`_
  492. ## * `isTitle proc <#isTitle,Rune>`_
  493. var c = RuneImpl(c)
  494. # Note: toUpperRanges is correct here!
  495. var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3)
  496. if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]:
  497. return true
  498. p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2)
  499. if p >= 0 and c == toUpperSinglets[p]:
  500. return true
  501. proc isUpper*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  502. ## Returns true if ``c`` is a upper case rune.
  503. ##
  504. ## If possible, prefer ``isLower`` over ``isUpper``.
  505. ##
  506. ## See also:
  507. ## * `toUpper proc <#toUpper,Rune>`_
  508. ## * `isLower proc <#isLower,Rune>`_
  509. ## * `isTitle proc <#isTitle,Rune>`_
  510. ## * `isAlpha proc <#isAlpha,Rune>`_
  511. ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
  512. var c = RuneImpl(c)
  513. # Note: toLowerRanges is correct here!
  514. var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
  515. if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
  516. return true
  517. p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2)
  518. if p >= 0 and c == toLowerSinglets[p]:
  519. return true
  520. proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  521. ## Returns true if ``c`` is an *alpha* rune (i.e., a letter).
  522. ##
  523. ## See also:
  524. ## * `isLower proc <#isLower,Rune>`_
  525. ## * `isTitle proc <#isTitle,Rune>`_
  526. ## * `isAlpha proc <#isAlpha,Rune>`_
  527. ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
  528. ## * `isCombining proc <#isCombining,Rune>`_
  529. if isUpper(c) or isLower(c):
  530. return true
  531. var c = RuneImpl(c)
  532. var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2)
  533. if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]:
  534. return true
  535. p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1)
  536. if p >= 0 and c == alphaSinglets[p]:
  537. return true
  538. proc isTitle*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  539. ## Returns true if ``c`` is a Unicode titlecase code point.
  540. ##
  541. ## See also:
  542. ## * `toTitle proc <#toTitle,Rune>`_
  543. ## * `isLower proc <#isLower,Rune>`_
  544. ## * `isUpper proc <#isUpper,Rune>`_
  545. ## * `isAlpha proc <#isAlpha,Rune>`_
  546. ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_
  547. return isUpper(c) and isLower(c)
  548. proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  549. ## Returns true if ``c`` is a Unicode whitespace code point.
  550. ##
  551. ## See also:
  552. ## * `isLower proc <#isLower,Rune>`_
  553. ## * `isUpper proc <#isUpper,Rune>`_
  554. ## * `isTitle proc <#isTitle,Rune>`_
  555. ## * `isAlpha proc <#isAlpha,Rune>`_
  556. var c = RuneImpl(c)
  557. var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
  558. if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]:
  559. return true
  560. proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1".} =
  561. ## Returns true if ``c`` is a Unicode combining code unit.
  562. ##
  563. ## See also:
  564. ## * `isLower proc <#isLower,Rune>`_
  565. ## * `isUpper proc <#isUpper,Rune>`_
  566. ## * `isTitle proc <#isTitle,Rune>`_
  567. ## * `isAlpha proc <#isAlpha,Rune>`_
  568. var c = RuneImpl(c)
  569. # Optimized to return false immediately for ASCII
  570. return c >= 0x0300 and (c <= 0x036f or
  571. (c >= 0x1ab0 and c <= 0x1aff) or
  572. (c >= 0x1dc0 and c <= 0x1dff) or
  573. (c >= 0x20d0 and c <= 0x20ff) or
  574. (c >= 0xfe20 and c <= 0xfe2f))
  575. template runeCheck(s, runeProc) =
  576. ## Common code for isAlpha and isSpace.
  577. result = if len(s) == 0: false else: true
  578. var
  579. i = 0
  580. rune: Rune
  581. while i < len(s) and result:
  582. fastRuneAt(s, i, rune, doInc = true)
  583. result = runeProc(rune) and result
  584. proc isAlpha*(s: string): bool {.noSideEffect,
  585. rtl, extern: "nuc$1Str".} =
  586. ## Returns true if ``s`` contains all alphabetic runes.
  587. runnableExamples:
  588. let a = "añyóng"
  589. doAssert a.isAlpha
  590. runeCheck(s, isAlpha)
  591. proc isSpace*(s: string): bool {.noSideEffect,
  592. rtl, extern: "nuc$1Str".} =
  593. ## Returns true if ``s`` contains all whitespace runes.
  594. runnableExamples:
  595. let a = "\t\l \v\r\f"
  596. doAssert a.isSpace
  597. runeCheck(s, isWhiteSpace)
  598. template convertRune(s, runeProc) =
  599. ## Convert runes in ``s`` using ``runeProc`` as the converter.
  600. result = newString(len(s))
  601. var
  602. i = 0
  603. resultIndex = 0
  604. rune: Rune
  605. while i < len(s):
  606. fastRuneAt(s, i, rune, doInc = true)
  607. rune = runeProc(rune)
  608. fastToUTF8Copy(rune, result, resultIndex, doInc = true)
  609. proc toUpper*(s: string): string {.noSideEffect,
  610. rtl, extern: "nuc$1Str".} =
  611. ## Converts ``s`` into upper-case runes.
  612. runnableExamples:
  613. doAssert toUpper("abγ") == "ABΓ"
  614. convertRune(s, toUpper)
  615. proc toLower*(s: string): string {.noSideEffect,
  616. rtl, extern: "nuc$1Str".} =
  617. ## Converts ``s`` into lower-case runes.
  618. runnableExamples:
  619. doAssert toLower("ABΓ") == "abγ"
  620. convertRune(s, toLower)
  621. proc swapCase*(s: string): string {.noSideEffect,
  622. rtl, extern: "nuc$1".} =
  623. ## Swaps the case of runes in ``s``.
  624. ##
  625. ## Returns a new string such that the cases of all runes
  626. ## are swapped if possible.
  627. runnableExamples:
  628. doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
  629. var
  630. i = 0
  631. resultIndex = 0
  632. rune: Rune
  633. result = newString(len(s))
  634. while i < len(s):
  635. fastRuneAt(s, i, rune)
  636. if rune.isUpper():
  637. rune = rune.toLower()
  638. elif rune.isLower():
  639. rune = rune.toUpper()
  640. fastToUTF8Copy(rune, result, resultIndex, doInc = true)
  641. proc capitalize*(s: string): string {.noSideEffect,
  642. rtl, extern: "nuc$1".} =
  643. ## Converts the first character of ``s`` into an upper-case rune.
  644. runnableExamples:
  645. doAssert capitalize("βeta") == "Βeta"
  646. if len(s) == 0:
  647. return ""
  648. var
  649. rune: Rune
  650. i = 0
  651. fastRuneAt(s, i, rune, doInc = true)
  652. result = $toUpper(rune) & substr(s, i)
  653. when not defined(nimHasEffectsOf):
  654. {.pragma: effectsOf.}
  655. proc translate*(s: string, replacements: proc(key: string): string): string {.
  656. rtl, extern: "nuc$1", effectsOf: replacements.} =
  657. ## Translates words in a string using the ``replacements`` proc to substitute
  658. ## words inside ``s`` with their replacements.
  659. ##
  660. ## ``replacements`` is any proc that takes a word and returns
  661. ## a new word to fill it's place.
  662. runnableExamples:
  663. proc wordToNumber(s: string): string =
  664. case s
  665. of "one": "1"
  666. of "two": "2"
  667. else: s
  668. let a = "one two three four"
  669. doAssert a.translate(wordToNumber) == "1 2 three four"
  670. # Allocate memory for the new string based on the old one.
  671. # If the new string length is less than the old, no allocations
  672. # will be needed. If the new string length is greater than the
  673. # old, then maybe only one allocation is needed
  674. result = newStringOfCap(s.len)
  675. var
  676. index = 0
  677. lastIndex = 0
  678. wordStart = 0
  679. inWord = false
  680. rune: Rune
  681. while index < len(s):
  682. lastIndex = index
  683. fastRuneAt(s, index, rune)
  684. let whiteSpace = rune.isWhiteSpace()
  685. if whiteSpace and inWord:
  686. # If we've reached the end of a word
  687. let word = s[wordStart ..< lastIndex]
  688. result.add(replacements(word))
  689. result.add($rune)
  690. inWord = false
  691. elif not whiteSpace and not inWord:
  692. # If we've hit a non space character and
  693. # are not currently in a word, track
  694. # the starting index of the word
  695. inWord = true
  696. wordStart = lastIndex
  697. elif whiteSpace:
  698. result.add($rune)
  699. if wordStart < len(s) and inWord:
  700. # Get the trailing word at the end
  701. let word = s[wordStart .. ^1]
  702. result.add(replacements(word))
  703. proc title*(s: string): string {.noSideEffect,
  704. rtl, extern: "nuc$1".} =
  705. ## Converts ``s`` to a unicode title.
  706. ##
  707. ## Returns a new string such that the first character
  708. ## in each word inside ``s`` is capitalized.
  709. runnableExamples:
  710. doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
  711. var
  712. i = 0
  713. resultIndex = 0
  714. rune: Rune
  715. result = newString(len(s))
  716. var firstRune = true
  717. while i < len(s):
  718. fastRuneAt(s, i, rune)
  719. if not rune.isWhiteSpace() and firstRune:
  720. rune = rune.toUpper()
  721. firstRune = false
  722. elif rune.isWhiteSpace():
  723. firstRune = true
  724. fastToUTF8Copy(rune, result, resultIndex, doInc = true)
  725. iterator runes*(s: string): Rune =
  726. ## Iterates over any rune of the string ``s`` returning runes.
  727. var
  728. i = 0
  729. result: Rune
  730. while i < len(s):
  731. fastRuneAt(s, i, result, true)
  732. yield result
  733. iterator utf8*(s: string): string =
  734. ## Iterates over any rune of the string ``s`` returning utf8 values.
  735. ##
  736. ## See also:
  737. ## * `validateUtf8 proc <#validateUtf8,string>`_
  738. ## * `toUTF8 proc <#toUTF8,Rune>`_
  739. ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  740. ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  741. var o = 0
  742. while o < s.len:
  743. let n = runeLenAt(s, o)
  744. yield s[o .. (o+n-1)]
  745. o += n
  746. proc toRunes*(s: string): seq[Rune] =
  747. ## Obtains a sequence containing the Runes in ``s``.
  748. ##
  749. ## See also:
  750. ## * `$ proc <#$,Rune>`_ for a reverse operation
  751. runnableExamples:
  752. let a = toRunes("aáä")
  753. doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)]
  754. result = newSeq[Rune]()
  755. for r in s.runes:
  756. result.add(r)
  757. proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} =
  758. ## Compares two UTF-8 strings and ignores the case. Returns:
  759. ##
  760. ## | 0 if a == b
  761. ## | < 0 if a < b
  762. ## | > 0 if a > b
  763. var i = 0
  764. var j = 0
  765. var ar, br: Rune
  766. while i < a.len and j < b.len:
  767. # slow path:
  768. fastRuneAt(a, i, ar)
  769. fastRuneAt(b, j, br)
  770. result = RuneImpl(toLower(ar)) - RuneImpl(toLower(br))
  771. if result != 0: return
  772. result = a.len - b.len
  773. proc reversed*(s: string): string =
  774. ## Returns the reverse of ``s``, interpreting it as runes.
  775. ##
  776. ## Unicode combining characters are correctly interpreted as well.
  777. runnableExamples:
  778. assert reversed("Reverse this!") == "!siht esreveR"
  779. assert reversed("先秦兩漢") == "漢兩秦先"
  780. assert reversed("as⃝df̅") == "f̅ds⃝a"
  781. assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  782. var
  783. i = 0
  784. lastI = 0
  785. newPos = len(s) - 1
  786. blockPos = 0
  787. r: Rune
  788. template reverseUntil(pos) =
  789. var j = pos - 1
  790. while j > blockPos:
  791. result[newPos] = s[j]
  792. dec j
  793. dec newPos
  794. blockPos = pos - 1
  795. result = newString(len(s))
  796. while i < len(s):
  797. lastI = i
  798. fastRuneAt(s, i, r, true)
  799. if not isCombining(r):
  800. reverseUntil(lastI)
  801. reverseUntil(len(s))
  802. proc graphemeLen*(s: string; i: Natural): Natural =
  803. ## The number of bytes belonging to byte index ``s[i]``,
  804. ## including following combining code unit.
  805. runnableExamples:
  806. let a = "añyóng"
  807. doAssert a.graphemeLen(1) == 2 ## ñ
  808. doAssert a.graphemeLen(2) == 1
  809. doAssert a.graphemeLen(4) == 2 ## ó
  810. var j = i.int
  811. var r, r2: Rune
  812. if j < s.len:
  813. fastRuneAt(s, j, r, true)
  814. result = j-i
  815. while j < s.len:
  816. fastRuneAt(s, j, r2, true)
  817. if not isCombining(r2): break
  818. result = j-i
  819. proc lastRune*(s: string; last: int): (Rune, int) =
  820. ## Length of the last rune in ``s[0..last]``. Returns the rune and its length
  821. ## in bytes.
  822. if s[last] <= chr(127):
  823. result = (Rune(s[last]), 1)
  824. else:
  825. var L = 0
  826. while last-L >= 0 and uint(s[last-L]) shr 6 == 0b10: inc(L)
  827. var r: Rune
  828. fastRuneAt(s, last-L, r, false)
  829. result = (r, L+1)
  830. proc size*(r: Rune): int {.noSideEffect.} =
  831. ## Returns the number of bytes the rune ``r`` takes.
  832. runnableExamples:
  833. let a = toRunes "aá"
  834. doAssert size(a[0]) == 1
  835. doAssert size(a[1]) == 2
  836. let v = r.uint32
  837. if v <= 0x007F'u32: result = 1
  838. elif v <= 0x07FF'u32: result = 2
  839. elif v <= 0xFFFF'u32: result = 3
  840. elif v <= 0x1FFFFF'u32: result = 4
  841. elif v <= 0x3FFFFFF'u32: result = 5
  842. elif v <= 0x7FFFFFFF'u32: result = 6
  843. else: result = 1
  844. # --------- Private templates for different split separators -----------
  845. proc stringHasSep(s: string, index: int, seps: openArray[Rune]): bool =
  846. var rune: Rune
  847. fastRuneAt(s, index, rune, false)
  848. return seps.contains(rune)
  849. proc stringHasSep(s: string, index: int, sep: Rune): bool =
  850. var rune: Rune
  851. fastRuneAt(s, index, rune, false)
  852. return sep == rune
template splitCommon(s, sep, maxsplit: untyped) =
  ## Common code for split procedures.
  ##
  ## Expands to a loop that yields the pieces of ``s`` found between
  ## separators, so it has to be instantiated inside an iterator. ``sep``
  ## is handed unchanged to ``stringHasSep``. Nothing is yielded when ``s``
  ## is empty.
  let
    sLen = len(s)
  var
    last = 0          # byte index currently being scanned
    splits = maxsplit # counts down; on reaching 0 the rest of ``s`` is yielded whole
  if sLen > 0:
    while last <= sLen:
      var first = last
      # Advance rune by rune until a separator or the end of the string.
      while last < sLen and not stringHasSep(s, last, sep):
        inc(last, runeLenAt(s, last))
      # Split budget used up: the final piece runs to the end of ``s``.
      if splits == 0: last = sLen
      yield s[first .. (last - 1)]
      if splits == 0: break
      dec(splits)
      # Skip the separator rune; at the end of the string step past
      # ``sLen`` so that the outer loop terminates.
      inc(last, if last < sLen: runeLenAt(s, last) else: 1)
iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces,
    maxsplit: int = -1): string =
  ## Splits the unicode string ``s`` into substrings using a group of separators.
  ##
  ## Substrings are separated by a substring containing only ``seps``.
  ## ``seps`` defaults to ``unicodeSpaces``. ``maxsplit`` is forwarded to
  ## the shared ``splitCommon`` template as its split budget.
  runnableExamples:
    import std/sequtils
    assert toSeq("hÃllo\lthis\lis an\texample\l是".split) ==
      @["hÃllo", "this", "is", "an", "example", "是"]
    # And the following code splits the same string using a sequence of Runes.
    assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) ==
      @["añyóng", "hÃllo", "是", "example"]
    # example with a `Rune` separator and unused one `;`:
    assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""]
    # Another example that splits a string containing a date.
    let date = "2012-11-20T22:08:08.398990"
    assert toSeq(split(date, " -:T".toRunes)) ==
      @["2012", "11", "20", "22", "08", "08.398990"]
  splitCommon(s, seps, maxsplit)
iterator splitWhitespace*(s: string): string =
  ## Splits a unicode string at whitespace runes.
  ##
  ## The separators are the runes in ``unicodeSpaces``; no limit is placed
  ## on the number of splits (``maxsplit`` is passed as -1).
  splitCommon(s, unicodeSpaces, -1)
  892. template accResult(iter: untyped) =
  893. result = @[]
  894. for x in iter: add(result, x)
proc splitWhitespace*(s: string): seq[string] {.noSideEffect,
  rtl, extern: "ncuSplitWhitespace".} =
  ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
  ## iterator, but is a proc that returns a sequence of substrings.
  ##
  ## The substrings appear in the order the iterator yields them.
  accResult(splitWhitespace(s))
iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
  ## Splits the unicode string ``s`` into substrings using a single separator.
  ## Substrings are separated by the rune ``sep``.
  ##
  ## Consecutive separators produce empty substrings, as the example shows.
  ## ``maxsplit`` is forwarded to the shared ``splitCommon`` template as its
  ## split budget.
  runnableExamples:
    import std/sequtils
    assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) ==
      @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"]
  splitCommon(s, sep, maxsplit)
proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
    seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} =
  ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
  ## but is a proc that returns a sequence of substrings.
  ##
  ## The substrings appear in the order the iterator yields them.
  accResult(split(s, seps, maxsplit))
proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect,
  rtl, extern: "nucSplitRune".} =
  ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc
  ## that returns a sequence of substrings.
  ##
  ## The substrings appear in the order the iterator yields them.
  accResult(split(s, sep, maxsplit))
  918. proc strip*(s: string, leading = true, trailing = true,
  919. runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
  920. rtl, extern: "nucStrip".} =
  921. ## Strips leading or trailing ``runes`` from ``s`` and returns
  922. ## the resulting string.
  923. ##
  924. ## If ``leading`` is true (default), leading ``runes`` are stripped.
  925. ## If ``trailing`` is true (default), trailing ``runes`` are stripped.
  926. ## If both are false, the string is returned unchanged.
  927. runnableExamples:
  928. let a = "\táñyóng "
  929. doAssert a.strip == "áñyóng"
  930. doAssert a.strip(leading = false) == "\táñyóng"
  931. doAssert a.strip(trailing = false) == "áñyóng "
  932. var
  933. sI = 0 ## starting index into string ``s``
  934. eI = len(s) - 1 ## ending index into ``s``, where the last ``Rune`` starts
  935. if leading:
  936. var
  937. i = 0
  938. xI: int ## value of ``sI`` at the beginning of the iteration
  939. rune: Rune
  940. while i < len(s):
  941. xI = i
  942. fastRuneAt(s, i, rune)
  943. sI = i # Assume to start from next rune
  944. if not runes.contains(rune):
  945. sI = xI # Go back to where the current rune starts
  946. break
  947. if trailing:
  948. var
  949. i = eI
  950. xI: int
  951. rune: Rune
  952. while i >= 0:
  953. xI = i
  954. fastRuneAt(s, xI, rune)
  955. var yI = i - 1
  956. while yI >= 0:
  957. var
  958. yIend = yI
  959. pRune: Rune
  960. fastRuneAt(s, yIend, pRune)
  961. if yIend < xI: break
  962. i = yI
  963. rune = pRune
  964. dec(yI)
  965. if not runes.contains(rune):
  966. eI = xI - 1
  967. break
  968. dec(i)
  969. let newLen = eI - sI + 1
  970. result = newStringOfCap(newLen)
  971. if newLen > 0:
  972. result.add s[sI .. eI]
  973. proc repeat*(c: Rune, count: Natural): string {.noSideEffect,
  974. rtl, extern: "nucRepeatRune".} =
  975. ## Returns a string of ``count`` Runes ``c``.
  976. ##
  977. ## The returned string will have a rune-length of ``count``.
  978. runnableExamples:
  979. let a = "ñ".runeAt(0)
  980. doAssert a.repeat(5) == "ñññññ"
  981. let s = $c
  982. result = newStringOfCap(count * s.len)
  983. for i in 0 ..< count:
  984. result.add s
  985. proc align*(s: string, count: Natural, padding = ' '.Rune): string {.
  986. noSideEffect, rtl, extern: "nucAlignString".} =
  987. ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length
  988. ## of ``count``.
  989. ##
  990. ## ``padding`` characters (by default spaces) are added before ``s`` resulting in
  991. ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
  992. ## returned unchanged. If you need to left align a string use the `alignLeft
  993. ## proc <#alignLeft,string,Natural>`_.
  994. runnableExamples:
  995. assert align("abc", 4) == " abc"
  996. assert align("a", 0) == "a"
  997. assert align("1232", 6) == " 1232"
  998. assert align("1232", 6, '#'.Rune) == "##1232"
  999. assert align("Åge", 5) == " Åge"
  1000. assert align("×", 4, '_'.Rune) == "___×"
  1001. let sLen = s.runeLen
  1002. if sLen < count:
  1003. let padStr = $padding
  1004. result = newStringOfCap(padStr.len * count)
  1005. let spaces = count - sLen
  1006. for i in 0 ..< spaces: result.add padStr
  1007. result.add s
  1008. else:
  1009. result = s
  1010. proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.
  1011. noSideEffect.} =
  1012. ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a
  1013. ## rune-length of ``count``.
  1014. ##
  1015. ## ``padding`` characters (by default spaces) are added after ``s`` resulting in
  1016. ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
  1017. ## returned unchanged. If you need to right align a string use the `align
  1018. ## proc <#align,string,Natural>`_.
  1019. runnableExamples:
  1020. assert alignLeft("abc", 4) == "abc "
  1021. assert alignLeft("a", 0) == "a"
  1022. assert alignLeft("1232", 6) == "1232 "
  1023. assert alignLeft("1232", 6, '#'.Rune) == "1232##"
  1024. assert alignLeft("Åge", 5) == "Åge "
  1025. assert alignLeft("×", 4, '_'.Rune) == "×___"
  1026. let sLen = s.runeLen
  1027. if sLen < count:
  1028. let padStr = $padding
  1029. result = newStringOfCap(s.len + (count - sLen) * padStr.len)
  1030. result.add s
  1031. for i in sLen ..< count:
  1032. result.add padStr
  1033. else:
  1034. result = s