strutils.nim 111 KB


  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The system module defines several common functions for working with strings,
  10. ## such as:
  11. ## * ``$`` for converting other data-types to strings
  12. ## * ``&`` for string concatenation
  13. ## * ``add`` for adding a new character or a string to the existing one
  14. ## * ``in`` (alias for ``contains``) and ``notin`` for checking if a character
  15. ## is in a string
  16. ##
  17. ## This module builds upon that, providing additional functionality in form of
  18. ## procedures, iterators and templates for strings.
  19. ##
## .. code-block::
##    import strutils
##
##    let
##      numbers = @[867, 5309]
##      multiLineString = "first line\nsecond line\nthird line"
##
##    let jenny = numbers.join("-")
##    assert jenny == "867-5309"
##
##    assert splitLines(multiLineString) ==
##           @["first line", "second line", "third line"]
##    assert split(multiLineString) == @["first", "line", "second",
##                                       "line", "third", "line"]
##    assert indent(multiLineString, 4) ==
##           "    first line\n    second line\n    third line"
##    assert 'z'.repeat(5) == "zzzzz"
  37. ##
  38. ## The chaining of functions is possible thanks to the
  39. ## `method call syntax<manual.html#procedures-method-call-syntax>`_:
  40. ##
## .. code-block::
##    import strutils
##    from sequtils import map
##
##    let jenny = "867-5309"
##    assert jenny.split('-').map(parseInt) == @[867, 5309]
##
##    assert "Beetlejuice".indent(1).repeat(3).strip ==
##           "Beetlejuice Beetlejuice Beetlejuice"
  50. ##
  51. ## This module is available for the `JavaScript target
  52. ## <backends.html#backends-the-javascript-target>`_.
  53. ##
  54. ## ----
  55. ##
  56. ## **See also:**
  57. ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  58. ## * `unicode module<unicode.html>`_ for Unicode UTF-8 handling
  59. ## * `sequtils module<sequtils.html>`_ for operations on container
  60. ## types (including strings)
  61. ## * `parseutils module<parseutils.html>`_ for lower-level parsing of tokens,
  62. ## numbers, identifiers, etc.
  63. ## * `parseopt module<parseopt.html>`_ for command-line parsing
  64. ## * `strtabs module<strtabs.html>`_ for efficient hash tables
  65. ## (dictionaries, in some programming languages) mapping from strings to strings
  66. ## * `pegs module<pegs.html>`_ for PEG (Parsing Expression Grammar) support
  67. ## * `ropes module<ropes.html>`_ for rope data type, which can represent very
  68. ## long strings efficiently
  69. ## * `re module<re.html>`_ for regular expression (regex) support
  70. ## * `strscans<strscans.html>`_ for ``scanf`` and ``scanp`` macros, which offer
  71. ## easier substring extraction than regular expressions
  72. import parseutils
  73. from math import pow, floor, log10
  74. from algorithm import reverse
  75. when defined(nimVmExportFixed):
  76. from unicode import toLower, toUpper
  77. export toLower, toUpper
  78. {.deadCodeElim: on.} # dce option deprecated
  79. {.push debugger: off.} # the user does not want to trace a part
  80. # of the standard library!
  81. include "system/inclrtl"
  82. {.pop.}
# Support old split with set[char]
# `deprecatedSplit` is a user-defined pragma alias: when the symbol
# `nimOldSplit` is defined at compile time it expands to `deprecated`,
# otherwise it expands to nothing.
when defined(nimOldSplit):
  {.pragma: deprecatedSplit, deprecated.}
else:
  {.pragma: deprecatedSplit.}
  88. const
  89. Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
  90. ## All the characters that count as whitespace (space, tab, vertical tab,
  91. ## carriage return, new line, form feed)
  92. Letters* = {'A'..'Z', 'a'..'z'}
  93. ## the set of letters
  94. Digits* = {'0'..'9'}
  95. ## the set of digits
  96. HexDigits* = {'0'..'9', 'A'..'F', 'a'..'f'}
  97. ## the set of hexadecimal digits
  98. IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
  99. ## the set of characters an identifier can consist of
  100. IdentStartChars* = {'a'..'z', 'A'..'Z', '_'}
  101. ## the set of characters an identifier can start with
  102. Newlines* = {'\13', '\10'}
  103. ## the set of characters a newline terminator can start with (carriage
  104. ## return, line feed)
  105. AllChars* = {'\x00'..'\xFF'}
  106. ## A set with all the possible characters.
  107. ##
  108. ## Not very useful by its own, you can use it to create *inverted* sets to
  109. ## make the `find proc<#find,string,set[char],Natural,int>`_
  110. ## find **invalid** characters in strings. Example:
  111. ##
  112. ## .. code-block:: nim
  113. ## let invalid = AllChars - Digits
  114. ## doAssert "01234".find(invalid) == -1
  115. ## doAssert "01A34".find(invalid) == 2
  116. proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar,
  117. rtl, extern: "nsuIsAlphaAsciiChar".} =
  118. ## Checks whether or not character `c` is alphabetical.
  119. ##
  120. ## This checks a-z, A-Z ASCII characters only.
  121. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  122. runnableExamples:
  123. doAssert isAlphaAscii('e') == true
  124. doAssert isAlphaAscii('E') == true
  125. doAssert isAlphaAscii('8') == false
  126. return c in Letters
  127. proc isAlphaNumeric*(c: char): bool {.noSideEffect, procvar,
  128. rtl, extern: "nsuIsAlphaNumericChar".} =
  129. ## Checks whether or not `c` is alphanumeric.
  130. ##
  131. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  132. runnableExamples:
  133. doAssert isAlphaNumeric('n') == true
  134. doAssert isAlphaNumeric('8') == true
  135. doAssert isAlphaNumeric(' ') == false
  136. return c in Letters+Digits
  137. proc isDigit*(c: char): bool {.noSideEffect, procvar,
  138. rtl, extern: "nsuIsDigitChar".} =
  139. ## Checks whether or not `c` is a number.
  140. ##
  141. ## This checks 0-9 ASCII characters only.
  142. runnableExamples:
  143. doAssert isDigit('n') == false
  144. doAssert isDigit('8') == true
  145. return c in Digits
  146. proc isSpaceAscii*(c: char): bool {.noSideEffect, procvar,
  147. rtl, extern: "nsuIsSpaceAsciiChar".} =
  148. ## Checks whether or not `c` is a whitespace character.
  149. runnableExamples:
  150. doAssert isSpaceAscii('n') == false
  151. doAssert isSpaceAscii(' ') == true
  152. doAssert isSpaceAscii('\t') == true
  153. return c in Whitespace
  154. proc isLowerAscii*(c: char): bool {.noSideEffect, procvar,
  155. rtl, extern: "nsuIsLowerAsciiChar".} =
  156. ## Checks whether or not `c` is a lower case character.
  157. ##
  158. ## This checks ASCII characters only.
  159. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  160. ##
  161. ## See also:
  162. ## * `toLowerAscii proc<#toLowerAscii,char>`_
  163. runnableExamples:
  164. doAssert isLowerAscii('e') == true
  165. doAssert isLowerAscii('E') == false
  166. doAssert isLowerAscii('7') == false
  167. return c in {'a'..'z'}
  168. proc isUpperAscii*(c: char): bool {.noSideEffect, procvar,
  169. rtl, extern: "nsuIsUpperAsciiChar".} =
  170. ## Checks whether or not `c` is an upper case character.
  171. ##
  172. ## This checks ASCII characters only.
  173. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  174. ##
  175. ## See also:
  176. ## * `toUpperAscii proc<#toUpperAscii,char>`_
  177. runnableExamples:
  178. doAssert isUpperAscii('e') == false
  179. doAssert isUpperAscii('E') == true
  180. doAssert isUpperAscii('7') == false
  181. return c in {'A'..'Z'}
  182. proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
  183. rtl, extern: "nsuToLowerAsciiChar".} =
  184. ## Returns the lower case version of character ``c``.
  185. ##
  186. ## This works only for the letters ``A-Z``. See `unicode.toLower
  187. ## <unicode.html#toLower,Rune>`_ for a version that works for any Unicode
  188. ## character.
  189. ##
  190. ## See also:
  191. ## * `isLowerAscii proc<#isLowerAscii,char>`_
  192. ## * `toLowerAscii proc<#toLowerAscii,string>`_ for converting a string
  193. runnableExamples:
  194. doAssert toLowerAscii('A') == 'a'
  195. doAssert toLowerAscii('e') == 'e'
  196. if c in {'A'..'Z'}:
  197. result = chr(ord(c) + (ord('a') - ord('A')))
  198. else:
  199. result = c
  200. template toImpl(call) =
  201. result = newString(len(s))
  202. for i in 0..len(s) - 1:
  203. result[i] = call(s[i])
  204. proc toLowerAscii*(s: string): string {.noSideEffect, procvar,
  205. rtl, extern: "nsuToLowerAsciiStr".} =
  206. ## Converts string `s` into lower case.
  207. ##
  208. ## This works only for the letters ``A-Z``. See `unicode.toLower
  209. ## <unicode.html#toLower,string>`_ for a version that works for any Unicode
  210. ## character.
  211. ##
  212. ## See also:
  213. ## * `normalize proc<#normalize,string>`_
  214. runnableExamples:
  215. doAssert toLowerAscii("FooBar!") == "foobar!"
  216. toImpl toLowerAscii
  217. proc toUpperAscii*(c: char): char {.noSideEffect, procvar,
  218. rtl, extern: "nsuToUpperAsciiChar".} =
  219. ## Converts character `c` into upper case.
  220. ##
  221. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  222. ## <unicode.html#toUpper,Rune>`_ for a version that works for any Unicode
  223. ## character.
  224. ##
  225. ## See also:
  226. ## * `isLowerAscii proc<#isLowerAscii,char>`_
  227. ## * `toUpperAscii proc<#toUpperAscii,string>`_ for converting a string
  228. ## * `capitalizeAscii proc<#capitalizeAscii,string>`_
  229. runnableExamples:
  230. doAssert toUpperAscii('a') == 'A'
  231. doAssert toUpperAscii('E') == 'E'
  232. if c in {'a'..'z'}:
  233. result = chr(ord(c) - (ord('a') - ord('A')))
  234. else:
  235. result = c
  236. proc toUpperAscii*(s: string): string {.noSideEffect, procvar,
  237. rtl, extern: "nsuToUpperAsciiStr".} =
  238. ## Converts string `s` into upper case.
  239. ##
  240. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  241. ## <unicode.html#toUpper,string>`_ for a version that works for any Unicode
  242. ## character.
  243. ##
  244. ## See also:
  245. ## * `capitalizeAscii proc<#capitalizeAscii,string>`_
  246. runnableExamples:
  247. doAssert toUpperAscii("FooBar!") == "FOOBAR!"
  248. toImpl toUpperAscii
  249. proc capitalizeAscii*(s: string): string {.noSideEffect, procvar,
  250. rtl, extern: "nsuCapitalizeAscii".} =
  251. ## Converts the first character of string `s` into upper case.
  252. ##
  253. ## This works only for the letters ``A-Z``.
  254. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  255. ##
  256. ## See also:
  257. ## * `toUpperAscii proc<#toUpperAscii,char>`_
  258. runnableExamples:
  259. doAssert capitalizeAscii("foo") == "Foo"
  260. doAssert capitalizeAscii("-bar") == "-bar"
  261. if s.len == 0: result = ""
  262. else: result = toUpperAscii(s[0]) & substr(s, 1)
  263. proc normalize*(s: string): string {.noSideEffect, procvar,
  264. rtl, extern: "nsuNormalize".} =
  265. ## Normalizes the string `s`.
  266. ##
  267. ## That means to convert it to lower case and remove any '_'. This
  268. ## should NOT be used to normalize Nim identifier names.
  269. ##
  270. ## See also:
  271. ## * `toLowerAscii proc<#toLowerAscii,string>`_
  272. runnableExamples:
  273. doAssert normalize("Foo_bar") == "foobar"
  274. doAssert normalize("Foo Bar") == "foo bar"
  275. result = newString(s.len)
  276. var j = 0
  277. for i in 0..len(s) - 1:
  278. if s[i] in {'A'..'Z'}:
  279. result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
  280. inc j
  281. elif s[i] != '_':
  282. result[j] = s[i]
  283. inc j
  284. if j != s.len: setLen(result, j)
  285. proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
  286. rtl, extern: "nsuCmpIgnoreCase", procvar.} =
  287. ## Compares two strings in a case insensitive manner. Returns:
  288. ##
  289. ## | 0 if a == b
  290. ## | < 0 if a < b
  291. ## | > 0 if a > b
  292. runnableExamples:
  293. doAssert cmpIgnoreCase("FooBar", "foobar") == 0
  294. doAssert cmpIgnoreCase("bar", "Foo") < 0
  295. doAssert cmpIgnoreCase("Foo5", "foo4") > 0
  296. var i = 0
  297. var m = min(a.len, b.len)
  298. while i < m:
  299. result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
  300. if result != 0: return
  301. inc(i)
  302. result = a.len - b.len
  303. {.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
  304. # thus we compile without checks here
  305. proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
  306. rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
  307. ## Semantically the same as ``cmp(normalize(a), normalize(b))``. It
  308. ## is just optimized to not allocate temporary strings. This should
  309. ## NOT be used to compare Nim identifier names.
  310. ## Use `macros.eqIdent<macros.html#eqIdent,string,string>`_ for that.
  311. ##
  312. ## Returns:
  313. ##
  314. ## | 0 if a == b
  315. ## | < 0 if a < b
  316. ## | > 0 if a > b
  317. runnableExamples:
  318. doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
  319. doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
  320. var i = 0
  321. var j = 0
  322. while true:
  323. while i < a.len and a[i] == '_': inc i
  324. while j < b.len and b[j] == '_': inc j
  325. var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
  326. var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
  327. result = ord(aa) - ord(bb)
  328. if result != 0: return result
  329. # the characters are identical:
  330. if i >= a.len:
  331. # both cursors at the end:
  332. if j >= b.len: return 0
  333. # not yet at the end of 'b':
  334. return -1
  335. elif j >= b.len:
  336. return 1
  337. inc i
  338. inc j
  339. {.pop.}
  340. # --------- Private templates for different split separators -----------
  341. proc substrEq(s: string, pos: int, substr: string): bool =
  342. var i = 0
  343. var length = substr.len
  344. while i < length and pos+i < s.len and s[pos+i] == substr[i]:
  345. inc i
  346. return i == length
  347. template stringHasSep(s: string, index: int, seps: set[char]): bool =
  348. s[index] in seps
  349. template stringHasSep(s: string, index: int, sep: char): bool =
  350. s[index] == sep
  351. template stringHasSep(s: string, index: int, sep: string): bool =
  352. s.substrEq(index, sep)
  353. template splitCommon(s, sep, maxsplit, sepLen) =
  354. ## Common code for split procs
  355. var last = 0
  356. var splits = maxsplit
  357. while last <= len(s):
  358. var first = last
  359. while last < len(s) and not stringHasSep(s, last, sep):
  360. inc(last)
  361. if splits == 0: last = len(s)
  362. yield substr(s, first, last-1)
  363. if splits == 0: break
  364. dec(splits)
  365. inc(last, sepLen)
  366. template oldSplit(s, seps, maxsplit) =
  367. var last = 0
  368. var splits = maxsplit
  369. assert(not ('\0' in seps))
  370. while last < len(s):
  371. while last < len(s) and s[last] in seps: inc(last)
  372. var first = last
  373. while last < len(s) and s[last] notin seps: inc(last)
  374. if first <= last-1:
  375. if splits == 0: last = len(s)
  376. yield substr(s, first, last-1)
  377. if splits == 0: break
  378. dec(splits)
  379. template accResult(iter: untyped) =
  380. result = @[]
  381. for x in iter: add(result, x)
  382. iterator split*(s: string, sep: char, maxsplit: int = -1): string =
  383. ## Splits the string `s` into substrings using a single separator.
  384. ##
  385. ## Substrings are separated by the character `sep`.
  386. ## The code:
  387. ##
  388. ## .. code-block:: nim
  389. ## for word in split(";;this;is;an;;example;;;", ';'):
  390. ## writeLine(stdout, word)
  391. ##
  392. ## Results in:
  393. ##
  394. ## .. code-block::
  395. ## ""
  396. ## ""
  397. ## "this"
  398. ## "is"
  399. ## "an"
  400. ## ""
  401. ## "example"
  402. ## ""
  403. ## ""
  404. ## ""
  405. ##
  406. ## See also:
  407. ## * `rsplit iterator<#rsplit.i,string,char,int>`_
  408. ## * `splitLines iterator<#splitLines.i,string>`_
  409. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  410. ## * `split proc<#split,string,char,int>`_
  411. splitCommon(s, sep, maxsplit, 1)
  412. iterator split*(s: string, seps: set[char] = Whitespace,
  413. maxsplit: int = -1): string =
  414. ## Splits the string `s` into substrings using a group of separators.
  415. ##
  416. ## Substrings are separated by a substring containing only `seps`.
  417. ##
  418. ## .. code-block:: nim
  419. ## for word in split("this\lis an\texample"):
  420. ## writeLine(stdout, word)
  421. ##
  422. ## ...generates this output:
  423. ##
  424. ## .. code-block::
  425. ## "this"
  426. ## "is"
  427. ## "an"
  428. ## "example"
  429. ##
  430. ## And the following code:
  431. ##
  432. ## .. code-block:: nim
  433. ## for word in split("this:is;an$example", {';', ':', '$'}):
  434. ## writeLine(stdout, word)
  435. ##
  436. ## ...produces the same output as the first example. The code:
  437. ##
  438. ## .. code-block:: nim
  439. ## let date = "2012-11-20T22:08:08.398990"
  440. ## let separators = {' ', '-', ':', 'T'}
  441. ## for number in split(date, separators):
  442. ## writeLine(stdout, number)
  443. ##
  444. ## ...results in:
  445. ##
  446. ## .. code-block::
  447. ## "2012"
  448. ## "11"
  449. ## "20"
  450. ## "22"
  451. ## "08"
  452. ## "08.398990"
  453. ##
  454. ## See also:
  455. ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_
  456. ## * `splitLines iterator<#splitLines.i,string>`_
  457. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  458. ## * `split proc<#split,string,set[char],int>`_
  459. splitCommon(s, seps, maxsplit, 1)
  460. iterator split*(s: string, sep: string, maxsplit: int = -1): string =
  461. ## Splits the string `s` into substrings using a string separator.
  462. ##
  463. ## Substrings are separated by the string `sep`.
  464. ## The code:
  465. ##
  466. ## .. code-block:: nim
  467. ## for word in split("thisDATAisDATAcorrupted", "DATA"):
  468. ## writeLine(stdout, word)
  469. ##
  470. ## Results in:
  471. ##
  472. ## .. code-block::
  473. ## "this"
  474. ## "is"
  475. ## "corrupted"
  476. ##
  477. ## See also:
  478. ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_
  479. ## * `splitLines iterator<#splitLines.i,string>`_
  480. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  481. ## * `split proc<#split,string,string,int>`_
  482. splitCommon(s, sep, maxsplit, sep.len)
  483. template rsplitCommon(s, sep, maxsplit, sepLen) =
  484. ## Common code for rsplit functions
  485. var
  486. last = s.len - 1
  487. first = last
  488. splits = maxsplit
  489. startPos = 0
  490. # go to -1 in order to get separators at the beginning
  491. while first >= -1:
  492. while first >= 0 and not stringHasSep(s, first, sep):
  493. dec(first)
  494. if splits == 0:
  495. # No more splits means set first to the beginning
  496. first = -1
  497. if first == -1:
  498. startPos = 0
  499. else:
  500. startPos = first + sepLen
  501. yield substr(s, startPos, last)
  502. if splits == 0: break
  503. dec(splits)
  504. dec(first)
  505. last = first
  506. iterator rsplit*(s: string, sep: char,
  507. maxsplit: int = -1): string =
  508. ## Splits the string `s` into substrings from the right using a
  509. ## string separator. Works exactly the same as `split iterator
  510. ## <#split.i,string,char,int>`_ except in reverse order.
  511. ##
  512. ## .. code-block:: nim
  513. ## for piece in "foo:bar".rsplit(':'):
  514. ## echo piece
  515. ##
  516. ## Results in:
  517. ##
  518. ## .. code-block:: nim
  519. ## "bar"
  520. ## "foo"
  521. ##
  522. ## Substrings are separated from the right by the char `sep`.
  523. ##
  524. ## See also:
  525. ## * `split iterator<#split.i,string,char,int>`_
  526. ## * `splitLines iterator<#splitLines.i,string>`_
  527. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  528. ## * `rsplit proc<#rsplit,string,char,int>`_
  529. rsplitCommon(s, sep, maxsplit, 1)
  530. iterator rsplit*(s: string, seps: set[char] = Whitespace,
  531. maxsplit: int = -1): string =
  532. ## Splits the string `s` into substrings from the right using a
  533. ## string separator. Works exactly the same as `split iterator
  534. ## <#split.i,string,char,int>`_ except in reverse order.
  535. ##
  536. ## .. code-block:: nim
  537. ## for piece in "foo bar".rsplit(WhiteSpace):
  538. ## echo piece
  539. ##
  540. ## Results in:
  541. ##
  542. ## .. code-block:: nim
  543. ## "bar"
  544. ## "foo"
  545. ##
  546. ## Substrings are separated from the right by the set of chars `seps`
  547. ##
  548. ## See also:
  549. ## * `split iterator<#split.i,string,set[char],int>`_
  550. ## * `splitLines iterator<#splitLines.i,string>`_
  551. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  552. ## * `rsplit proc<#rsplit,string,set[char],int>`_
  553. rsplitCommon(s, seps, maxsplit, 1)
  554. iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
  555. keepSeparators: bool = false): string =
  556. ## Splits the string `s` into substrings from the right using a
  557. ## string separator. Works exactly the same as `split iterator
  558. ## <#split.i,string,string,int>`_ except in reverse order.
  559. ##
  560. ## .. code-block:: nim
  561. ## for piece in "foothebar".rsplit("the"):
  562. ## echo piece
  563. ##
  564. ## Results in:
  565. ##
  566. ## .. code-block:: nim
  567. ## "bar"
  568. ## "foo"
  569. ##
  570. ## Substrings are separated from the right by the string `sep`
  571. ##
  572. ## See also:
  573. ## * `split iterator<#split.i,string,string,int>`_
  574. ## * `splitLines iterator<#splitLines.i,string>`_
  575. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  576. ## * `rsplit proc<#rsplit,string,string,int>`_
  577. rsplitCommon(s, sep, maxsplit, sep.len)
  578. iterator splitLines*(s: string, keepEol = false): string =
  579. ## Splits the string `s` into its containing lines.
  580. ##
  581. ## Every `character literal <manual.html#lexical-analysis-character-literals>`_
  582. ## newline combination (CR, LF, CR-LF) is supported. The result strings
  583. ## contain no trailing end of line characters unless parameter ``keepEol``
  584. ## is set to ``true``.
  585. ##
  586. ## Example:
  587. ##
  588. ## .. code-block:: nim
  589. ## for line in splitLines("\nthis\nis\nan\n\nexample\n"):
  590. ## writeLine(stdout, line)
  591. ##
  592. ## Results in:
  593. ##
  594. ## .. code-block:: nim
  595. ## ""
  596. ## "this"
  597. ## "is"
  598. ## "an"
  599. ## ""
  600. ## "example"
  601. ## ""
  602. ##
  603. ## See also:
  604. ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
  605. ## * `splitLines proc<#splitLines,string>`_
  606. var first = 0
  607. var last = 0
  608. var eolpos = 0
  609. while true:
  610. while last < s.len and s[last] notin {'\c', '\l'}: inc(last)
  611. eolpos = last
  612. if last < s.len:
  613. if s[last] == '\l': inc(last)
  614. elif s[last] == '\c':
  615. inc(last)
  616. if last < s.len and s[last] == '\l': inc(last)
  617. yield substr(s, first, if keepEol: last-1 else: eolpos-1)
  618. # no eol characters consumed means that the string is over
  619. if eolpos == last:
  620. break
  621. first = last
  622. iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
  623. ## Splits the string ``s`` at whitespace stripping leading and trailing
  624. ## whitespace if necessary. If ``maxsplit`` is specified and is positive,
  625. ## no more than ``maxsplit`` splits is made.
  626. ##
  627. ## The following code:
  628. ##
  629. ## .. code-block:: nim
  630. ## let s = " foo \t bar baz "
  631. ## for ms in [-1, 1, 2, 3]:
  632. ## echo "------ maxsplit = ", ms, ":"
  633. ## for item in s.splitWhitespace(maxsplit=ms):
  634. ## echo '"', item, '"'
  635. ##
  636. ## ...results in:
  637. ##
  638. ## .. code-block::
  639. ## ------ maxsplit = -1:
  640. ## "foo"
  641. ## "bar"
  642. ## "baz"
  643. ## ------ maxsplit = 1:
  644. ## "foo"
  645. ## "bar baz "
  646. ## ------ maxsplit = 2:
  647. ## "foo"
  648. ## "bar"
  649. ## "baz "
  650. ## ------ maxsplit = 3:
  651. ## "foo"
  652. ## "bar"
  653. ## "baz"
  654. ##
  655. ## See also:
  656. ## * `splitLines iterator<#splitLines.i,string>`_
  657. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  658. oldSplit(s, Whitespace, maxsplit)
  659. proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect,
  660. rtl, extern: "nsuSplitChar".} =
  661. ## The same as the `split iterator <#split.i,string,char,int>`_ (see its
  662. ## documentation), but is a proc that returns a sequence of substrings.
  663. ##
  664. ## See also:
  665. ## * `split iterator <#split.i,string,char,int>`_
  666. ## * `rsplit proc<#rsplit,string,char,int>`_
  667. ## * `splitLines proc<#splitLines,string>`_
  668. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  669. runnableExamples:
  670. doAssert "a,b,c".split(',') == @["a", "b", "c"]
  671. doAssert "".split(' ') == @[""]
  672. accResult(split(s, sep, maxsplit))
  673. proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.
  674. noSideEffect, rtl, extern: "nsuSplitCharSet".} =
  675. ## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its
  676. ## documentation), but is a proc that returns a sequence of substrings.
  677. ##
  678. ## See also:
  679. ## * `split iterator <#split.i,string,set[char],int>`_
  680. ## * `rsplit proc<#rsplit,string,set[char],int>`_
  681. ## * `splitLines proc<#splitLines,string>`_
  682. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  683. runnableExamples:
  684. doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
  685. doAssert "".split({' '}) == @[""]
  686. accResult(split(s, seps, maxsplit))
  687. proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  688. rtl, extern: "nsuSplitString".} =
  689. ## Splits the string `s` into substrings using a string separator.
  690. ##
  691. ## Substrings are separated by the string `sep`. This is a wrapper around the
  692. ## `split iterator <#split.i,string,string,int>`_.
  693. ##
  694. ## See also:
  695. ## * `split iterator <#split.i,string,string,int>`_
  696. ## * `rsplit proc<#rsplit,string,string,int>`_
  697. ## * `splitLines proc<#splitLines,string>`_
  698. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  699. runnableExamples:
  700. doAssert "a,b,c".split(",") == @["a", "b", "c"]
  701. doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"]
  702. doAssert "".split("Elon Musk") == @[""]
  703. doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely",
  704. "", "", "", "spaced", "sentence"]
  705. doAssert "a largely spaced sentence".split(" ", maxsplit = 1) == @["a", " largely spaced sentence"]
  706. doAssert(sep.len > 0)
  707. accResult(split(s, sep, maxsplit))
  708. proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string]
  709. {.noSideEffect, rtl, extern: "nsuRSplitChar".} =
  710. ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a proc
  711. ## that returns a sequence of substrings.
  712. ##
  713. ## A possible common use case for `rsplit` is path manipulation,
  714. ## particularly on systems that don't use a common delimiter.
  715. ##
  716. ## For example, if a system had `#` as a delimiter, you could
  717. ## do the following to get the tail of the path:
  718. ##
  719. ## .. code-block:: nim
  720. ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
  721. ##
  722. ## Results in `tailSplit` containing:
  723. ##
  724. ## .. code-block:: nim
  725. ## @["Root#Object#Method", "Index"]
  726. ##
  727. ## See also:
  728. ## * `rsplit iterator <#rsplit.i,string,char,int>`_
  729. ## * `split proc<#split,string,char,int>`_
  730. ## * `splitLines proc<#splitLines,string>`_
  731. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  732. accResult(rsplit(s, sep, maxsplit))
  733. result.reverse()
  734. proc rsplit*(s: string, seps: set[char] = Whitespace,
  735. maxsplit: int = -1): seq[string]
  736. {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} =
  737. ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a
  738. ## proc that returns a sequence of substrings.
  739. ##
  740. ## A possible common use case for `rsplit` is path manipulation,
  741. ## particularly on systems that don't use a common delimiter.
  742. ##
  743. ## For example, if a system had `#` as a delimiter, you could
  744. ## do the following to get the tail of the path:
  745. ##
  746. ## .. code-block:: nim
  747. ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
  748. ##
  749. ## Results in `tailSplit` containing:
  750. ##
  751. ## .. code-block:: nim
  752. ## @["Root#Object#Method", "Index"]
  753. ##
  754. ## See also:
  755. ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_
  756. ## * `split proc<#split,string,set[char],int>`_
  757. ## * `splitLines proc<#splitLines,string>`_
  758. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  759. accResult(rsplit(s, seps, maxsplit))
  760. result.reverse()
  761. proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
  762. {.noSideEffect, rtl, extern: "nsuRSplitString".} =
  763. ## The same as the `rsplit iterator <#rsplit.i,string,string,int,bool>`_, but is a proc
  764. ## that returns a sequence of substrings.
  765. ##
  766. ## A possible common use case for `rsplit` is path manipulation,
  767. ## particularly on systems that don't use a common delimiter.
  768. ##
  769. ## For example, if a system had `#` as a delimiter, you could
  770. ## do the following to get the tail of the path:
  771. ##
  772. ## .. code-block:: nim
  773. ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
  774. ##
  775. ## Results in `tailSplit` containing:
  776. ##
  777. ## .. code-block:: nim
  778. ## @["Root#Object#Method", "Index"]
  779. ##
  780. ## See also:
  781. ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_
  782. ## * `split proc<#split,string,string,int>`_
  783. ## * `splitLines proc<#splitLines,string>`_
  784. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  785. runnableExamples:
  786. doAssert "a largely spaced sentence".rsplit(" ", maxsplit = 1) == @[
  787. "a largely spaced", "sentence"]
  788. doAssert "a,b,c".rsplit(",") == @["a", "b", "c"]
  789. doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ",
  790. "plan ", "canal panama"]
  791. doAssert "".rsplit("Elon Musk") == @[""]
  792. doAssert "a largely spaced sentence".rsplit(" ") == @["a", "",
  793. "largely", "", "", "", "spaced", "sentence"]
  794. accResult(rsplit(s, sep, maxsplit))
  795. result.reverse()
  796. proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect,
  797. rtl, extern: "nsuSplitLines".} =
  798. ## The same as the `splitLines iterator<#splitLines.i,string>`_ (see its
  799. ## documentation), but is a proc that returns a sequence of substrings.
  800. ##
  801. ## See also:
  802. ## * `splitLines iterator<#splitLines.i,string>`_
  803. ## * `splitWhitespace proc<#splitWhitespace,string,int>`_
  804. ## * `countLines proc<#countLines,string>`_
  805. accResult(splitLines(s, keepEol = keepEol))
  806. proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  807. rtl, extern: "nsuSplitWhitespace".} =
  808. ## The same as the `splitWhitespace iterator <#splitWhitespace.i,string,int>`_
  809. ## (see its documentation), but is a proc that returns a sequence of substrings.
  810. ##
  811. ## See also:
  812. ## * `splitWhitespace iterator <#splitWhitespace.i,string,int>`_
  813. ## * `splitLines proc<#splitLines,string>`_
  814. accResult(splitWhitespace(s, maxsplit))
  815. proc toBin*(x: BiggestInt, len: Positive): string {.noSideEffect,
  816. rtl, extern: "nsuToBin".} =
  817. ## Converts `x` into its binary representation.
  818. ##
  819. ## The resulting string is always `len` characters long. No leading ``0b``
  820. ## prefix is generated.
  821. runnableExamples:
  822. let
  823. a = 29
  824. b = 257
  825. doAssert a.toBin(8) == "00011101"
  826. doAssert b.toBin(8) == "00000001"
  827. doAssert b.toBin(9) == "100000001"
  828. var
  829. mask = BiggestUInt 1
  830. shift = BiggestUInt 0
  831. assert(len > 0)
  832. result = newString(len)
  833. for j in countdown(len-1, 0):
  834. result[j] = chr(int((BiggestUInt(x) and mask) shr shift) + ord('0'))
  835. inc shift
  836. mask = mask shl BiggestUInt(1)
  837. proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
  838. rtl, extern: "nsuToOct".} =
  839. ## Converts `x` into its octal representation.
  840. ##
  841. ## The resulting string is always `len` characters long. No leading ``0o``
  842. ## prefix is generated.
  843. ##
  844. ## Do not confuse it with `toOctal proc<#toOctal,char>`_.
  845. runnableExamples:
  846. let
  847. a = 62
  848. b = 513
  849. doAssert a.toOct(3) == "076"
  850. doAssert b.toOct(3) == "001"
  851. doAssert b.toOct(5) == "01001"
  852. var
  853. mask = BiggestUInt 7
  854. shift = BiggestUInt 0
  855. assert(len > 0)
  856. result = newString(len)
  857. for j in countdown(len-1, 0):
  858. result[j] = chr(int((BiggestUInt(x) and mask) shr shift) + ord('0'))
  859. inc shift, 3
  860. mask = mask shl BiggestUInt(3)
  861. proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect,
  862. rtl, extern: "nsuToHex".} =
  863. ## Converts `x` to its hexadecimal representation.
  864. ##
  865. ## The resulting string will be exactly `len` characters long. No prefix like
  866. ## ``0x`` is generated. `x` is treated as an unsigned value.
  867. runnableExamples:
  868. let
  869. a = 62
  870. b = 4097
  871. doAssert a.toHex(3) == "03E"
  872. doAssert b.toHex(3) == "001"
  873. doAssert b.toHex(4) == "1001"
  874. const
  875. HexChars = "0123456789ABCDEF"
  876. var
  877. n = x
  878. result = newString(len)
  879. for j in countdown(len-1, 0):
  880. result[j] = HexChars[int(n and 0xF)]
  881. n = n shr 4
  882. # handle negative overflow
  883. if n == 0 and x < 0: n = -1
  884. proc toHex*[T: SomeInteger](x: T): string =
  885. ## Shortcut for ``toHex(x, T.sizeOf * 2)``
  886. runnableExamples:
  887. doAssert toHex(1984'i64) == "00000000000007C0"
  888. toHex(BiggestInt(x), T.sizeOf * 2)
  889. proc toHex*(s: string): string {.noSideEffect, rtl.} =
  890. ## Converts a bytes string to its hexadecimal representation.
  891. ##
  892. ## The output is twice the input long. No prefix like
  893. ## ``0x`` is generated.
  894. ##
  895. ## See also:
  896. ## * `parseHexStr proc<#parseHexStr,string>`_ for the reverse operation
  897. runnableExamples:
  898. let
  899. a = "1"
  900. b = "A"
  901. c = "\0\255"
  902. doAssert a.toHex() == "31"
  903. doAssert b.toHex() == "41"
  904. doAssert c.toHex() == "00FF"
  905. const HexChars = "0123456789ABCDEF"
  906. result = newString(s.len * 2)
  907. for pos, c in s:
  908. var n = ord(c)
  909. result[pos * 2 + 1] = HexChars[n and 0xF]
  910. n = n shr 4
  911. result[pos * 2] = HexChars[n]
  912. proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
  913. ## Converts a character `c` to its octal representation.
  914. ##
  915. ## The resulting string may not have a leading zero. Its length is always
  916. ## exactly 3.
  917. ##
  918. ## Do not confuse it with `toOct proc<#toOct,BiggestInt,Positive>`_.
  919. runnableExamples:
  920. doAssert toOctal('1') == "061"
  921. doAssert toOctal('A') == "101"
  922. doAssert toOctal('a') == "141"
  923. doAssert toOctal('!') == "041"
  924. result = newString(3)
  925. var val = ord(c)
  926. for i in countdown(2, 0):
  927. result[i] = chr(val mod 8 + ord('0'))
  928. val = val div 8
  929. proc fromBin*[T: SomeInteger](s: string): T =
  930. ## Parses a binary integer value from a string `s`.
  931. ##
  932. ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have
  933. ## one of the following optional prefixes: `0b`, `0B`. Underscores within
  934. ## `s` are ignored.
  935. ##
  936. ## Does not check for overflow. If the value represented by `s`
  937. ## is too big to fit into a return type, only the value of the rightmost
  938. ## binary digits of `s` is returned without producing an error.
  939. runnableExamples:
  940. let s = "0b_0100_1000_1000_1000_1110_1110_1001_1001"
  941. doAssert fromBin[int](s) == 1216933529
  942. doAssert fromBin[int8](s) == 0b1001_1001'i8
  943. doAssert fromBin[int8](s) == -103'i8
  944. doAssert fromBin[uint8](s) == 153
  945. doAssert s.fromBin[:int16] == 0b1110_1110_1001_1001'i16
  946. doAssert s.fromBin[:uint64] == 1216933529'u64
  947. let p = parseutils.parseBin(s, result)
  948. if p != s.len or p == 0:
  949. raise newException(ValueError, "invalid binary integer: " & s)
  950. proc fromOct*[T: SomeInteger](s: string): T =
  951. ## Parses an octal integer value from a string `s`.
  952. ##
  953. ## If `s` is not a valid octal integer, `ValueError` is raised. `s` can have
  954. ## one of the following optional prefixes: `0o`, `0O`. Underscores within
  955. ## `s` are ignored.
  956. ##
  957. ## Does not check for overflow. If the value represented by `s`
  958. ## is too big to fit into a return type, only the value of the rightmost
  959. ## octal digits of `s` is returned without producing an error.
  960. runnableExamples:
  961. let s = "0o_123_456_777"
  962. doAssert fromOct[int](s) == 21913087
  963. doAssert fromOct[int8](s) == 0o377'i8
  964. doAssert fromOct[int8](s) == -1'i8
  965. doAssert fromOct[uint8](s) == 255'u8
  966. doAssert s.fromOct[:int16] == 24063'i16
  967. doAssert s.fromOct[:uint64] == 21913087'u64
  968. let p = parseutils.parseOct(s, result)
  969. if p != s.len or p == 0:
  970. raise newException(ValueError, "invalid oct integer: " & s)
  971. proc fromHex*[T: SomeInteger](s: string): T =
  972. ## Parses a hex integer value from a string `s`.
  973. ##
  974. ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have
  975. ## one of the following optional prefixes: `0x`, `0X`, `#`. Underscores within
  976. ## `s` are ignored.
  977. ##
  978. ## Does not check for overflow. If the value represented by `s`
  979. ## is too big to fit into a return type, only the value of the rightmost
  980. ## hex digits of `s` is returned without producing an error.
  981. runnableExamples:
  982. let s = "0x_1235_8df6"
  983. doAssert fromHex[int](s) == 305499638
  984. doAssert fromHex[int8](s) == 0xf6'i8
  985. doAssert fromHex[int8](s) == -10'i8
  986. doAssert fromHex[uint8](s) == 246'u8
  987. doAssert s.fromHex[:int16] == -29194'i16
  988. doAssert s.fromHex[:uint64] == 305499638'u64
  989. let p = parseutils.parseHex(s, result)
  990. if p != s.len or p == 0:
  991. raise newException(ValueError, "invalid hex integer: " & s)
  992. proc intToStr*(x: int, minchars: Positive = 1): string {.noSideEffect,
  993. rtl, extern: "nsuIntToStr".} =
  994. ## Converts `x` to its decimal representation.
  995. ##
  996. ## The resulting string will be minimally `minchars` characters long. This is
  997. ## achieved by adding leading zeros.
  998. runnableExamples:
  999. doAssert intToStr(1984) == "1984"
  1000. doAssert intToStr(1984, 6) == "001984"
  1001. result = $abs(x)
  1002. for i in 1 .. minchars - len(result):
  1003. result = '0' & result
  1004. if x < 0:
  1005. result = '-' & result
  1006. proc parseInt*(s: string): int {.noSideEffect, procvar,
  1007. rtl, extern: "nsuParseInt".} =
  1008. ## Parses a decimal integer value contained in `s`.
  1009. ##
  1010. ## If `s` is not a valid integer, `ValueError` is raised.
  1011. runnableExamples:
  1012. doAssert parseInt("-0042") == -42
  1013. let L = parseutils.parseInt(s, result, 0)
  1014. if L != s.len or L == 0:
  1015. raise newException(ValueError, "invalid integer: " & s)
  1016. proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar,
  1017. rtl, extern: "nsuParseBiggestInt".} =
  1018. ## Parses a decimal integer value contained in `s`.
  1019. ##
  1020. ## If `s` is not a valid integer, `ValueError` is raised.
  1021. let L = parseutils.parseBiggestInt(s, result, 0)
  1022. if L != s.len or L == 0:
  1023. raise newException(ValueError, "invalid integer: " & s)
  1024. proc parseUInt*(s: string): uint {.noSideEffect, procvar,
  1025. rtl, extern: "nsuParseUInt".} =
  1026. ## Parses a decimal unsigned integer value contained in `s`.
  1027. ##
  1028. ## If `s` is not a valid integer, `ValueError` is raised.
  1029. let L = parseutils.parseUInt(s, result, 0)
  1030. if L != s.len or L == 0:
  1031. raise newException(ValueError, "invalid unsigned integer: " & s)
  1032. proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar,
  1033. rtl, extern: "nsuParseBiggestUInt".} =
  1034. ## Parses a decimal unsigned integer value contained in `s`.
  1035. ##
  1036. ## If `s` is not a valid integer, `ValueError` is raised.
  1037. let L = parseutils.parseBiggestUInt(s, result, 0)
  1038. if L != s.len or L == 0:
  1039. raise newException(ValueError, "invalid unsigned integer: " & s)
  1040. proc parseFloat*(s: string): float {.noSideEffect, procvar,
  1041. rtl, extern: "nsuParseFloat".} =
  1042. ## Parses a decimal floating point value contained in `s`.
  1043. ##
  1044. ## If `s` is not a valid floating point number, `ValueError` is raised.
  1045. ##``NAN``, ``INF``, ``-INF`` are also supported (case insensitive comparison).
  1046. runnableExamples:
  1047. doAssert parseFloat("3.14") == 3.14
  1048. doAssert parseFloat("inf") == 1.0/0
  1049. let L = parseutils.parseFloat(s, result, 0)
  1050. if L != s.len or L == 0:
  1051. raise newException(ValueError, "invalid float: " & s)
  1052. proc parseBinInt*(s: string): int {.noSideEffect, procvar,
  1053. rtl, extern: "nsuParseBinInt".} =
  1054. ## Parses a binary integer value contained in `s`.
  1055. ##
  1056. ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have
  1057. ## one of the following optional prefixes: ``0b``, ``0B``. Underscores within
  1058. ## `s` are ignored.
  1059. runnableExamples:
  1060. let
  1061. a = "0b11_0101"
  1062. b = "111"
  1063. doAssert a.parseBinInt() == 53
  1064. doAssert b.parseBinInt() == 7
  1065. let L = parseutils.parseBin(s, result, 0)
  1066. if L != s.len or L == 0:
  1067. raise newException(ValueError, "invalid binary integer: " & s)
  1068. proc parseOctInt*(s: string): int {.noSideEffect,
  1069. rtl, extern: "nsuParseOctInt".} =
  1070. ## Parses an octal integer value contained in `s`.
  1071. ##
  1072. ## If `s` is not a valid oct integer, `ValueError` is raised. `s` can have one
  1073. ## of the following optional prefixes: ``0o``, ``0O``. Underscores within
  1074. ## `s` are ignored.
  1075. let L = parseutils.parseOct(s, result, 0)
  1076. if L != s.len or L == 0:
  1077. raise newException(ValueError, "invalid oct integer: " & s)
  1078. proc parseHexInt*(s: string): int {.noSideEffect, procvar,
  1079. rtl, extern: "nsuParseHexInt".} =
  1080. ## Parses a hexadecimal integer value contained in `s`.
  1081. ##
  1082. ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one
  1083. ## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores
  1084. ## within `s` are ignored.
  1085. let L = parseutils.parseHex(s, result, 0)
  1086. if L != s.len or L == 0:
  1087. raise newException(ValueError, "invalid hex integer: " & s)
  1088. proc generateHexCharToValueMap(): string =
  1089. ## Generate a string to map a hex digit to uint value
  1090. result = ""
  1091. for inp in 0..255:
  1092. let ch = chr(inp)
  1093. let o =
  1094. case ch:
  1095. of '0'..'9': inp - ord('0')
  1096. of 'a'..'f': inp - ord('a') + 10
  1097. of 'A'..'F': inp - ord('A') + 10
  1098. else: 17 # indicates an invalid hex char
  1099. result.add chr(o)
  1100. const hexCharToValueMap = generateHexCharToValueMap()
  1101. proc parseHexStr*(s: string): string {.noSideEffect, procvar,
  1102. rtl, extern: "nsuParseHexStr".} =
  1103. ## Convert hex-encoded string to byte string, e.g.:
  1104. ##
  1105. ## Raises ``ValueError`` for an invalid hex values. The comparison is
  1106. ## case-insensitive.
  1107. ##
  1108. ## See also:
  1109. ## * `toHex proc<#toHex,string>`_ for the reverse operation
  1110. runnableExamples:
  1111. let
  1112. a = "41"
  1113. b = "3161"
  1114. c = "00ff"
  1115. doAssert parseHexStr(a) == "A"
  1116. doAssert parseHexStr(b) == "1a"
  1117. doAssert parseHexStr(c) == "\0\255"
  1118. if s.len mod 2 != 0:
  1119. raise newException(ValueError, "Incorrect hex string len")
  1120. result = newString(s.len div 2)
  1121. var buf = 0
  1122. for pos, c in s:
  1123. let val = hexCharToValueMap[ord(c)].ord
  1124. if val == 17:
  1125. raise newException(ValueError, "Invalid hex char " & repr(c))
  1126. if pos mod 2 == 0:
  1127. buf = val
  1128. else:
  1129. result[pos div 2] = chr(val + buf shl 4)
  1130. proc parseBool*(s: string): bool =
  1131. ## Parses a value into a `bool`.
  1132. ##
  1133. ## If ``s`` is one of the following values: ``y, yes, true, 1, on``, then
  1134. ## returns `true`. If ``s`` is one of the following values: ``n, no, false,
  1135. ## 0, off``, then returns `false`. If ``s`` is something else a
  1136. ## ``ValueError`` exception is raised.
  1137. runnableExamples:
  1138. let a = "n"
  1139. doAssert parseBool(a) == false
  1140. case normalize(s)
  1141. of "y", "yes", "true", "1", "on": result = true
  1142. of "n", "no", "false", "0", "off": result = false
  1143. else: raise newException(ValueError, "cannot interpret as a bool: " & s)
  1144. proc parseEnum*[T: enum](s: string): T =
  1145. ## Parses an enum ``T``.
  1146. ##
  1147. ## Raises ``ValueError`` for an invalid value in `s`. The comparison is
  1148. ## done in a style insensitive way.
  1149. runnableExamples:
  1150. type
  1151. MyEnum = enum
  1152. first = "1st",
  1153. second,
  1154. third = "3rd"
  1155. doAssert parseEnum[MyEnum]("1_st") == first
  1156. doAssert parseEnum[MyEnum]("second") == second
  1157. doAssertRaises(ValueError):
  1158. echo parseEnum[MyEnum]("third")
  1159. for e in low(T)..high(T):
  1160. if cmpIgnoreStyle(s, $e) == 0:
  1161. return e
  1162. raise newException(ValueError, "invalid enum value: " & s)
  1163. proc parseEnum*[T: enum](s: string, default: T): T =
  1164. ## Parses an enum ``T``.
  1165. ##
  1166. ## Uses `default` for an invalid value in `s`. The comparison is done in a
  1167. ## style insensitive way.
  1168. runnableExamples:
  1169. type
  1170. MyEnum = enum
  1171. first = "1st",
  1172. second,
  1173. third = "3rd"
  1174. doAssert parseEnum[MyEnum]("1_st") == first
  1175. doAssert parseEnum[MyEnum]("second") == second
  1176. doAssert parseEnum[MyEnum]("last", third) == third
  1177. for e in low(T)..high(T):
  1178. if cmpIgnoreStyle(s, $e) == 0:
  1179. return e
  1180. result = default
  1181. proc repeat*(c: char, count: Natural): string {.noSideEffect,
  1182. rtl, extern: "nsuRepeatChar".} =
  1183. ## Returns a string of length `count` consisting only of
  1184. ## the character `c`.
  1185. runnableExamples:
  1186. let a = 'z'
  1187. doAssert a.repeat(5) == "zzzzz"
  1188. result = newString(count)
  1189. for i in 0..count-1: result[i] = c
  1190. proc repeat*(s: string, n: Natural): string {.noSideEffect,
  1191. rtl, extern: "nsuRepeatStr".} =
  1192. ## Returns string `s` concatenated `n` times.
  1193. runnableExamples:
  1194. doAssert "+ foo +".repeat(3) == "+ foo ++ foo ++ foo +"
  1195. result = newStringOfCap(n * s.len)
  1196. for i in 1..n: result.add(s)
  1197. proc spaces*(n: Natural): string {.inline.} =
  1198. ## Returns a string with `n` space characters. You can use this proc
  1199. ## to left align strings.
  1200. ##
  1201. ## See also:
  1202. ## * `align proc<#align,string,Natural,char>`_
  1203. ## * `alignLeft proc<#alignLeft,string,Natural,char>`_
  1204. ## * `indent proc<#indent,string,Natural,string>`_
  1205. ## * `center proc<#center,string,int,char>`_
  1206. runnableExamples:
  1207. let
  1208. width = 15
  1209. text1 = "Hello user!"
  1210. text2 = "This is a very long string"
  1211. doAssert text1 & spaces(max(0, width - text1.len)) & "|" ==
  1212. "Hello user! |"
  1213. doAssert text2 & spaces(max(0, width - text2.len)) & "|" ==
  1214. "This is a very long string|"
  1215. repeat(' ', n)
  1216. proc align*(s: string, count: Natural, padding = ' '): string {.
  1217. noSideEffect, rtl, extern: "nsuAlignString".} =
  1218. ## Aligns a string `s` with `padding`, so that it is of length `count`.
  1219. ##
  1220. ## `padding` characters (by default spaces) are added before `s` resulting in
  1221. ## right alignment. If ``s.len >= count``, no spaces are added and `s` is
  1222. ## returned unchanged. If you need to left align a string use the `alignLeft
  1223. ## proc <#alignLeft,string,Natural,char>`_.
  1224. ##
  1225. ## See also:
  1226. ## * `alignLeft proc<#alignLeft,string,Natural,char>`_
  1227. ## * `spaces proc<#spaces,Natural>`_
  1228. ## * `indent proc<#indent,string,Natural,string>`_
  1229. ## * `center proc<#center,string,int,char>`_
  1230. runnableExamples:
  1231. assert align("abc", 4) == " abc"
  1232. assert align("a", 0) == "a"
  1233. assert align("1232", 6) == " 1232"
  1234. assert align("1232", 6, '#') == "##1232"
  1235. if s.len < count:
  1236. result = newString(count)
  1237. let spaces = count - s.len
  1238. for i in 0..spaces-1: result[i] = padding
  1239. for i in spaces..count-1: result[i] = s[i-spaces]
  1240. else:
  1241. result = s
  1242. proc alignLeft*(s: string, count: Natural, padding = ' '): string {.
  1243. noSideEffect.} =
  1244. ## Left-Aligns a string `s` with `padding`, so that it is of length `count`.
  1245. ##
  1246. ## `padding` characters (by default spaces) are added after `s` resulting in
  1247. ## left alignment. If ``s.len >= count``, no spaces are added and `s` is
  1248. ## returned unchanged. If you need to right align a string use the `align
  1249. ## proc <#align,string,Natural,char>`_.
  1250. ##
  1251. ## See also:
  1252. ## * `align proc<#align,string,Natural,char>`_
  1253. ## * `spaces proc<#spaces,Natural>`_
  1254. ## * `indent proc<#indent,string,Natural,string>`_
  1255. ## * `center proc<#center,string,int,char>`_
  1256. runnableExamples:
  1257. assert alignLeft("abc", 4) == "abc "
  1258. assert alignLeft("a", 0) == "a"
  1259. assert alignLeft("1232", 6) == "1232 "
  1260. assert alignLeft("1232", 6, '#') == "1232##"
  1261. if s.len < count:
  1262. result = newString(count)
  1263. if s.len > 0:
  1264. result[0 .. (s.len - 1)] = s
  1265. for i in s.len ..< count:
  1266. result[i] = padding
  1267. else:
  1268. result = s
  1269. proc center*(s: string, width: int, fillChar: char = ' '): string {.
  1270. noSideEffect, rtl, extern: "nsuCenterString".} =
  1271. ## Return the contents of `s` centered in a string `width` long using
  1272. ## `fillChar` (default: space) as padding.
  1273. ##
  1274. ## The original string is returned if `width` is less than or equal
  1275. ## to `s.len`.
  1276. ##
  1277. ## See also:
  1278. ## * `align proc<#align,string,Natural,char>`_
  1279. ## * `alignLeft proc<#alignLeft,string,Natural,char>`_
  1280. ## * `spaces proc<#spaces,Natural>`_
  1281. ## * `indent proc<#indent,string,Natural,string>`_
  1282. runnableExamples:
  1283. let a = "foo"
  1284. doAssert a.center(2) == "foo"
  1285. doAssert a.center(5) == " foo "
  1286. doAssert a.center(6) == " foo "
  1287. if width <= s.len: return s
  1288. result = newString(width)
  1289. # Left padding will be one fillChar
  1290. # smaller if there are an odd number
  1291. # of characters
  1292. let
  1293. charsLeft = (width - s.len)
  1294. leftPadding = charsLeft div 2
  1295. for i in 0 ..< width:
  1296. if i >= leftPadding and i < leftPadding + s.len:
  1297. # we are where the string should be located
  1298. result[i] = s[i-leftPadding]
  1299. else:
  1300. # we are either before or after where
  1301. # the string s should go
  1302. result[i] = fillChar
  1303. proc indent*(s: string, count: Natural, padding: string = " "): string
  1304. {.noSideEffect, rtl, extern: "nsuIndent".} =
  1305. ## Indents each line in ``s`` by ``count`` amount of ``padding``.
  1306. ##
  1307. ## **Note:** This does not preserve the new line characters used in ``s``.
  1308. ##
  1309. ## See also:
  1310. ## * `align proc<#align,string,Natural,char>`_
  1311. ## * `alignLeft proc<#alignLeft,string,Natural,char>`_
  1312. ## * `spaces proc<#spaces,Natural>`_
  1313. ## * `unindent proc<#unindent,string,Natural,string>`_
  1314. runnableExamples:
  1315. doAssert indent("First line\c\l and second line.", 2) ==
  1316. " First line\l and second line."
  1317. result = ""
  1318. var i = 0
  1319. for line in s.splitLines():
  1320. if i != 0:
  1321. result.add("\n")
  1322. for j in 1..count:
  1323. result.add(padding)
  1324. result.add(line)
  1325. i.inc
  1326. proc unindent*(s: string, count: Natural, padding: string = " "): string
  1327. {.noSideEffect, rtl, extern: "nsuUnindent".} =
  1328. ## Unindents each line in ``s`` by ``count`` amount of ``padding``.
  1329. ## Sometimes called `dedent`:idx:
  1330. ##
  1331. ## **Note:** This does not preserve the new line characters used in ``s``.
  1332. ##
  1333. ## See also:
  1334. ## * `align proc<#align,string,Natural,char>`_
  1335. ## * `alignLeft proc<#alignLeft,string,Natural,char>`_
  1336. ## * `spaces proc<#spaces,Natural>`_
  1337. ## * `indent proc<#indent,string,Natural,string>`_
  1338. runnableExamples:
  1339. doAssert unindent(" First line\l and second line", 3) ==
  1340. "First line\land second line"
  1341. result = ""
  1342. var i = 0
  1343. for line in s.splitLines():
  1344. if i != 0:
  1345. result.add("\n")
  1346. var indentCount = 0
  1347. for j in 0..<count.int:
  1348. indentCount.inc
  1349. if j + padding.len-1 >= line.len or line[j .. j + padding.len-1] != padding:
  1350. indentCount = j
  1351. break
  1352. result.add(line[indentCount*padding.len .. ^1])
  1353. i.inc
  1354. proc unindent*(s: string): string
  1355. {.noSideEffect, rtl, extern: "nsuUnindentAll".} =
  1356. ## Removes all indentation composed of whitespace from each line in ``s``.
  1357. ##
  1358. ## See also:
  1359. ## * `align proc<#align,string,Natural,char>`_
  1360. ## * `alignLeft proc<#alignLeft,string,Natural,char>`_
  1361. ## * `spaces proc<#spaces,Natural>`_
  1362. ## * `indent proc<#indent,string,Natural,string>`_
  1363. runnableExamples:
  1364. let x = """
  1365. Hello
  1366. There
  1367. """.unindent()
  1368. doAssert x == "Hello\nThere\n"
  1369. unindent(s, 1000) # TODO: Passing a 1000 is a bit hackish.
  1370. proc delete*(s: var string, first, last: int) {.noSideEffect,
  1371. rtl, extern: "nsuDelete".} =
  1372. ## Deletes in `s` (must be declared as ``var``) the characters at positions
  1373. ## ``first ..last`` (both ends included).
  1374. ##
  1375. ## This modifies `s` itself, it does not return a copy.
  1376. runnableExamples:
  1377. var a = "abracadabra"
  1378. a.delete(4, 5)
  1379. doAssert a == "abradabra"
  1380. a.delete(1, 6)
  1381. doAssert a == "ara"
  1382. a.delete(2, 999)
  1383. doAssert a == "ar"
  1384. var i = first
  1385. var j = min(len(s), last+1)
  1386. var newLen = len(s)-j+i
  1387. while i < newLen:
  1388. s[i] = s[j]
  1389. inc(i)
  1390. inc(j)
  1391. setLen(s, newLen)
  1392. proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} =
  1393. ## Returns true if ``s`` starts with character ``prefix``.
  1394. ##
  1395. ## See also:
  1396. ## * `endsWith proc<#endsWith,string,char>`_
  1397. ## * `continuesWith proc<#continuesWith,string,string,Natural>`_
  1398. ## * `removePrefix proc<#removePrefix,string,char>`_
  1399. runnableExamples:
  1400. let a = "abracadabra"
  1401. doAssert a.startsWith('a') == true
  1402. doAssert a.startsWith('b') == false
  1403. result = s.len > 0 and s[0] == prefix
  1404. proc startsWith*(s, prefix: string): bool {.noSideEffect,
  1405. rtl, extern: "nsuStartsWith".} =
  1406. ## Returns true if ``s`` starts with string ``prefix``.
  1407. ##
  1408. ## If ``prefix == ""`` true is returned.
  1409. ##
  1410. ## See also:
  1411. ## * `endsWith proc<#endsWith,string,string>`_
  1412. ## * `continuesWith proc<#continuesWith,string,string,Natural>`_
  1413. ## * `removePrefix proc<#removePrefix,string,string>`_
  1414. runnableExamples:
  1415. let a = "abracadabra"
  1416. doAssert a.startsWith("abra") == true
  1417. doAssert a.startsWith("bra") == false
  1418. var i = 0
  1419. while true:
  1420. if i >= prefix.len: return true
  1421. if i >= s.len or s[i] != prefix[i]: return false
  1422. inc(i)
  1423. proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} =
  1424. ## Returns true if ``s`` ends with ``suffix``.
  1425. ##
  1426. ## See also:
  1427. ## * `startsWith proc<#startsWith,string,char>`_
  1428. ## * `continuesWith proc<#continuesWith,string,string,Natural>`_
  1429. ## * `removeSuffix proc<#removeSuffix,string,char>`_
  1430. runnableExamples:
  1431. let a = "abracadabra"
  1432. doAssert a.endsWith('a') == true
  1433. doAssert a.endsWith('b') == false
  1434. result = s.len > 0 and s[s.high] == suffix
  1435. proc endsWith*(s, suffix: string): bool {.noSideEffect,
  1436. rtl, extern: "nsuEndsWith".} =
  1437. ## Returns true if ``s`` ends with ``suffix``.
  1438. ##
  1439. ## If ``suffix == ""`` true is returned.
  1440. ##
  1441. ## See also:
  1442. ## * `startsWith proc<#startsWith,string,string>`_
  1443. ## * `continuesWith proc<#continuesWith,string,string,Natural>`_
  1444. ## * `removeSuffix proc<#removeSuffix,string,string>`_
  1445. runnableExamples:
  1446. let a = "abracadabra"
  1447. doAssert a.endsWith("abra") == true
  1448. doAssert a.endsWith("dab") == false
  1449. var i = 0
  1450. var j = len(s) - len(suffix)
  1451. while i+j >= 0 and i+j < s.len:
  1452. if s[i+j] != suffix[i]: return false
  1453. inc(i)
  1454. if i >= suffix.len: return true
  1455. proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect,
  1456. rtl, extern: "nsuContinuesWith".} =
  1457. ## Returns true if ``s`` continues with ``substr`` at position ``start``.
  1458. ##
  1459. ## If ``substr == ""`` true is returned.
  1460. ##
  1461. ## See also:
  1462. ## * `startsWith proc<#startsWith,string,string>`_
  1463. ## * `endsWith proc<#endsWith,string,string>`_
  1464. runnableExamples:
  1465. let a = "abracadabra"
  1466. doAssert a.continuesWith("ca", 4) == true
  1467. doAssert a.continuesWith("ca", 5) == false
  1468. doAssert a.continuesWith("dab", 6) == true
  1469. var i = 0
  1470. while true:
  1471. if i >= substr.len: return true
  1472. if i+start >= s.len or s[i+start] != substr[i]: return false
  1473. inc(i)
  1474. proc removePrefix*(s: var string, chars: set[char] = Newlines) {.
  1475. rtl, extern: "nsuRemovePrefixCharSet".} =
  1476. ## Removes all characters from `chars` from the start of the string `s`
  1477. ## (in-place).
  1478. ##
  1479. ## See also:
  1480. ## * `removeSuffix proc<#removeSuffix,string,set[char]>`_
  1481. runnableExamples:
  1482. var userInput = "\r\n*~Hello World!"
  1483. userInput.removePrefix
  1484. doAssert userInput == "*~Hello World!"
  1485. userInput.removePrefix({'~', '*'})
  1486. doAssert userInput == "Hello World!"
  1487. var otherInput = "?!?Hello!?!"
  1488. otherInput.removePrefix({'!', '?'})
  1489. doAssert otherInput == "Hello!?!"
  1490. var start = 0
  1491. while start < s.len and s[start] in chars: start += 1
  1492. if start > 0: s.delete(0, start - 1)
  1493. proc removePrefix*(s: var string, c: char) {.
  1494. rtl, extern: "nsuRemovePrefixChar".} =
  1495. ## Removes all occurrences of a single character (in-place) from the start
  1496. ## of a string.
  1497. ##
  1498. ## See also:
  1499. ## * `removeSuffix proc<#removeSuffix,string,char>`_
  1500. ## * `startsWith proc<#startsWith,string,char>`_
  1501. runnableExamples:
  1502. var ident = "pControl"
  1503. ident.removePrefix('p')
  1504. doAssert ident == "Control"
  1505. removePrefix(s, chars = {c})
  1506. proc removePrefix*(s: var string, prefix: string) {.
  1507. rtl, extern: "nsuRemovePrefixString".} =
  1508. ## Remove the first matching prefix (in-place) from a string.
  1509. ##
  1510. ## See also:
  1511. ## * `removeSuffix proc<#removeSuffix,string,string>`_
  1512. ## * `startsWith proc<#startsWith,string,string>`_
  1513. runnableExamples:
  1514. var answers = "yesyes"
  1515. answers.removePrefix("yes")
  1516. doAssert answers == "yes"
  1517. if s.startsWith(prefix):
  1518. s.delete(0, prefix.len - 1)
  1519. proc removeSuffix*(s: var string, chars: set[char] = Newlines) {.
  1520. rtl, extern: "nsuRemoveSuffixCharSet".} =
  1521. ## Removes all characters from `chars` from the end of the string `s`
  1522. ## (in-place).
  1523. ##
  1524. ## See also:
  1525. ## * `removePrefix proc<#removePrefix,string,set[char]>`_
  1526. runnableExamples:
  1527. var userInput = "Hello World!*~\r\n"
  1528. userInput.removeSuffix
  1529. doAssert userInput == "Hello World!*~"
  1530. userInput.removeSuffix({'~', '*'})
  1531. doAssert userInput == "Hello World!"
  1532. var otherInput = "Hello!?!"
  1533. otherInput.removeSuffix({'!', '?'})
  1534. doAssert otherInput == "Hello"
  1535. if s.len == 0: return
  1536. var last = s.high
  1537. while last > -1 and s[last] in chars: last -= 1
  1538. s.setLen(last + 1)
  1539. proc removeSuffix*(s: var string, c: char) {.
  1540. rtl, extern: "nsuRemoveSuffixChar".} =
  1541. ## Removes all occurrences of a single character (in-place) from the end
  1542. ## of a string.
  1543. ##
  1544. ## See also:
  1545. ## * `removePrefix proc<#removePrefix,string,char>`_
  1546. ## * `endsWith proc<#endsWith,string,char>`_
  1547. runnableExamples:
  1548. var table = "users"
  1549. table.removeSuffix('s')
  1550. doAssert table == "user"
  1551. var dots = "Trailing dots......."
  1552. dots.removeSuffix('.')
  1553. doAssert dots == "Trailing dots"
  1554. removeSuffix(s, chars = {c})
  1555. proc removeSuffix*(s: var string, suffix: string) {.
  1556. rtl, extern: "nsuRemoveSuffixString".} =
  1557. ## Remove the first matching suffix (in-place) from a string.
  1558. ##
  1559. ## See also:
  1560. ## * `removePrefix proc<#removePrefix,string,string>`_
  1561. ## * `endsWith proc<#endsWith,string,string>`_
  1562. runnableExamples:
  1563. var answers = "yeses"
  1564. answers.removeSuffix("es")
  1565. doAssert answers == "yes"
  1566. var newLen = s.len
  1567. if s.endsWith(suffix):
  1568. newLen -= len(suffix)
  1569. s.setLen(newLen)
  1570. proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0)
  1571. {.noSideEffect, inline.} =
  1572. ## Adds a separator to `dest` only if its length is bigger than `startLen`.
  1573. ##
  1574. ## A shorthand for:
  1575. ##
  1576. ## .. code-block:: nim
  1577. ## if dest.len > startLen: add(dest, sep)
  1578. ##
  1579. ## This is often useful for generating some code where the items need to
  1580. ## be *separated* by `sep`. `sep` is only added if `dest` is longer than
  1581. ## `startLen`. The following example creates a string describing
  1582. ## an array of integers.
  1583. runnableExamples:
  1584. var arr = "["
  1585. for x in items([2, 3, 5, 7, 11]):
  1586. addSep(arr, startLen = len("["))
  1587. add(arr, $x)
  1588. add(arr, "]")
  1589. doAssert arr == "[2, 3, 5, 7, 11]"
  1590. if dest.len > startLen: add(dest, sep)
  1591. proc allCharsInSet*(s: string, theSet: set[char]): bool =
  1592. ## Returns true if every character of `s` is in the set `theSet`.
  1593. runnableExamples:
  1594. doAssert allCharsInSet("aeea", {'a', 'e'}) == true
  1595. doAssert allCharsInSet("", {'a', 'e'}) == true
  1596. for c in items(s):
  1597. if c notin theSet: return false
  1598. return true
  1599. proc abbrev*(s: string, possibilities: openArray[string]): int =
  1600. ## Returns the index of the first item in ``possibilities`` which starts
  1601. ## with ``s``, if not ambiguous.
  1602. ##
  1603. ## Returns -1 if no item has been found and -2 if multiple items match.
  1604. runnableExamples:
  1605. doAssert abbrev("fac", ["college", "faculty", "industry"]) == 1
  1606. doAssert abbrev("foo", ["college", "faculty", "industry"]) == -1 # Not found
  1607. doAssert abbrev("fac", ["college", "faculty", "faculties"]) == -2 # Ambiguous
  1608. doAssert abbrev("college", ["college", "colleges", "industry"]) == 0
  1609. result = -1 # none found
  1610. for i in 0..possibilities.len-1:
  1611. if possibilities[i].startsWith(s):
  1612. if possibilities[i] == s:
  1613. # special case: exact match shouldn't be ambiguous
  1614. return i
  1615. if result >= 0: return -2 # ambiguous
  1616. result = i
  1617. # ---------------------------------------------------------------------------
  1618. proc join*(a: openArray[string], sep: string = ""): string {.
  1619. noSideEffect, rtl, extern: "nsuJoinSep".} =
  1620. ## Concatenates all strings in the container `a`, separating them with `sep`.
  1621. runnableExamples:
  1622. doAssert join(["A", "B", "Conclusion"], " -> ") == "A -> B -> Conclusion"
  1623. if len(a) > 0:
  1624. var L = sep.len * (a.len-1)
  1625. for i in 0..high(a): inc(L, a[i].len)
  1626. result = newStringOfCap(L)
  1627. add(result, a[0])
  1628. for i in 1..high(a):
  1629. add(result, sep)
  1630. add(result, a[i])
  1631. else:
  1632. result = ""
  1633. proc join*[T: not string](a: openArray[T], sep: string = ""): string {.
  1634. noSideEffect, rtl.} =
  1635. ## Converts all elements in the container `a` to strings using `$`,
  1636. ## and concatenates them with `sep`.
  1637. runnableExamples:
  1638. doAssert join([1, 2, 3], " -> ") == "1 -> 2 -> 3"
  1639. result = ""
  1640. for i, x in a:
  1641. if i > 0:
  1642. add(result, sep)
  1643. add(result, $x)
type
  SkipTable* = array[char, int] ## Per-character shift table filled by
                                ## `initSkipTable` and consumed by the
                                ## `find` overload that takes a `SkipTable`.
  1646. proc initSkipTable*(a: var SkipTable, sub: string)
  1647. {.noSideEffect, rtl, extern: "nsuInitSkipTable".} =
  1648. ## Preprocess table `a` for `sub`.
  1649. let m = len(sub)
  1650. var i = 0
  1651. while i <= 0xff-7:
  1652. a[chr(i + 0)] = m
  1653. a[chr(i + 1)] = m
  1654. a[chr(i + 2)] = m
  1655. a[chr(i + 3)] = m
  1656. a[chr(i + 4)] = m
  1657. a[chr(i + 5)] = m
  1658. a[chr(i + 6)] = m
  1659. a[chr(i + 7)] = m
  1660. i += 8
  1661. for i in 0 ..< m - 1:
  1662. a[sub[i]] = m - 1 - i
  1663. proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int
  1664. {.noSideEffect, rtl, extern: "nsuFindStrA".} =
  1665. ## Searches for `sub` in `s` inside range `start`..`last` using preprocessed
  1666. ## table `a`. If `last` is unspecified, it defaults to `s.high` (the last
  1667. ## element).
  1668. ##
  1669. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1670. let
  1671. last = if last == 0: s.high else: last
  1672. subLast = sub.len - 1
  1673. if subLast == -1:
  1674. # this was an empty needle string,
  1675. # we count this as match in the first possible position:
  1676. return start
  1677. # This is an implementation of the Boyer-Moore Horspool algorithms
  1678. # https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
  1679. var skip = start
  1680. while last - skip >= subLast:
  1681. var i = subLast
  1682. while s[skip + i] == sub[i]:
  1683. if i == 0:
  1684. return skip
  1685. dec i
  1686. inc skip, a[s[skip + subLast]]
  1687. return -1
# `memchr` from the C runtime is only declared when compiling for a native
# target; `hasCStringBuiltin` records at compile time whether it is available
# so that callers can pick a pure-Nim fallback otherwise.
when not (defined(js) or defined(nimdoc) or defined(nimscript)):
  proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
              importc: "memchr", header: "<string.h>".}
  const hasCStringBuiltin = true
else:
  const hasCStringBuiltin = false
  1694. proc find*(s: string, sub: char, start: Natural = 0, last = 0): int {.noSideEffect,
  1695. rtl, extern: "nsuFindChar".} =
  1696. ## Searches for `sub` in `s` inside range ``start..last`` (both ends included).
  1697. ## If `last` is unspecified, it defaults to `s.high` (the last element).
  1698. ##
  1699. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1700. ## Otherwise the index returned is relative to ``s[0]``, not ``start``.
  1701. ## Use `s[start..last].rfind` for a ``start``-origin index.
  1702. ##
  1703. ## See also:
  1704. ## * `rfind proc<#rfind,string,char,Natural,int>`_
  1705. ## * `replace proc<#replace,string,char,char>`_
  1706. let last = if last == 0: s.high else: last
  1707. when nimvm:
  1708. for i in int(start)..last:
  1709. if sub == s[i]: return i
  1710. else:
  1711. when hasCStringBuiltin:
  1712. let L = last-start+1
  1713. if L > 0:
  1714. let found = c_memchr(s[start].unsafeAddr, sub, L)
  1715. if not found.isNil:
  1716. return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
  1717. else:
  1718. for i in int(start)..last:
  1719. if sub == s[i]: return i
  1720. return -1
  1721. proc find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.noSideEffect,
  1722. rtl, extern: "nsuFindCharSet".} =
  1723. ## Searches for `chars` in `s` inside range ``start..last`` (both ends included).
  1724. ## If `last` is unspecified, it defaults to `s.high` (the last element).
  1725. ##
  1726. ## If `s` contains none of the characters in `chars`, -1 is returned.
  1727. ## Otherwise the index returned is relative to ``s[0]``, not ``start``.
  1728. ## Use `s[start..last].find` for a ``start``-origin index.
  1729. ##
  1730. ## See also:
  1731. ## * `rfind proc<#rfind,string,set[char],Natural,int>`_
  1732. ## * `multiReplace proc<#multiReplace,string,varargs[]>`_
  1733. let last = if last == 0: s.high else: last
  1734. for i in int(start)..last:
  1735. if s[i] in chars: return i
  1736. return -1
  1737. proc find*(s, sub: string, start: Natural = 0, last = 0): int {.noSideEffect,
  1738. rtl, extern: "nsuFindStr".} =
  1739. ## Searches for `sub` in `s` inside range ``start..last`` (both ends included).
  1740. ## If `last` is unspecified, it defaults to `s.high` (the last element).
  1741. ##
  1742. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1743. ## Otherwise the index returned is relative to ``s[0]``, not ``start``.
  1744. ## Use `s[start..last].find` for a ``start``-origin index.
  1745. ##
  1746. ## See also:
  1747. ## * `rfind proc<#rfind,string,string,Natural,int>`_
  1748. ## * `replace proc<#replace,string,string,string>`_
  1749. if sub.len > s.len: return -1
  1750. if sub.len == 1: return find(s, sub[0], start, last)
  1751. var a {.noinit.}: SkipTable
  1752. initSkipTable(a, sub)
  1753. result = find(a, s, sub, start, last)
  1754. proc rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.noSideEffect,
  1755. rtl, extern: "nsuRFindChar".} =
  1756. ## Searches for `sub` in `s` inside range ``start..last`` (both ends included)
  1757. ## in reverse -- starting at high indexes and moving lower to the first
  1758. ## character or ``start``. If `last` is unspecified, it defaults to `s.high`
  1759. ## (the last element).
  1760. ##
  1761. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1762. ## Otherwise the index returned is relative to ``s[0]``, not ``start``.
  1763. ## Use `s[start..last].find` for a ``start``-origin index.
  1764. ##
  1765. ## See also:
  1766. ## * `find proc<#find,string,char,Natural,int>`_
  1767. let last = if last == -1: s.high else: last
  1768. for i in countdown(last, start):
  1769. if sub == s[i]: return i
  1770. return -1
  1771. proc rfind*(s: string, chars: set[char], start: Natural = 0, last = -1): int {.noSideEffect,
  1772. rtl, extern: "nsuRFindCharSet".} =
  1773. ## Searches for `chars` in `s` inside range ``start..last`` (both ends
  1774. ## included) in reverse -- starting at high indexes and moving lower to the
  1775. ## first character or ``start``. If `last` is unspecified, it defaults to
  1776. ## `s.high` (the last element).
  1777. ##
  1778. ## If `s` contains none of the characters in `chars`, -1 is returned.
  1779. ## Otherwise the index returned is relative to ``s[0]``, not ``start``.
  1780. ## Use `s[start..last].rfind` for a ``start``-origin index.
  1781. ##
  1782. ## See also:
  1783. ## * `find proc<#find,string,set[char],Natural,int>`_
  1784. let last = if last == -1: s.high else: last
  1785. for i in countdown(last, start):
  1786. if s[i] in chars: return i
  1787. return -1
  1788. proc rfind*(s, sub: string, start: Natural = 0, last = -1): int {.noSideEffect,
  1789. rtl, extern: "nsuRFindStr".} =
  1790. ## Searches for `sub` in `s` inside range ``start..last`` (both ends included)
  1791. ## included) in reverse -- starting at high indexes and moving lower to the
  1792. ## first character or ``start``. If `last` is unspecified, it defaults to
  1793. ## `s.high` (the last element).
  1794. ##
  1795. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1796. ## Otherwise the index returned is relative to ``s[0]``, not ``start``.
  1797. ## Use `s[start..last].rfind` for a ``start``-origin index.
  1798. ##
  1799. ## See also:
  1800. ## * `find proc<#find,string,string,Natural,int>`_
  1801. if sub.len == 0:
  1802. return -1
  1803. let last = if last == -1: s.high else: last
  1804. for i in countdown(last - sub.len + 1, start):
  1805. for j in 0..sub.len-1:
  1806. result = i
  1807. if sub[j] != s[i+j]:
  1808. result = -1
  1809. break
  1810. if result != -1: return
  1811. return -1
  1812. proc count*(s: string, sub: char): int {.noSideEffect,
  1813. rtl, extern: "nsuCountChar".} =
  1814. ## Count the occurrences of the character `sub` in the string `s`.
  1815. ##
  1816. ## See also:
  1817. ## * `countLines proc<#countLines,string>`_
  1818. for c in s:
  1819. if c == sub: inc result
  1820. proc count*(s: string, subs: set[char]): int {.noSideEffect,
  1821. rtl, extern: "nsuCountCharSet".} =
  1822. ## Count the occurrences of the group of character `subs` in the string `s`.
  1823. ##
  1824. ## See also:
  1825. ## * `countLines proc<#countLines,string>`_
  1826. doAssert card(subs) > 0
  1827. for c in s:
  1828. if c in subs: inc result
  1829. proc count*(s: string, sub: string, overlapping: bool = false): int {.
  1830. noSideEffect, rtl, extern: "nsuCountString".} =
  1831. ## Count the occurrences of a substring `sub` in the string `s`.
  1832. ## Overlapping occurrences of `sub` only count when `overlapping`
  1833. ## is set to true (default: false).
  1834. ##
  1835. ## See also:
  1836. ## * `countLines proc<#countLines,string>`_
  1837. doAssert sub.len > 0
  1838. var i = 0
  1839. while true:
  1840. i = s.find(sub, i)
  1841. if i < 0: break
  1842. if overlapping: inc i
  1843. else: i += sub.len
  1844. inc result
  1845. proc countLines*(s: string): int {.noSideEffect,
  1846. rtl, extern: "nsuCountLines".} =
  1847. ## Returns the number of lines in the string `s`.
  1848. ##
  1849. ## This is the same as ``len(splitLines(s))``, but much more efficient
  1850. ## because it doesn't modify the string creating temporal objects. Every
  1851. ## `character literal <manual.html#lexical-analysis-character-literals>`_
  1852. ## newline combination (CR, LF, CR-LF) is supported.
  1853. ##
  1854. ## In this context, a line is any string separated by a newline combination.
  1855. ## A line can be an empty string.
  1856. ##
  1857. ## See also:
  1858. ## * `splitLines proc<#splitLines,string>`_
  1859. runnableExamples:
  1860. doAssert countLines("First line\l and second line.") == 2
  1861. result = 1
  1862. var i = 0
  1863. while i < s.len:
  1864. case s[i]
  1865. of '\c':
  1866. if i+1 < s.len and s[i+1] == '\l': inc i
  1867. inc result
  1868. of '\l': inc result
  1869. else: discard
  1870. inc i
  1871. proc contains*(s, sub: string): bool {.noSideEffect.} =
  1872. ## Same as ``find(s, sub) >= 0``.
  1873. ##
  1874. ## See also:
  1875. ## * `find proc<#find,string,string,Natural,int>`_
  1876. return find(s, sub) >= 0
  1877. proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} =
  1878. ## Same as ``find(s, chars) >= 0``.
  1879. ##
  1880. ## See also:
  1881. ## * `find proc<#find,string,set[char],Natural,int>`_
  1882. return find(s, chars) >= 0
  1883. proc replace*(s, sub: string, by = ""): string {.noSideEffect,
  1884. rtl, extern: "nsuReplaceStr".} =
  1885. ## Replaces `sub` in `s` by the string `by`.
  1886. ##
  1887. ## See also:
  1888. ## * `find proc<#find,string,string,Natural,int>`_
  1889. ## * `replace proc<#replace,string,char,char>`_ for replacing
  1890. ## single characters
  1891. ## * `replaceWord proc<#replaceWord,string,string,string>`_
  1892. ## * `multiReplace proc<#multiReplace,string,varargs[]>`_
  1893. result = ""
  1894. let subLen = sub.len
  1895. if subLen == 0:
  1896. result = s
  1897. elif subLen == 1:
  1898. # when the pattern is a single char, we use a faster
  1899. # char-based search that doesn't need a skip table:
  1900. let c = sub[0]
  1901. let last = s.high
  1902. var i = 0
  1903. while true:
  1904. let j = find(s, c, i, last)
  1905. if j < 0: break
  1906. add result, substr(s, i, j - 1)
  1907. add result, by
  1908. i = j + subLen
  1909. # copy the rest:
  1910. add result, substr(s, i)
  1911. else:
  1912. var a {.noinit.}: SkipTable
  1913. initSkipTable(a, sub)
  1914. let last = s.high
  1915. var i = 0
  1916. while true:
  1917. let j = find(a, s, sub, i, last)
  1918. if j < 0: break
  1919. add result, substr(s, i, j - 1)
  1920. add result, by
  1921. i = j + subLen
  1922. # copy the rest:
  1923. add result, substr(s, i)
  1924. proc replace*(s: string, sub, by: char): string {.noSideEffect,
  1925. rtl, extern: "nsuReplaceChar".} =
  1926. ## Replaces `sub` in `s` by the character `by`.
  1927. ##
  1928. ## Optimized version of `replace <#replace,string,string,string>`_ for
  1929. ## characters.
  1930. ##
  1931. ## See also:
  1932. ## * `find proc<#find,string,char,Natural,int>`_
  1933. ## * `replaceWord proc<#replaceWord,string,string,string>`_
  1934. ## * `multiReplace proc<#multiReplace,string,varargs[]>`_
  1935. result = newString(s.len)
  1936. var i = 0
  1937. while i < s.len:
  1938. if s[i] == sub: result[i] = by
  1939. else: result[i] = s[i]
  1940. inc(i)
  1941. proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
  1942. rtl, extern: "nsuReplaceWord".} =
  1943. ## Replaces `sub` in `s` by the string `by`.
  1944. ##
  1945. ## Each occurrence of `sub` has to be surrounded by word boundaries
  1946. ## (comparable to ``\b`` in regular expressions), otherwise it is not
  1947. ## replaced.
  1948. if sub.len == 0: return s
  1949. const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
  1950. var a {.noinit.}: SkipTable
  1951. result = ""
  1952. initSkipTable(a, sub)
  1953. var i = 0
  1954. let last = s.high
  1955. let sublen = sub.len
  1956. if sublen > 0:
  1957. while true:
  1958. var j = find(a, s, sub, i, last)
  1959. if j < 0: break
  1960. # word boundary?
  1961. if (j == 0 or s[j-1] notin wordChars) and
  1962. (j+sub.len >= s.len or s[j+sub.len] notin wordChars):
  1963. add result, substr(s, i, j - 1)
  1964. add result, by
  1965. i = j + sublen
  1966. else:
  1967. add result, substr(s, i, j)
  1968. i = j + 1
  1969. # copy the rest:
  1970. add result, substr(s, i)
  1971. proc multiReplace*(s: string, replacements: varargs[(string, string)]):
  1972. string {.noSideEffect.} =
  1973. ## Same as replace, but specialized for doing multiple replacements in a single
  1974. ## pass through the input string.
  1975. ##
  1976. ## `multiReplace` performs all replacements in a single pass, this means it
  1977. ## can be used to swap the occurrences of "a" and "b", for instance.
  1978. ##
  1979. ## If the resulting string is not longer than the original input string,
  1980. ## only a single memory allocation is required.
  1981. ##
  1982. ## The order of the replacements does matter. Earlier replacements are
  1983. ## preferred over later replacements in the argument list.
  1984. result = newStringOfCap(s.len)
  1985. var i = 0
  1986. var fastChk: set[char] = {}
  1987. for sub, by in replacements.items:
  1988. if sub.len > 0:
  1989. # Include first character of all replacements
  1990. fastChk.incl sub[0]
  1991. while i < s.len:
  1992. block sIteration:
  1993. # Assume most chars in s are not candidates for any replacement operation
  1994. if s[i] in fastChk:
  1995. for sub, by in replacements.items:
  1996. if sub.len > 0 and s.continuesWith(sub, i):
  1997. add result, by
  1998. inc(i, sub.len)
  1999. break sIteration
  2000. # No matching replacement found
  2001. # copy current character from s
  2002. add result, s[i]
  2003. inc(i)
  2004. proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect,
  2005. rtl, extern: "nsuInsertSep".} =
  2006. ## Inserts the separator `sep` after `digits` characters (default: 3)
  2007. ## from right to left.
  2008. ##
  2009. ## Even though the algorithm works with any string `s`, it is only useful
  2010. ## if `s` contains a number.
  2011. runnableExamples:
  2012. doAssert insertSep("1000000") == "1_000_000"
  2013. var L = (s.len-1) div digits + s.len
  2014. result = newString(L)
  2015. var j = 0
  2016. dec(L)
  2017. for i in countdown(len(s)-1, 0):
  2018. if j == digits:
  2019. result[L] = sep
  2020. dec(L)
  2021. j = 0
  2022. result[L] = s[i]
  2023. inc(j)
  2024. dec(L)
  2025. proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  2026. rtl, extern: "nsuEscape".} =
  2027. ## Escapes a string `s`. See `system.addEscapedChar
  2028. ## <system.html#addEscapedChar,string,char>`_ for the escaping scheme.
  2029. ##
  2030. ## The resulting string is prefixed with `prefix` and suffixed with `suffix`.
  2031. ## Both may be empty strings.
  2032. ##
  2033. ## See also:
  2034. ## * `unescape proc<#unescape,string,string,string>`_ for the opposite
  2035. ## operation
  2036. result = newStringOfCap(s.len + s.len shr 2)
  2037. result.add(prefix)
  2038. for c in items(s):
  2039. case c
  2040. of '\0'..'\31', '\127'..'\255':
  2041. add(result, "\\x")
  2042. add(result, toHex(ord(c), 2))
  2043. of '\\': add(result, "\\\\")
  2044. of '\'': add(result, "\\'")
  2045. of '\"': add(result, "\\\"")
  2046. else: add(result, c)
  2047. add(result, suffix)
  2048. proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  2049. rtl, extern: "nsuUnescape".} =
  2050. ## Unescapes a string `s`.
  2051. ##
  2052. ## This complements `escape proc<#escape,string,string,string>`_
  2053. ## as it performs the opposite operations.
  2054. ##
  2055. ## If `s` does not begin with ``prefix`` and end with ``suffix`` a
  2056. ## ValueError exception will be raised.
  2057. result = newStringOfCap(s.len)
  2058. var i = prefix.len
  2059. if not s.startsWith(prefix):
  2060. raise newException(ValueError,
  2061. "String does not start with: " & prefix)
  2062. while true:
  2063. if i >= s.len-suffix.len: break
  2064. if s[i] == '\\':
  2065. if i+1 >= s.len:
  2066. result.add('\\')
  2067. break
  2068. case s[i+1]:
  2069. of 'x':
  2070. inc i, 2
  2071. var c: int
  2072. i += parseutils.parseHex(s, c, i, maxLen = 2)
  2073. result.add(chr(c))
  2074. dec i, 2
  2075. of '\\':
  2076. result.add('\\')
  2077. of '\'':
  2078. result.add('\'')
  2079. of '\"':
  2080. result.add('\"')
  2081. else:
  2082. result.add("\\" & s[i+1])
  2083. inc(i, 2)
  2084. else:
  2085. result.add(s[i])
  2086. inc(i)
  2087. if not s.endsWith(suffix):
  2088. raise newException(ValueError,
  2089. "String does not end in: " & suffix)
  2090. proc validIdentifier*(s: string): bool {.noSideEffect,
  2091. rtl, extern: "nsuValidIdentifier".} =
  2092. ## Returns true if `s` is a valid identifier.
  2093. ##
  2094. ## A valid identifier starts with a character of the set `IdentStartChars`
  2095. ## and is followed by any number of characters of the set `IdentChars`.
  2096. runnableExamples:
  2097. doAssert "abc_def08".validIdentifier
  2098. if s.len > 0 and s[0] in IdentStartChars:
  2099. for i in 1..s.len-1:
  2100. if s[i] notin IdentChars: return false
  2101. return true
# floating point formatting:

# C's variadic `sprintf`, imported for every backend except JavaScript.
when not defined(js):
  proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
                                     importc: "sprintf", varargs, noSideEffect.}
type
  FloatFormatMode* = enum
    ## the different modes of floating point formatting
    ## (selected through the `format` parameter of `formatBiggestFloat`
    ## and `formatFloat`)
    ffDefault,   ## use the shorter floating point notation
    ffDecimal,   ## use decimal floating point notation
    ffScientific ## use scientific notation (using ``e`` character)
proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
                         precision: range[-1..32] = 16;
                         decimalSep = '.'): string {.
                         noSideEffect, rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string.
  ##
  ## If ``format == ffDecimal`` then precision is the number of digits to
  ## be printed after the decimal point.
  ## If ``format == ffScientific`` then precision is the maximum number
  ## of significant digits to be printed.
  ## `precision`'s default value is the maximum number of meaningful digits
  ## after the decimal point for Nim's ``biggestFloat`` type.
  ##
  ## If ``precision == -1``, it tries to format it nicely.
  ##
  ## `decimalSep` replaces whichever decimal separator (``.`` or ``,``) the
  ## backend produced.
  runnableExamples:
    let x = 123.456
    doAssert x.formatBiggestFloat() == "123.4560000000000"
    doAssert x.formatBiggestFloat(ffDecimal, 4) == "123.4560"
    doAssert x.formatBiggestFloat(ffScientific, 2) == "1.23e+02"
  when defined(js):
    # JavaScript backend: delegate to the Number formatting methods.
    var precision = precision
    if precision == -1:
      # use the same default precision as c_sprintf
      precision = 6
    var res: cstring
    case format
    of ffDefault:
      {.emit: "`res` = `f`.toString();".}
    of ffDecimal:
      {.emit: "`res` = `f`.toFixed(`precision`);".}
    of ffScientific:
      {.emit: "`res` = `f`.toExponential(`precision`);".}
    result = $res
    if 1.0 / f == -Inf:
      # JavaScript removes the "-" from negative Zero, add it back here
      result = "-" & $res
    for i in 0 ..< result.len:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if result[i] in {'.', ','}: result[i] = decimalsep
  else:
    # Native backends: assemble a printf format string and call sprintf.
    # The conversion character is looked up by the `format` mode.
    const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
    var
      frmtstr {.noinit.}: array[0..5, char]
      buf {.noinit.}: array[0..2500, char]
      L: cint
    frmtstr[0] = '%'
    if precision >= 0:
      # Format is "%#.*<conv>": the `*` makes sprintf read the precision
      # from the argument list.
      frmtstr[1] = '#'
      frmtstr[2] = '.'
      frmtstr[3] = '*'
      frmtstr[4] = floatFormatToChar[format]
      frmtstr[5] = '\0'
      when defined(nimNoArrayToCstringConversion):
        L = c_sprintf(addr buf, addr frmtstr, precision, f)
      else:
        L = c_sprintf(buf, frmtstr, precision, f)
    else:
      # precision == -1: plain "%<conv>" without an explicit precision
      frmtstr[1] = floatFormatToChar[format]
      frmtstr[2] = '\0'
      when defined(nimNoArrayToCstringConversion):
        L = c_sprintf(addr buf, addr frmtstr, f)
      else:
        L = c_sprintf(buf, frmtstr, f)
    # `L` is sprintf's return value; copy that many characters out of `buf`.
    result = newString(L)
    for i in 0 ..< L:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if buf[i] in {'.', ','}: result[i] = decimalSep
      else: result[i] = buf[i]
    when defined(windows):
      # VS pre 2015 violates the C standard: "The exponent always contains at
      # least two digits, and only as many more digits as necessary to
      # represent the exponent." [C11 §7.21.6.1]
      # The following post-processing fixes this behavior.
      if result.len > 4 and result[^4] == '+' and result[^3] == '0':
        result[^3] = result[^2]
        result[^2] = result[^1]
        result.setLen(result.len - 1)
  2191. proc formatFloat*(f: float, format: FloatFormatMode = ffDefault,
  2192. precision: range[-1..32] = 16; decimalSep = '.'): string {.
  2193. noSideEffect, rtl, extern: "nsu$1".} =
  2194. ## Converts a floating point value `f` to a string.
  2195. ##
  2196. ## If ``format == ffDecimal`` then precision is the number of digits to
  2197. ## be printed after the decimal point.
  2198. ## If ``format == ffScientific`` then precision is the maximum number
  2199. ## of significant digits to be printed.
  2200. ## `precision`'s default value is the maximum number of meaningful digits
  2201. ## after the decimal point for Nim's ``float`` type.
  2202. ##
  2203. ## If ``precision == -1``, it tries to format it nicely.
  2204. runnableExamples:
  2205. let x = 123.456
  2206. doAssert x.formatFloat() == "123.4560000000000"
  2207. doAssert x.formatFloat(ffDecimal, 4) == "123.4560"
  2208. doAssert x.formatFloat(ffScientific, 2) == "1.23e+02"
  2209. result = formatBiggestFloat(f, format, precision, decimalSep)
  2210. proc trimZeros*(x: var string) {.noSideEffect.} =
  2211. ## Trim trailing zeros from a formatted floating point
  2212. ## value `x` (must be declared as ``var``).
  2213. ##
  2214. ## This modifies `x` itself, it does not return a copy.
  2215. runnableExamples:
  2216. var x = "123.456000000"
  2217. x.trimZeros()
  2218. doAssert x == "123.456"
  2219. var spl: seq[string]
  2220. if x.contains('.') or x.contains(','):
  2221. if x.contains('e'):
  2222. spl = x.split('e')
  2223. x = spl[0]
  2224. while x[x.high] == '0':
  2225. x.setLen(x.len-1)
  2226. if x[x.high] in [',', '.']:
  2227. x.setLen(x.len-1)
  2228. if spl.len > 0:
  2229. x &= "e" & spl[1]
type
  BinaryPrefixMode* = enum ## the different names for binary prefixes
                           ## (selected through the `prefix` parameter of
                           ## `formatSize`)
    bpIEC,                 ## use the IEC/ISO standard prefixes such as kibi
    bpColloquial           ## use the colloquial kilo, mega etc
  2234. proc formatSize*(bytes: int64,
  2235. decimalSep = '.',
  2236. prefix = bpIEC,
  2237. includeSpace = false): string {.noSideEffect.} =
  2238. ## Rounds and formats `bytes`.
  2239. ##
  2240. ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be
  2241. ## formatted as 1KiB. Set prefix to `bpColloquial` to use the colloquial
  2242. ## names from the SI standard (e.g. k for 1000 being reused as 1024).
  2243. ##
  2244. ## `includeSpace` can be set to true to include the (SI preferred) space
  2245. ## between the number and the unit (e.g. 1 KiB).
  2246. ##
  2247. ## See also:
  2248. ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  2249. runnableExamples:
  2250. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
  2251. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  2252. doAssert formatSize(4096, includeSpace = true) == "4 KiB"
  2253. doAssert formatSize(4096, prefix = bpColloquial, includeSpace = true) == "4 kB"
  2254. doAssert formatSize(4096) == "4KiB"
  2255. doAssert formatSize(5_378_934, prefix = bpColloquial, decimalSep = ',') == "5,13MB"
  2256. const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
  2257. const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
  2258. var
  2259. xb: int64 = bytes
  2260. fbytes: float
  2261. lastXb: int64 = bytes
  2262. matchedIndex: int
  2263. prefixes: array[9, string]
  2264. if prefix == bpColloquial:
  2265. prefixes = collPrefixes
  2266. else:
  2267. prefixes = iecPrefixes
  2268. # Iterate through prefixes seeing if value will be greater than
  2269. # 0 in each case
  2270. for index in 1..<prefixes.len:
  2271. lastXb = xb
  2272. xb = bytes div (1'i64 shl (index*10))
  2273. matchedIndex = index
  2274. if xb == 0:
  2275. xb = lastXb
  2276. matchedIndex = index - 1
  2277. break
  2278. # xb has the integer number for the latest value; index should be correct
  2279. fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
  2280. result = formatFloat(fbytes, format = ffDecimal, precision = 3,
  2281. decimalSep = decimalSep)
  2282. result.trimZeros()
  2283. if includeSpace:
  2284. result &= " "
  2285. result &= prefixes[matchedIndex]
  2286. result &= "B"
  2287. proc formatEng*(f: BiggestFloat,
  2288. precision: range[0..32] = 10,
  2289. trim: bool = true,
  2290. siPrefix: bool = false,
  2291. unit: string = "",
  2292. decimalSep = '.',
  2293. useUnitSpace = false): string {.noSideEffect.} =
  2294. ## Converts a floating point value `f` to a string using engineering notation.
  2295. ##
  2296. ## Numbers in of the range -1000.0<f<1000.0 will be formatted without an
  2297. ## exponent. Numbers outside of this range will be formatted as a
  2298. ## significand in the range -1000.0<f<1000.0 and an exponent that will always
  2299. ## be an integer multiple of 3, corresponding with the SI prefix scale k, M,
  2300. ## G, T etc for numbers with an absolute value greater than 1 and m, μ, n, p
  2301. ## etc for numbers with an absolute value less than 1.
  2302. ##
  2303. ## The default configuration (`trim=true` and `precision=10`) shows the
  2304. ## **shortest** form that precisely (up to a maximum of 10 decimal places)
  2305. ## displays the value. For example, 4.100000 will be displayed as 4.1 (which
  2306. ## is mathematically identical) whereas 4.1000003 will be displayed as
  2307. ## 4.1000003.
  2308. ##
  2309. ## If `trim` is set to true, trailing zeros will be removed; if false, the
  2310. ## number of digits specified by `precision` will always be shown.
  2311. ##
  2312. ## `precision` can be used to set the number of digits to be shown after the
  2313. ## decimal point or (if `trim` is true) the maximum number of digits to be
  2314. ## shown.
  2315. ##
  2316. ## .. code-block:: nim
  2317. ##
  2318. ## formatEng(0, 2, trim=false) == "0.00"
  2319. ## formatEng(0, 2) == "0"
  2320. ## formatEng(0.053, 0) == "53e-3"
  2321. ## formatEng(52731234, 2) == "52.73e6"
  2322. ## formatEng(-52731234, 2) == "-52.73e6"
  2323. ##
  2324. ## If `siPrefix` is set to true, the number will be displayed with the SI
  2325. ## prefix corresponding to the exponent. For example 4100 will be displayed
  2326. ## as "4.1 k" instead of "4.1e3". Note that `u` is used for micro- in place
  2327. ## of the greek letter mu (μ) as per ISO 2955. Numbers with an absolute
  2328. ## value outside of the range 1e-18<f<1000e18 (1a<f<1000E) will be displayed
  2329. ## with an exponent rather than an SI prefix, regardless of whether
  2330. ## `siPrefix` is true.
  2331. ##
  2332. ## If `useUnitSpace` is true, the provided unit will be appended to the string
  2333. ## (with a space as required by the SI standard). This behaviour is slightly
  2334. ## different to appending the unit to the result as the location of the space
  2335. ## is altered depending on whether there is an exponent.
  2336. ##
  2337. ## .. code-block:: nim
  2338. ##
  2339. ## formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
  2340. ## formatEng(4.1, siPrefix=true, unit="V") == "4.1 V"
  2341. ## formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
  2342. ## formatEng(4100, siPrefix=true) == "4.1 k"
  2343. ## formatEng(4.1, siPrefix=true, unit="") == "4.1 " # Space with unit=""
  2344. ## formatEng(4100, siPrefix=true, unit="") == "4.1 k"
  2345. ## formatEng(4100) == "4.1e3"
  2346. ## formatEng(4100, unit="V") == "4.1e3 V"
  2347. ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true
  2348. ##
  2349. ## `decimalSep` is used as the decimal separator.
  2350. ##
  2351. ## See also:
  2352. ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  2353. var
  2354. absolute: BiggestFloat
  2355. significand: BiggestFloat
  2356. fexponent: BiggestFloat
  2357. exponent: int
  2358. splitResult: seq[string]
  2359. suffix: string = ""
  2360. proc getPrefix(exp: int): char =
  2361. ## Get the SI prefix for a given exponent
  2362. ##
  2363. ## Assumes exponent is a multiple of 3; returns ' ' if no prefix found
  2364. const siPrefixes = ['a', 'f', 'p', 'n', 'u', 'm', ' ', 'k', 'M', 'G', 'T',
  2365. 'P', 'E']
  2366. var index: int = (exp div 3) + 6
  2367. result = ' '
  2368. if index in low(siPrefixes)..high(siPrefixes):
  2369. result = siPrefixes[index]
  2370. # Most of the work is done with the sign ignored, so get the absolute value
  2371. absolute = abs(f)
  2372. significand = f
  2373. if absolute == 0.0:
  2374. # Simple case: just format it and force the exponent to 0
  2375. exponent = 0
  2376. result = significand.formatBiggestFloat(ffDecimal, precision,
  2377. decimalSep = '.')
  2378. else:
  2379. # Find the best exponent that's a multiple of 3
  2380. fexponent = floor(log10(absolute))
  2381. fexponent = 3.0 * floor(fexponent / 3.0)
  2382. # Adjust the significand for the new exponent
  2383. significand /= pow(10.0, fexponent)
  2384. # Adjust the significand and check whether it has affected
  2385. # the exponent
  2386. absolute = abs(significand)
  2387. if absolute >= 1000.0:
  2388. significand *= 0.001
  2389. fexponent += 3
  2390. # Components of the result:
  2391. result = significand.formatBiggestFloat(ffDecimal, precision,
  2392. decimalSep = '.')
  2393. exponent = fexponent.int()
  2394. splitResult = result.split('.')
  2395. result = splitResult[0]
  2396. # result should have at most one decimal character
  2397. if splitResult.len() > 1:
  2398. # If trim is set, we get rid of trailing zeros. Don't use trimZeros here as
  2399. # we can be a bit more efficient through knowledge that there will never be
  2400. # an exponent in this part.
  2401. if trim:
  2402. while splitResult[1].endsWith("0"):
  2403. # Trim last character
  2404. splitResult[1].setLen(splitResult[1].len-1)
  2405. if splitResult[1].len() > 0:
  2406. result &= decimalSep & splitResult[1]
  2407. else:
  2408. result &= decimalSep & splitResult[1]
  2409. # Combine the results accordingly
  2410. if siPrefix and exponent != 0:
  2411. var p = getPrefix(exponent)
  2412. if p != ' ':
  2413. suffix = " " & p
  2414. exponent = 0 # Exponent replaced by SI prefix
  2415. if suffix == "" and useUnitSpace:
  2416. suffix = " "
  2417. suffix &= unit
  2418. if exponent != 0:
  2419. result &= "e" & $exponent
  2420. result &= suffix
  2421. proc findNormalized(x: string, inArray: openArray[string]): int =
  2422. var i = 0
  2423. while i < high(inArray):
  2424. if cmpIgnoreStyle(x, inArray[i]) == 0: return i
  2425. inc(i, 2) # incrementing by 1 would probably lead to a
  2426. # security hole...
  2427. return -1
  2428. proc invalidFormatString() {.noinline.} =
  2429. raise newException(ValueError, "invalid format string")
  2430. proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
  2431. noSideEffect, rtl, extern: "nsuAddf".} =
  2432. ## The same as ``add(s, formatstr % a)``, but more efficient.
  2433. const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  2434. var i = 0
  2435. var num = 0
  2436. while i < len(formatstr):
  2437. if formatstr[i] == '$' and i+1 < len(formatstr):
  2438. case formatstr[i+1]
  2439. of '#':
  2440. if num > a.high: invalidFormatString()
  2441. add s, a[num]
  2442. inc i, 2
  2443. inc num
  2444. of '$':
  2445. add s, '$'
  2446. inc(i, 2)
  2447. of '1'..'9', '-':
  2448. var j = 0
  2449. inc(i) # skip $
  2450. var negative = formatstr[i] == '-'
  2451. if negative: inc i
  2452. while i < formatstr.len and formatstr[i] in Digits:
  2453. j = j * 10 + ord(formatstr[i]) - ord('0')
  2454. inc(i)
  2455. let idx = if not negative: j-1 else: a.len-j
  2456. if idx < 0 or idx > a.high: invalidFormatString()
  2457. add s, a[idx]
  2458. of '{':
  2459. var j = i+2
  2460. var k = 0
  2461. var negative = formatstr[j] == '-'
  2462. if negative: inc j
  2463. var isNumber = 0
  2464. while j < formatstr.len and formatstr[j] notin {'\0', '}'}:
  2465. if formatstr[j] in Digits:
  2466. k = k * 10 + ord(formatstr[j]) - ord('0')
  2467. if isNumber == 0: isNumber = 1
  2468. else:
  2469. isNumber = -1
  2470. inc(j)
  2471. if isNumber == 1:
  2472. let idx = if not negative: k-1 else: a.len-k
  2473. if idx < 0 or idx > a.high: invalidFormatString()
  2474. add s, a[idx]
  2475. else:
  2476. var x = findNormalized(substr(formatstr, i+2, j-1), a)
  2477. if x >= 0 and x < high(a): add s, a[x+1]
  2478. else: invalidFormatString()
  2479. i = j+1
  2480. of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
  2481. var j = i+1
  2482. while j < formatstr.len and formatstr[j] in PatternChars: inc(j)
  2483. var x = findNormalized(substr(formatstr, i+1, j-1), a)
  2484. if x >= 0 and x < high(a): add s, a[x+1]
  2485. else: invalidFormatString()
  2486. i = j
  2487. else:
  2488. invalidFormatString()
  2489. else:
  2490. add s, formatstr[i]
  2491. inc(i)
  2492. proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect,
  2493. rtl, extern: "nsuFormatOpenArray".} =
  2494. ## Interpolates a format string with the values from `a`.
  2495. ##
  2496. ## The `substitution`:idx: operator performs string substitutions in
  2497. ## `formatstr` and returns a modified `formatstr`. This is often called
  2498. ## `string interpolation`:idx:.
  2499. ##
  2500. ## This is best explained by an example:
  2501. ##
  2502. ## .. code-block:: nim
  2503. ## "$1 eats $2." % ["The cat", "fish"]
  2504. ##
  2505. ## Results in:
  2506. ##
  2507. ## .. code-block:: nim
  2508. ## "The cat eats fish."
  2509. ##
  2510. ## The substitution variables (the thing after the ``$``) are enumerated
  2511. ## from 1 to ``a.len``.
  2512. ## To produce a verbatim ``$``, use ``$$``.
  2513. ## The notation ``$#`` can be used to refer to the next substitution
  2514. ## variable:
  2515. ##
  2516. ## .. code-block:: nim
  2517. ## "$# eats $#." % ["The cat", "fish"]
  2518. ##
  2519. ## Substitution variables can also be words (that is
  2520. ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even
  2521. ## indices are keys and with odd indices are the corresponding values.
  2522. ## An example:
  2523. ##
  2524. ## .. code-block:: nim
  2525. ## "$animal eats $food." % ["animal", "The cat", "food", "fish"]
  2526. ##
  2527. ## Results in:
  2528. ##
  2529. ## .. code-block:: nim
  2530. ## "The cat eats fish."
  2531. ##
  2532. ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is
  2533. ## raised if an ill-formed format string has been passed to the `%` operator.
  2534. ##
  2535. ## See also:
  2536. ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  2537. result = newStringOfCap(formatstr.len + a.len shl 4)
  2538. addf(result, formatstr, a)
  2539. proc `%` *(formatstr, a: string): string {.noSideEffect,
  2540. rtl, extern: "nsuFormatSingleElem".} =
  2541. ## This is the same as ``formatstr % [a]`` (see
  2542. ## `% proc<#%25,string,openArray[string]>`_).
  2543. result = newStringOfCap(formatstr.len + a.len)
  2544. addf(result, formatstr, [a])
  2545. proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect,
  2546. rtl, extern: "nsuFormatVarargs".} =
  2547. ## This is the same as ``formatstr % a`` (see
  2548. ## `% proc<#%25,string,openArray[string]>`_) except that it supports
  2549. ## auto stringification.
  2550. ##
  2551. ## See also:
  2552. ## * `strformat module<strformat.html>`_ for string interpolation and formatting
  2553. result = newStringOfCap(formatstr.len + a.len)
  2554. addf(result, formatstr, a)
  2555. proc strip*(s: string, leading = true, trailing = true,
  2556. chars: set[char] = Whitespace): string
  2557. {.noSideEffect, rtl, extern: "nsuStrip".} =
  2558. ## Strips leading or trailing `chars` (default: whitespace characters)
  2559. ## from `s` and returns the resulting string.
  2560. ##
  2561. ## If `leading` is true (default), leading `chars` are stripped.
  2562. ## If `trailing` is true (default), trailing `chars` are stripped.
  2563. ## If both are false, the string is returned unchanged.
  2564. ##
  2565. ## See also:
  2566. ## * `stripLineEnd proc<#stripLineEnd,string>`_
  2567. runnableExamples:
  2568. let a = " vhellov "
  2569. let b = strip(a)
  2570. doAssert b == "vhellov"
  2571. doAssert a.strip(leading = false) == " vhellov"
  2572. doAssert a.strip(trailing = false) == "vhellov "
  2573. doAssert b.strip(chars = {'v'}) == "hello"
  2574. doAssert b.strip(leading = false, chars = {'v'}) == "vhello"
  2575. let c = "blaXbla"
  2576. doAssert c.strip(chars = {'b', 'a'}) == "laXbl"
  2577. doAssert c.strip(chars = {'b', 'a', 'l'}) == "X"
  2578. var
  2579. first = 0
  2580. last = len(s)-1
  2581. if leading:
  2582. while first <= last and s[first] in chars: inc(first)
  2583. if trailing:
  2584. while last >= 0 and s[last] in chars: dec(last)
  2585. result = substr(s, first, last)
  2586. proc stripLineEnd*(s: var string) =
  2587. ## Returns ``s`` stripped from one of these suffixes:
  2588. ## ``\r, \n, \r\n, \f, \v`` (at most once instance).
  2589. ## For example, can be useful in conjunction with ``osproc.execCmdEx``.
  2590. ## aka: `chomp`:idx:
  2591. runnableExamples:
  2592. var s = "foo\n\n"
  2593. s.stripLineEnd
  2594. doAssert s == "foo\n"
  2595. s = "foo\r\n"
  2596. s.stripLineEnd
  2597. doAssert s == "foo"
  2598. if s.len > 0:
  2599. case s[^1]
  2600. of '\n':
  2601. if s.len > 1 and s[^2] == '\r':
  2602. s.setLen s.len-2
  2603. else:
  2604. s.setLen s.len-1
  2605. of '\r', '\v', '\f':
  2606. s.setLen s.len-1
  2607. else:
  2608. discard
  2609. iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
  2610. token: string, isSep: bool] =
  2611. ## Tokenizes the string `s` into substrings.
  2612. ##
  2613. ## Substrings are separated by a substring containing only `seps`.
  2614. ## Example:
  2615. ##
  2616. ## .. code-block:: nim
  2617. ## for word in tokenize(" this is an example "):
  2618. ## writeLine(stdout, word)
  2619. ##
  2620. ## Results in:
  2621. ##
  2622. ## .. code-block:: nim
  2623. ## (" ", true)
  2624. ## ("this", false)
  2625. ## (" ", true)
  2626. ## ("is", false)
  2627. ## (" ", true)
  2628. ## ("an", false)
  2629. ## (" ", true)
  2630. ## ("example", false)
  2631. ## (" ", true)
  2632. var i = 0
  2633. while true:
  2634. var j = i
  2635. var isSep = j < s.len and s[j] in seps
  2636. while j < s.len and (s[j] in seps) == isSep: inc(j)
  2637. if j > i:
  2638. yield (substr(s, i, j-1), isSep)
  2639. else:
  2640. break
  2641. i = j
  2642. # --------------------------------------------------------------------------
  2643. # Deprecated procs
  2644. {.push warning[Deprecated]: off.}
  2645. proc editDistance*(a, b: string): int {.noSideEffect,
  2646. rtl, extern: "nsuEditDistance",
  2647. deprecated: "use editdistance.editDistanceAscii instead".} =
  2648. ## Returns the edit distance between `a` and `b`.
  2649. ##
  2650. ## This uses the `Levenshtein`:idx: distance algorithm with only a linear
  2651. ## memory overhead.
  2652. var len1 = a.len
  2653. var len2 = b.len
  2654. if len1 > len2:
  2655. # make `b` the longer string
  2656. return editDistance(b, a)
  2657. # strip common prefix:
  2658. var s = 0
  2659. while s < len1 and a[s] == b[s]:
  2660. inc(s)
  2661. dec(len1)
  2662. dec(len2)
  2663. # strip common suffix:
  2664. while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
  2665. dec(len1)
  2666. dec(len2)
  2667. # trivial cases:
  2668. if len1 == 0: return len2
  2669. if len2 == 0: return len1
  2670. # another special case:
  2671. if len1 == 1:
  2672. for j in s..s+len2-1:
  2673. if a[s] == b[j]: return len2 - 1
  2674. return len2
  2675. inc(len1)
  2676. inc(len2)
  2677. var half = len1 shr 1
  2678. # initialize first row:
  2679. #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int)))
  2680. var row: seq[int]
  2681. newSeq(row, len2)
  2682. var e = s + len2 - 1 # end marker
  2683. for i in 1..len2 - half - 1: row[i] = i
  2684. row[0] = len1 - half - 1
  2685. for i in 1 .. len1 - 1:
  2686. var char1 = a[i + s - 1]
  2687. var char2p: int
  2688. var diff, x: int
  2689. var p: int
  2690. if i >= len1 - half:
  2691. # skip the upper triangle:
  2692. var offset = i - len1 + half
  2693. char2p = offset
  2694. p = offset
  2695. var c3 = row[p] + ord(char1 != b[s + char2p])
  2696. inc(p)
  2697. inc(char2p)
  2698. x = row[p] + 1
  2699. diff = x
  2700. if x > c3: x = c3
  2701. row[p] = x
  2702. inc(p)
  2703. else:
  2704. p = 1
  2705. char2p = 0
  2706. diff = i
  2707. x = i
  2708. if i <= half + 1:
  2709. # skip the lower triangle:
  2710. e = len2 + i - half - 2
  2711. # main:
  2712. while p <= e:
  2713. dec(diff)
  2714. var c3 = diff + ord(char1 != b[char2p + s])
  2715. inc(char2p)
  2716. inc(x)
  2717. if x > c3: x = c3
  2718. diff = row[p] + 1
  2719. if x > diff: x = diff
  2720. row[p] = x
  2721. inc(p)
  2722. # lower triangle sentinel:
  2723. if i <= half:
  2724. dec(diff)
  2725. var c3 = diff + ord(char1 != b[char2p + s])
  2726. inc(x)
  2727. if x > c3: x = c3
  2728. row[p] = x
  2729. result = row[e]
  2730. {.pop.}
  2731. proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl,
  2732. extern: "nsuIsNilOrEmpty",
  2733. deprecated: "use 'x.len == 0' instead".} =
  2734. ## Checks if `s` is nil or empty.
  2735. result = len(s) == 0
  2736. proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl,
  2737. extern: "nsuIsNilOrWhitespace".} =
  2738. ## Checks if `s` is nil or consists entirely of whitespace characters.
  2739. result = true
  2740. for c in s:
  2741. if not c.isSpaceAscii():
  2742. return false
  2743. template isImpl(call) =
  2744. if s.len == 0: return false
  2745. result = true
  2746. for c in s:
  2747. if not call(c): return false
  2748. proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar,
  2749. rtl, extern: "nsuIsAlphaAsciiStr",
  2750. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  2751. ## Checks whether or not `s` is alphabetical.
  2752. ##
  2753. ## This checks a-z, A-Z ASCII characters only.
  2754. ## Returns true if all characters in `s` are
  2755. ## alphabetic and there is at least one character
  2756. ## in `s`.
  2757. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  2758. runnableExamples:
  2759. doAssert isAlphaAscii("fooBar") == true
  2760. doAssert isAlphaAscii("fooBar1") == false
  2761. doAssert isAlphaAscii("foo Bar") == false
  2762. isImpl isAlphaAscii
  2763. proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar,
  2764. rtl, extern: "nsuIsAlphaNumericStr",
  2765. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  2766. ## Checks whether or not `s` is alphanumeric.
  2767. ##
  2768. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  2769. ## Returns true if all characters in `s` are
  2770. ## alpanumeric and there is at least one character
  2771. ## in `s`.
  2772. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  2773. runnableExamples:
  2774. doAssert isAlphaNumeric("fooBar") == true
  2775. doAssert isAlphaNumeric("fooBar1") == true
  2776. doAssert isAlphaNumeric("foo Bar") == false
  2777. isImpl isAlphaNumeric
  2778. proc isDigit*(s: string): bool {.noSideEffect, procvar,
  2779. rtl, extern: "nsuIsDigitStr",
  2780. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  2781. ## Checks whether or not `s` is a numeric value.
  2782. ##
  2783. ## This checks 0-9 ASCII characters only.
  2784. ## Returns true if all characters in `s` are
  2785. ## numeric and there is at least one character
  2786. ## in `s`.
  2787. runnableExamples:
  2788. doAssert isDigit("1908") == true
  2789. doAssert isDigit("fooBar1") == false
  2790. isImpl isDigit
  2791. proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
  2792. rtl, extern: "nsuIsSpaceAsciiStr",
  2793. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  2794. ## Checks whether or not `s` is completely whitespace.
  2795. ##
  2796. ## Returns true if all characters in `s` are whitespace
  2797. ## characters and there is at least one character in `s`.
  2798. runnableExamples:
  2799. doAssert isSpaceAscii(" ") == true
  2800. doAssert isSpaceAscii("") == false
  2801. isImpl isSpaceAscii
  2802. template isCaseImpl(s, charProc, skipNonAlpha) =
  2803. var hasAtleastOneAlphaChar = false
  2804. if s.len == 0: return false
  2805. for c in s:
  2806. if skipNonAlpha:
  2807. var charIsAlpha = c.isAlphaAscii()
  2808. if not hasAtleastOneAlphaChar:
  2809. hasAtleastOneAlphaChar = charIsAlpha
  2810. if charIsAlpha and (not charProc(c)):
  2811. return false
  2812. else:
  2813. if not charProc(c):
  2814. return false
  2815. return if skipNonAlpha: hasAtleastOneAlphaChar else: true
  2816. proc isLowerAscii*(s: string, skipNonAlpha: bool): bool {.
  2817. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  2818. ## Checks whether ``s`` is lower case.
  2819. ##
  2820. ## This checks ASCII characters only.
  2821. ##
  2822. ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  2823. ## characters in ``s`` are lower case. Returns false if none of the
  2824. ## characters in ``s`` are alphabetical.
  2825. ##
  2826. ## If ``skipNonAlpha`` is false, returns true only if all characters
  2827. ## in ``s`` are alphabetical and lower case.
  2828. ##
  2829. ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  2830. ## an empty string.
  2831. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  2832. runnableExamples:
  2833. doAssert isLowerAscii("1foobar", false) == false
  2834. doAssert isLowerAscii("1foobar", true) == true
  2835. doAssert isLowerAscii("1fooBar", true) == false
  2836. isCaseImpl(s, isLowerAscii, skipNonAlpha)
  2837. proc isUpperAscii*(s: string, skipNonAlpha: bool): bool {.
  2838. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  2839. ## Checks whether ``s`` is upper case.
  2840. ##
  2841. ## This checks ASCII characters only.
  2842. ##
  2843. ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  2844. ## characters in ``s`` are upper case. Returns false if none of the
  2845. ## characters in ``s`` are alphabetical.
  2846. ##
  2847. ## If ``skipNonAlpha`` is false, returns true only if all characters
  2848. ## in ``s`` are alphabetical and upper case.
  2849. ##
  2850. ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  2851. ## an empty string.
  2852. ## Use `Unicode module<unicode.html>`_ for UTF-8 support.
  2853. runnableExamples:
  2854. doAssert isUpperAscii("1FOO", false) == false
  2855. doAssert isUpperAscii("1FOO", true) == true
  2856. doAssert isUpperAscii("1Foo", true) == false
  2857. isCaseImpl(s, isUpperAscii, skipNonAlpha)
  2858. proc wordWrap*(s: string, maxLineWidth = 80,
  2859. splitLongWords = true,
  2860. seps: set[char] = Whitespace,
  2861. newLine = "\n"): string {.
  2862. noSideEffect, rtl, extern: "nsuWordWrap",
  2863. deprecated: "use wrapWords in std/wordwrap instead".} =
  2864. ## Word wraps `s`.
  2865. result = newStringOfCap(s.len + s.len shr 6)
  2866. var spaceLeft = maxLineWidth
  2867. var lastSep = ""
  2868. for word, isSep in tokenize(s, seps):
  2869. if isSep:
  2870. lastSep = word
  2871. spaceLeft = spaceLeft - len(word)
  2872. continue
  2873. if len(word) > spaceLeft:
  2874. if splitLongWords and len(word) > maxLineWidth:
  2875. result.add(substr(word, 0, spaceLeft-1))
  2876. var w = spaceLeft
  2877. var wordLeft = len(word) - spaceLeft
  2878. while wordLeft > 0:
  2879. result.add(newLine)
  2880. var L = min(maxLineWidth, wordLeft)
  2881. spaceLeft = maxLineWidth - L
  2882. result.add(substr(word, w, w+L-1))
  2883. inc(w, L)
  2884. dec(wordLeft, L)
  2885. else:
  2886. spaceLeft = maxLineWidth - len(word)
  2887. result.add(newLine)
  2888. result.add(word)
  2889. else:
  2890. spaceLeft = spaceLeft - len(word)
  2891. result.add(lastSep & word)
  2892. lastSep.setLen(0)
  # Self-tests: only compiled when this module itself is the main module.
  # `nonStaticTests` is called at run time only; `staticTests` is called at
  # run time and once more inside a `static:` block (see the last lines).
  # NOTE(review): in this copy several string literals look as if runs of
  # spaces were collapsed (e.g. the expected value of `align("1232", 6)` below
  # is only 5 characters long) and the multi-line `"""` literals have no
  # indentation left - confirm against the upstream file before relying on
  # these expectations.
  2893. when isMainModule:
  2894. proc nonStaticTests =
  2895. doAssert formatBiggestFloat(1234.567, ffDecimal, -1) == "1234.567000"
  2896. when not defined(js):
  2897. doAssert formatBiggestFloat(1234.567, ffDecimal, 0) == "1235." # <=== bug 8242
  2898. doAssert formatBiggestFloat(1234.567, ffDecimal, 1) == "1234.6"
  2899. doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001"
  2900. doAssert formatBiggestFloat(0.00000000001, ffScientific, 1, ',') in
  2901. ["1,0e-11", "1,0e-011"]
  2902. # bug #6589
  2903. when not defined(js):
  2904. doAssert formatFloat(123.456, ffScientific, precision = -1) == "1.234560e+02"
  2905. doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  2906. doAssert "${1}12 ${-1}$2" % ["a", "b"] == "a12 bb"
  2907. block: # formatSize tests
  2908. when not defined(js):
  2909. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" # <=== bug #8231
  2910. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  2911. doAssert formatSize(4096) == "4KiB"
  2912. doAssert formatSize(4096, prefix = bpColloquial, includeSpace = true) == "4 kB"
  2913. doAssert formatSize(4096, includeSpace = true) == "4 KiB"
  2914. doAssert formatSize(5_378_934, prefix = bpColloquial, decimalSep = ',') == "5,13MB"
  2915. block: # formatEng tests
  2916. doAssert formatEng(0, 2, trim = false) == "0.00"
  2917. doAssert formatEng(0, 2) == "0"
  2918. doAssert formatEng(53, 2, trim = false) == "53.00"
  2919. doAssert formatEng(0.053, 2, trim = false) == "53.00e-3"
  2920. doAssert formatEng(0.053, 4, trim = false) == "53.0000e-3"
  2921. doAssert formatEng(0.053, 4, trim = true) == "53e-3"
  2922. doAssert formatEng(0.053, 0) == "53e-3"
  2923. doAssert formatEng(52731234) == "52.731234e6"
  2924. doAssert formatEng(-52731234) == "-52.731234e6"
  2925. doAssert formatEng(52731234, 1) == "52.7e6"
  2926. doAssert formatEng(-52731234, 1) == "-52.7e6"
  2927. doAssert formatEng(52731234, 1, decimalSep = ',') == "52,7e6"
  2928. doAssert formatEng(-52731234, 1, decimalSep = ',') == "-52,7e6"
  2929. doAssert formatEng(4100, siPrefix = true, unit = "V") == "4.1 kV"
  2930. doAssert formatEng(4.1, siPrefix = true, unit = "V",
  2931. useUnitSpace = true) == "4.1 V"
  2932. doAssert formatEng(4.1, siPrefix = true) == "4.1" # Note lack of space
  2933. doAssert formatEng(4100, siPrefix = true) == "4.1 k"
  2934. doAssert formatEng(4.1, siPrefix = true, unit = "",
  2935. useUnitSpace = true) == "4.1 " # Includes space
  2936. doAssert formatEng(4100, siPrefix = true, unit = "") == "4.1 k"
  2937. doAssert formatEng(4100) == "4.1e3"
  2938. doAssert formatEng(4100, unit = "V", useUnitSpace = true) == "4.1e3 V"
  2939. doAssert formatEng(4100, unit = "", useUnitSpace = true) == "4.1e3 "
  2940. # Don't use SI prefix as number is too big
  2941. doAssert formatEng(3.1e22, siPrefix = true, unit = "a",
  2942. useUnitSpace = true) == "31e21 a"
  2943. # Don't use SI prefix as number is too small
  2944. doAssert formatEng(3.1e-25, siPrefix = true, unit = "A",
  2945. useUnitSpace = true) == "310e-27 A"
  # Checks below are the ones that are also evaluated at compile time.
  2946. proc staticTests =
  2947. doAssert align("abc", 4) == " abc"
  2948. doAssert align("a", 0) == "a"
  2949. doAssert align("1232", 6) == " 1232"
  2950. doAssert align("1232", 6, '#') == "##1232"
  2951. doAssert alignLeft("abc", 4) == "abc "
  2952. doAssert alignLeft("a", 0) == "a"
  2953. doAssert alignLeft("1232", 6) == "1232 "
  2954. doAssert alignLeft("1232", 6, '#') == "1232##"
  2955. let
  2956. inp = """ this is a long text -- muchlongerthan10chars and here
  2957. it goes"""
  2958. outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
  2959. doAssert wordWrap(inp, 10, false) == outp
  2960. let
  2961. longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
  2962. longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
  2963. doAssert wordWrap(longInp, 8, true) == longOutp
  2964. doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
  2965. "The cat eats fish."
  2966. doAssert "-ld a-ldz -ld".replaceWord("-ld") == " a-ldz "
  2967. doAssert "-lda-ldz -ld abc".replaceWord("-ld") == "-lda-ldz abc"
  2968. doAssert "-lda-ldz -ld abc".replaceWord("") == "-lda-ldz -ld abc"
  2969. doAssert "oo".replace("", "abc") == "oo"
  2970. type MyEnum = enum enA, enB, enC, enuD, enE
  2971. doAssert parseEnum[MyEnum]("enu_D") == enuD
  2972. doAssert parseEnum("invalid enum value", enC) == enC
  2973. doAssert center("foo", 13) == " foo "
  2974. doAssert center("foo", 0) == "foo"
  2975. doAssert center("foo", 3, fillChar = 'a') == "foo"
  2976. doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t"
  2977. doAssert count("foofoofoo", "foofoo") == 1
  2978. doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
  2979. doAssert count("foofoofoo", 'f') == 3
  2980. doAssert count("foofoofoobar", {'f', 'b'}) == 4
  2981. doAssert strip(" foofoofoo ") == "foofoofoo"
  2982. doAssert strip("sfoofoofoos", chars = {'s'}) == "foofoofoo"
  2983. doAssert strip("barfoofoofoobar", chars = {'b', 'a', 'r'}) == "foofoofoo"
  2984. doAssert strip("stripme but don't strip this stripme",
  2985. chars = {'s', 't', 'r', 'i', 'p', 'm', 'e'}) ==
  2986. " but don't strip this "
  2987. doAssert strip("sfoofoofoos", leading = false, chars = {'s'}) == "sfoofoofoo"
  2988. doAssert strip("sfoofoofoos", trailing = false, chars = {'s'}) == "foofoofoos"
  2989. doAssert " foo\n bar".indent(4, "Q") == "QQQQ foo\nQQQQ bar"
  2990. doAssert "abba".multiReplace(("a", "b"), ("b", "a")) == "baab"
  2991. doAssert "Hello World.".multiReplace(("ello", "ELLO"), ("World.",
  2992. "PEOPLE!")) == "HELLO PEOPLE!"
  2993. doAssert "aaaa".multiReplace(("a", "aa"), ("aa", "bb")) == "aaaaaaaa"
  2994. doAssert isAlphaAscii('r')
  2995. doAssert isAlphaAscii('A')
  2996. doAssert(not isAlphaAscii('$'))
  2997. doAssert isAlphaNumeric('3')
  2998. doAssert isAlphaNumeric('R')
  2999. doAssert(not isAlphaNumeric('!'))
  3000. doAssert isDigit('3')
  3001. doAssert(not isDigit('a'))
  3002. doAssert(not isDigit('%'))
  3003. doAssert isSpaceAscii('\t')
  3004. doAssert isSpaceAscii('\l')
  3005. doAssert(not isSpaceAscii('A'))
  3006. doAssert(isNilOrWhitespace(""))
  3007. doAssert(isNilOrWhitespace(" "))
  3008. doAssert(isNilOrWhitespace("\t\l \v\r\f"))
  3009. doAssert(not isNilOrWhitespace("ABc \td"))
  3010. doAssert isLowerAscii('a')
  3011. doAssert isLowerAscii('z')
  3012. doAssert(not isLowerAscii('A'))
  3013. doAssert(not isLowerAscii('5'))
  3014. doAssert(not isLowerAscii('&'))
  3015. doAssert(not isLowerAscii(' '))
  3016. doAssert isUpperAscii('A')
  3017. doAssert(not isUpperAscii('b'))
  3018. doAssert(not isUpperAscii('5'))
  3019. doAssert(not isUpperAscii('%'))
  3020. doAssert rsplit("foo bar", seps = Whitespace) == @["foo", "bar"]
  3021. doAssert rsplit(" foo bar", seps = Whitespace, maxsplit = 1) == @[" foo", "bar"]
  3022. doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) == @[
  3023. " foo bar", ""]
  3024. doAssert rsplit(":foo:bar", sep = ':') == @["", "foo", "bar"]
  3025. doAssert rsplit(":foo:bar", sep = ':', maxsplit = 2) == @["", "foo", "bar"]
  3026. doAssert rsplit(":foo:bar", sep = ':', maxsplit = 3) == @["", "foo", "bar"]
  3027. doAssert rsplit("foothebar", sep = "the") == @["foo", "bar"]
  3028. doAssert(unescape(r"\x013", "", "") == "\x013")
  3029. doAssert join(["foo", "bar", "baz"]) == "foobarbaz"
  3030. doAssert join(@["foo", "bar", "baz"], ", ") == "foo, bar, baz"
  3031. doAssert join([1, 2, 3]) == "123"
  3032. doAssert join(@[1, 2, 3], ", ") == "1, 2, 3"
  3033. doAssert """~~!!foo
  3034. ~~!!bar
  3035. ~~!!baz""".unindent(2, "~~!!") == "foo\nbar\nbaz"
  3036. doAssert """~~!!foo
  3037. ~~!!bar
  3038. ~~!!baz""".unindent(2, "~~!!aa") == "~~!!foo\n~~!!bar\n~~!!baz"
  3039. doAssert """~~foo
  3040. ~~ bar
  3041. ~~ baz""".unindent(4, "~") == "foo\n bar\n baz"
  3042. doAssert """foo
  3043. bar
  3044. baz
  3045. """.unindent(4) == "foo\nbar\nbaz\n"
  3046. doAssert """foo
  3047. bar
  3048. baz
  3049. """.unindent(2) == "foo\n bar\n baz\n"
  3050. doAssert """foo
  3051. bar
  3052. baz
  3053. """.unindent(100) == "foo\nbar\nbaz\n"
  3054. doAssert """foo
  3055. foo
  3056. bar
  3057. """.unindent() == "foo\nfoo\nbar\n"
  3058. let s = " this is an example "
  3059. let s2 = ":this;is;an:example;;"
  3060. doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
  3061. doAssert s2.split(seps = {':', ';'}) == @["", "this", "is", "an", "example",
  3062. "", ""]
  3063. doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example "]
  3064. doAssert s.split(' ', maxsplit = 1) == @["", "this is an example "]
  3065. doAssert s.split(" ", maxsplit = 4) == @["", "this", "is", "an", "example "]
  3066. doAssert s.splitWhitespace() == @["this", "is", "an", "example"]
  3067. doAssert s.splitWhitespace(maxsplit = 1) == @["this", "is an example "]
  3068. doAssert s.splitWhitespace(maxsplit = 2) == @["this", "is", "an example "]
  3069. doAssert s.splitWhitespace(maxsplit = 3) == @["this", "is", "an", "example "]
  3070. doAssert s.splitWhitespace(maxsplit = 4) == @["this", "is", "an", "example"]
  3071. block: # startsWith / endsWith char tests
  3072. var s = "abcdef"
  3073. doAssert s.startsWith('a')
  3074. doAssert s.startsWith('b') == false
  3075. doAssert s.endsWith('f')
  3076. doAssert s.endsWith('a') == false
  3077. doAssert s.endsWith('\0') == false
  3078. #echo("strutils tests passed")
  # Run both groups at run time, then repeat `staticTests` at compile time.
  3079. nonStaticTests()
  3080. staticTests()
  3081. static: staticTests()