strutils.nim 91 KB


  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains various string utility routines.
  10. ## See the module `re <re.html>`_ for regular expression support.
  11. ## See the module `pegs <pegs.html>`_ for PEG support.
  12. ## This module is available for the `JavaScript target
  13. ## <backends.html#the-javascript-target>`_.
  14. import parseutils
  15. from math import pow, floor, log10
  16. from algorithm import reverse
  17. when defined(nimVmExportFixed):
  18. from unicode import toLower, toUpper
  19. export toLower, toUpper
  20. {.deadCodeElim: on.} # dce option deprecated
  21. {.push debugger:off .} # the user does not want to trace a part
  22. # of the standard library!
  23. include "system/inclrtl"
  24. {.pop.}
  25. # Support old split with set[char]
  26. when defined(nimOldSplit):
  27. {.pragma: deprecatedSplit, deprecated.}
  28. else:
  29. {.pragma: deprecatedSplit.}
  30. const
  31. Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'}
  32. ## All the characters that count as whitespace.
  33. Letters* = {'A'..'Z', 'a'..'z'}
  34. ## the set of letters
  35. Digits* = {'0'..'9'}
  36. ## the set of digits
  37. HexDigits* = {'0'..'9', 'A'..'F', 'a'..'f'}
  38. ## the set of hexadecimal digits
  39. IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
  40. ## the set of characters an identifier can consist of
  41. IdentStartChars* = {'a'..'z', 'A'..'Z', '_'}
  42. ## the set of characters an identifier can start with
  43. NewLines* = {'\13', '\10'}
  44. ## the set of characters a newline terminator can start with
  45. AllChars* = {'\x00'..'\xFF'}
  46. ## A set with all the possible characters.
  47. ##
  48. ## Not very useful by its own, you can use it to create *inverted* sets to
  49. ## make the `find() proc <#find,string,set[char],int>`_ find **invalid**
  50. ## characters in strings. Example:
  51. ##
  52. ## .. code-block:: nim
  53. ## let invalid = AllChars - Digits
  54. ## doAssert "01234".find(invalid) == -1
  55. ## doAssert "01A34".find(invalid) == 2
  56. proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar,
  57. rtl, extern: "nsuIsAlphaAsciiChar".}=
  58. ## Checks whether or not `c` is alphabetical.
  59. ##
  60. ## This checks a-z, A-Z ASCII characters only.
  61. runnableExamples:
  62. doAssert isAlphaAscii('e') == true
  63. doAssert isAlphaAscii('E') == true
  64. doAssert isAlphaAscii('8') == false
  65. return c in Letters
  66. proc isAlphaNumeric*(c: char): bool {.noSideEffect, procvar,
  67. rtl, extern: "nsuIsAlphaNumericChar".} =
  68. ## Checks whether or not `c` is alphanumeric.
  69. ##
  70. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  71. runnableExamples:
  72. doAssert isAlphaNumeric('n') == true
  73. doAssert isAlphaNumeric('8') == true
  74. doAssert isAlphaNumeric(' ') == false
  75. return c in Letters+Digits
  76. proc isDigit*(c: char): bool {.noSideEffect, procvar,
  77. rtl, extern: "nsuIsDigitChar".} =
  78. ## Checks whether or not `c` is a number.
  79. ##
  80. ## This checks 0-9 ASCII characters only.
  81. runnableExamples:
  82. doAssert isDigit('n') == false
  83. doAssert isDigit('8') == true
  84. return c in Digits
  85. proc isSpaceAscii*(c: char): bool {.noSideEffect, procvar,
  86. rtl, extern: "nsuIsSpaceAsciiChar".} =
  87. ## Checks whether or not `c` is a whitespace character.
  88. runnableExamples:
  89. doAssert isSpaceAscii('n') == false
  90. doAssert isSpaceAscii(' ') == true
  91. return c in Whitespace
  92. proc isLowerAscii*(c: char): bool {.noSideEffect, procvar,
  93. rtl, extern: "nsuIsLowerAsciiChar".} =
  94. ## Checks whether or not `c` is a lower case character.
  95. ##
  96. ## This checks ASCII characters only.
  97. runnableExamples:
  98. doAssert isLowerAscii('e') == true
  99. doAssert isLowerAscii('E') == false
  100. doAssert isLowerAscii('7') == false
  101. return c in {'a'..'z'}
  102. proc isUpperAscii*(c: char): bool {.noSideEffect, procvar,
  103. rtl, extern: "nsuIsUpperAsciiChar".} =
  104. ## Checks whether or not `c` is an upper case character.
  105. ##
  106. ## This checks ASCII characters only.
  107. runnableExamples:
  108. doAssert isUpperAscii('e') == false
  109. doAssert isUpperAscii('E') == true
  110. doAssert isUpperAscii('7') == false
  111. return c in {'A'..'Z'}
  112. template isImpl(call) =
  113. if s.len == 0: return false
  114. result = true
  115. for c in s:
  116. if not call(c): return false
  117. proc isAlphaAscii*(s: string): bool {.noSideEffect, procvar,
  118. rtl, extern: "nsuIsAlphaAsciiStr",
  119. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  120. ## Checks whether or not `s` is alphabetical.
  121. ##
  122. ## This checks a-z, A-Z ASCII characters only.
  123. ## Returns true if all characters in `s` are
  124. ## alphabetic and there is at least one character
  125. ## in `s`.
  126. runnableExamples:
  127. doAssert isAlphaAscii("fooBar") == true
  128. doAssert isAlphaAscii("fooBar1") == false
  129. doAssert isAlphaAscii("foo Bar") == false
  130. isImpl isAlphaAscii
  131. proc isAlphaNumeric*(s: string): bool {.noSideEffect, procvar,
  132. rtl, extern: "nsuIsAlphaNumericStr",
  133. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  134. ## Checks whether or not `s` is alphanumeric.
  135. ##
  136. ## This checks a-z, A-Z, 0-9 ASCII characters only.
  137. ## Returns true if all characters in `s` are
  138. ## alpanumeric and there is at least one character
  139. ## in `s`.
  140. runnableExamples:
  141. doAssert isAlphaNumeric("fooBar") == true
  142. doAssert isAlphaNumeric("fooBar") == true
  143. doAssert isAlphaNumeric("foo Bar") == false
  144. isImpl isAlphaNumeric
  145. proc isDigit*(s: string): bool {.noSideEffect, procvar,
  146. rtl, extern: "nsuIsDigitStr",
  147. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  148. ## Checks whether or not `s` is a numeric value.
  149. ##
  150. ## This checks 0-9 ASCII characters only.
  151. ## Returns true if all characters in `s` are
  152. ## numeric and there is at least one character
  153. ## in `s`.
  154. runnableExamples:
  155. doAssert isDigit("1908") == true
  156. doAssert isDigit("fooBar1") == false
  157. isImpl isDigit
  158. proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
  159. rtl, extern: "nsuIsSpaceAsciiStr",
  160. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  161. ## Checks whether or not `s` is completely whitespace.
  162. ##
  163. ## Returns true if all characters in `s` are whitespace
  164. ## characters and there is at least one character in `s`.
  165. runnableExamples:
  166. doAssert isSpaceAscii(" ") == true
  167. doAssert isSpaceAscii("") == false
  168. isImpl isSpaceAscii
  169. template isCaseImpl(s, charProc, skipNonAlpha) =
  170. var hasAtleastOneAlphaChar = false
  171. if s.len == 0: return false
  172. for c in s:
  173. if skipNonAlpha:
  174. var charIsAlpha = c.isAlphaAscii()
  175. if not hasAtleastOneAlphaChar:
  176. hasAtleastOneAlphaChar = charIsAlpha
  177. if charIsAlpha and (not charProc(c)):
  178. return false
  179. else:
  180. if not charProc(c):
  181. return false
  182. return if skipNonAlpha: hasAtleastOneAlphaChar else: true
  183. proc isLowerAscii*(s: string, skipNonAlpha: bool): bool {.
  184. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  185. ## Checks whether ``s`` is lower case.
  186. ##
  187. ## This checks ASCII characters only.
  188. ##
  189. ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  190. ## characters in ``s`` are lower case. Returns false if none of the
  191. ## characters in ``s`` are alphabetical.
  192. ##
  193. ## If ``skipNonAlpha`` is false, returns true only if all characters
  194. ## in ``s`` are alphabetical and lower case.
  195. ##
  196. ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  197. ## an empty string.
  198. runnableExamples:
  199. doAssert isLowerAscii("1foobar", false) == false
  200. doAssert isLowerAscii("1foobar", true) == true
  201. doAssert isLowerAscii("1fooBar", true) == false
  202. isCaseImpl(s, isLowerAscii, skipNonAlpha)
  203. proc isUpperAscii*(s: string, skipNonAlpha: bool): bool {.
  204. deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  205. ## Checks whether ``s`` is upper case.
  206. ##
  207. ## This checks ASCII characters only.
  208. ##
  209. ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  210. ## characters in ``s`` are upper case. Returns false if none of the
  211. ## characters in ``s`` are alphabetical.
  212. ##
  213. ## If ``skipNonAlpha`` is false, returns true only if all characters
  214. ## in ``s`` are alphabetical and upper case.
  215. ##
  216. ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  217. ## an empty string.
  218. runnableExamples:
  219. doAssert isUpperAscii("1FOO", false) == false
  220. doAssert isUpperAscii("1FOO", true) == true
  221. doAssert isUpperAscii("1Foo", true) == false
  222. isCaseImpl(s, isUpperAscii, skipNonAlpha)
  223. proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
  224. rtl, extern: "nsuToLowerAsciiChar".} =
  225. ## Returns the lower case version of ``c``.
  226. ##
  227. ## This works only for the letters ``A-Z``. See `unicode.toLower
  228. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  229. ## character.
  230. runnableExamples:
  231. doAssert toLowerAscii('A') == 'a'
  232. doAssert toLowerAscii('e') == 'e'
  233. if c in {'A'..'Z'}:
  234. result = chr(ord(c) + (ord('a') - ord('A')))
  235. else:
  236. result = c
  237. template toImpl(call) =
  238. result = newString(len(s))
  239. for i in 0..len(s) - 1:
  240. result[i] = call(s[i])
  241. proc toLowerAscii*(s: string): string {.noSideEffect, procvar,
  242. rtl, extern: "nsuToLowerAsciiStr".} =
  243. ## Converts `s` into lower case.
  244. ##
  245. ## This works only for the letters ``A-Z``. See `unicode.toLower
  246. ## <unicode.html#toLower>`_ for a version that works for any Unicode
  247. ## character.
  248. runnableExamples:
  249. doAssert toLowerAscii("FooBar!") == "foobar!"
  250. toImpl toLowerAscii
  251. proc toUpperAscii*(c: char): char {.noSideEffect, procvar,
  252. rtl, extern: "nsuToUpperAsciiChar".} =
  253. ## Converts `c` into upper case.
  254. ##
  255. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  256. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  257. ## character.
  258. runnableExamples:
  259. doAssert toUpperAscii('a') == 'A'
  260. doAssert toUpperAscii('E') == 'E'
  261. if c in {'a'..'z'}:
  262. result = chr(ord(c) - (ord('a') - ord('A')))
  263. else:
  264. result = c
  265. proc toUpperAscii*(s: string): string {.noSideEffect, procvar,
  266. rtl, extern: "nsuToUpperAsciiStr".} =
  267. ## Converts `s` into upper case.
  268. ##
  269. ## This works only for the letters ``A-Z``. See `unicode.toUpper
  270. ## <unicode.html#toUpper>`_ for a version that works for any Unicode
  271. ## character.
  272. runnableExamples:
  273. doAssert toUpperAscii("FooBar!") == "FOOBAR!"
  274. toImpl toUpperAscii
  275. proc capitalizeAscii*(s: string): string {.noSideEffect, procvar,
  276. rtl, extern: "nsuCapitalizeAscii".} =
  277. ## Converts the first character of `s` into upper case.
  278. ##
  279. ## This works only for the letters ``A-Z``.
  280. runnableExamples:
  281. doAssert capitalizeAscii("foo") == "Foo"
  282. doAssert capitalizeAscii("-bar") == "-bar"
  283. if s.len == 0: result = ""
  284. else: result = toUpperAscii(s[0]) & substr(s, 1)
  285. proc normalize*(s: string): string {.noSideEffect, procvar,
  286. rtl, extern: "nsuNormalize".} =
  287. ## Normalizes the string `s`.
  288. ##
  289. ## That means to convert it to lower case and remove any '_'. This
  290. ## should NOT be used to normalize Nim identifier names.
  291. runnableExamples:
  292. doAssert normalize("Foo_bar") == "foobar"
  293. doAssert normalize("Foo Bar") == "foo bar"
  294. result = newString(s.len)
  295. var j = 0
  296. for i in 0..len(s) - 1:
  297. if s[i] in {'A'..'Z'}:
  298. result[j] = chr(ord(s[i]) + (ord('a') - ord('A')))
  299. inc j
  300. elif s[i] != '_':
  301. result[j] = s[i]
  302. inc j
  303. if j != s.len: setLen(result, j)
  304. proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
  305. rtl, extern: "nsuCmpIgnoreCase", procvar.} =
  306. ## Compares two strings in a case insensitive manner. Returns:
  307. ##
  308. ## | 0 iff a == b
  309. ## | < 0 iff a < b
  310. ## | > 0 iff a > b
  311. runnableExamples:
  312. doAssert cmpIgnoreCase("FooBar", "foobar") == 0
  313. doAssert cmpIgnoreCase("bar", "Foo") < 0
  314. doAssert cmpIgnoreCase("Foo5", "foo4") > 0
  315. var i = 0
  316. var m = min(a.len, b.len)
  317. while i < m:
  318. result = ord(toLowerAscii(a[i])) - ord(toLowerAscii(b[i]))
  319. if result != 0: return
  320. inc(i)
  321. result = a.len - b.len
  322. {.push checks: off, line_trace: off .} # this is a hot-spot in the compiler!
  323. # thus we compile without checks here
  324. proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
  325. rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
  326. ## Semantically the same as ``cmp(normalize(a), normalize(b))``. It
  327. ## is just optimized to not allocate temporary strings. This should
  328. ## NOT be used to compare Nim identifier names. use `macros.eqIdent`
  329. ## for that. Returns:
  330. ##
  331. ## | 0 iff a == b
  332. ## | < 0 iff a < b
  333. ## | > 0 iff a > b
  334. runnableExamples:
  335. doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0
  336. doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0
  337. var i = 0
  338. var j = 0
  339. while true:
  340. while i < a.len and a[i] == '_': inc i
  341. while j < b.len and b[j] == '_': inc j
  342. var aa = if i < a.len: toLowerAscii(a[i]) else: '\0'
  343. var bb = if j < b.len: toLowerAscii(b[j]) else: '\0'
  344. result = ord(aa) - ord(bb)
  345. if result != 0: return result
  346. # the characters are identical:
  347. if i >= a.len:
  348. # both cursors at the end:
  349. if j >= b.len: return 0
  350. # not yet at the end of 'b':
  351. return -1
  352. elif j >= b.len:
  353. return 1
  354. inc i
  355. inc j
  356. proc strip*(s: string, leading = true, trailing = true,
  357. chars: set[char] = Whitespace): string
  358. {.noSideEffect, rtl, extern: "nsuStrip".} =
  359. ## Strips leading or trailing `chars` from `s` and returns
  360. ## the resulting string.
  361. ##
  362. ## If `leading` is true, leading `chars` are stripped.
  363. ## If `trailing` is true, trailing `chars` are stripped.
  364. ## If both are false, the string is returned unchanged.
  365. runnableExamples:
  366. doAssert " vhellov ".strip().strip(trailing = false, chars = {'v'}) == "hellov"
  367. var
  368. first = 0
  369. last = len(s)-1
  370. if leading:
  371. while first <= last and s[first] in chars: inc(first)
  372. if trailing:
  373. while last >= 0 and s[last] in chars: dec(last)
  374. result = substr(s, first, last)
  375. proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} =
  376. ## Converts a character `c` to its octal representation.
  377. ##
  378. ## The resulting string may not have a leading zero. Its length is always
  379. ## exactly 3.
  380. runnableExamples:
  381. doAssert toOctal('!') == "041"
  382. result = newString(3)
  383. var val = ord(c)
  384. for i in countdown(2, 0):
  385. result[i] = chr(val mod 8 + ord('0'))
  386. val = val div 8
  387. proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl,
  388. extern: "nsuIsNilOrEmpty",
  389. deprecated: "use 'x.len == 0' instead".} =
  390. ## Checks if `s` is nil or empty.
  391. result = len(s) == 0
  392. proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrWhitespace".} =
  393. ## Checks if `s` is nil or consists entirely of whitespace characters.
  394. result = true
  395. for c in s:
  396. if not c.isSpaceAscii():
  397. return false
  398. proc substrEq(s: string, pos: int, substr: string): bool =
  399. var i = 0
  400. var length = substr.len
  401. while i < length and s[pos+i] == substr[i]:
  402. inc i
  403. return i == length
  404. # --------- Private templates for different split separators -----------
  405. template stringHasSep(s: string, index: int, seps: set[char]): bool =
  406. s[index] in seps
  407. template stringHasSep(s: string, index: int, sep: char): bool =
  408. s[index] == sep
  409. template stringHasSep(s: string, index: int, sep: string): bool =
  410. s.substrEq(index, sep)
  411. template splitCommon(s, sep, maxsplit, sepLen) =
  412. ## Common code for split procedures
  413. var last = 0
  414. var splits = maxsplit
  415. while last <= len(s):
  416. var first = last
  417. while last < len(s) and not stringHasSep(s, last, sep):
  418. inc(last)
  419. if splits == 0: last = len(s)
  420. yield substr(s, first, last-1)
  421. if splits == 0: break
  422. dec(splits)
  423. inc(last, sepLen)
  424. template oldSplit(s, seps, maxsplit) =
  425. var last = 0
  426. var splits = maxsplit
  427. assert(not ('\0' in seps))
  428. while last < len(s):
  429. while last < len(s) and s[last] in seps: inc(last)
  430. var first = last
  431. while last < len(s) and s[last] notin seps: inc(last)
  432. if first <= last-1:
  433. if splits == 0: last = len(s)
  434. yield substr(s, first, last-1)
  435. if splits == 0: break
  436. dec(splits)
  437. iterator split*(s: string, seps: set[char] = Whitespace,
  438. maxsplit: int = -1): string =
  439. ## Splits the string `s` into substrings using a group of separators.
  440. ##
  441. ## Substrings are separated by a substring containing only `seps`.
  442. ##
  443. ## .. code-block:: nim
  444. ## for word in split("this\lis an\texample"):
  445. ## writeLine(stdout, word)
  446. ##
  447. ## ...generates this output:
  448. ##
  449. ## .. code-block::
  450. ## "this"
  451. ## "is"
  452. ## "an"
  453. ## "example"
  454. ##
  455. ## And the following code:
  456. ##
  457. ## .. code-block:: nim
  458. ## for word in split("this:is;an$example", {';', ':', '$'}):
  459. ## writeLine(stdout, word)
  460. ##
  461. ## ...produces the same output as the first example. The code:
  462. ##
  463. ## .. code-block:: nim
  464. ## let date = "2012-11-20T22:08:08.398990"
  465. ## let separators = {' ', '-', ':', 'T'}
  466. ## for number in split(date, separators):
  467. ## writeLine(stdout, number)
  468. ##
  469. ## ...results in:
  470. ##
  471. ## .. code-block::
  472. ## "2012"
  473. ## "11"
  474. ## "20"
  475. ## "22"
  476. ## "08"
  477. ## "08.398990"
  478. ##
  479. splitCommon(s, seps, maxsplit, 1)
  480. iterator splitWhitespace*(s: string, maxsplit: int = -1): string =
  481. ## Splits the string ``s`` at whitespace stripping leading and trailing
  482. ## whitespace if necessary. If ``maxsplit`` is specified and is positive,
  483. ## no more than ``maxsplit`` splits is made.
  484. ##
  485. ## The following code:
  486. ##
  487. ## .. code-block:: nim
  488. ## let s = " foo \t bar baz "
  489. ## for ms in [-1, 1, 2, 3]:
  490. ## echo "------ maxsplit = ", ms, ":"
  491. ## for item in s.splitWhitespace(maxsplit=ms):
  492. ## echo '"', item, '"'
  493. ##
  494. ## ...results in:
  495. ##
  496. ## .. code-block::
  497. ## ------ maxsplit = -1:
  498. ## "foo"
  499. ## "bar"
  500. ## "baz"
  501. ## ------ maxsplit = 1:
  502. ## "foo"
  503. ## "bar baz "
  504. ## ------ maxsplit = 2:
  505. ## "foo"
  506. ## "bar"
  507. ## "baz "
  508. ## ------ maxsplit = 3:
  509. ## "foo"
  510. ## "bar"
  511. ## "baz"
  512. ##
  513. oldSplit(s, Whitespace, maxsplit)
  514. template accResult(iter: untyped) =
  515. result = @[]
  516. for x in iter: add(result, x)
  517. proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  518. rtl, extern: "nsuSplitWhitespace".} =
  519. ## The same as the `splitWhitespace <#splitWhitespace.i,string,int>`_
  520. ## iterator, but is a proc that returns a sequence of substrings.
  521. accResult(splitWhitespace(s, maxsplit))
  522. iterator split*(s: string, sep: char, maxsplit: int = -1): string =
  523. ## Splits the string `s` into substrings using a single separator.
  524. ##
  525. ## Substrings are separated by the character `sep`.
  526. ## The code:
  527. ##
  528. ## .. code-block:: nim
  529. ## for word in split(";;this;is;an;;example;;;", ';'):
  530. ## writeLine(stdout, word)
  531. ##
  532. ## Results in:
  533. ##
  534. ## .. code-block::
  535. ## ""
  536. ## ""
  537. ## "this"
  538. ## "is"
  539. ## "an"
  540. ## ""
  541. ## "example"
  542. ## ""
  543. ## ""
  544. ## ""
  545. ##
  546. splitCommon(s, sep, maxsplit, 1)
  547. iterator split*(s: string, sep: string, maxsplit: int = -1): string =
  548. ## Splits the string `s` into substrings using a string separator.
  549. ##
  550. ## Substrings are separated by the string `sep`.
  551. ## The code:
  552. ##
  553. ## .. code-block:: nim
  554. ## for word in split("thisDATAisDATAcorrupted", "DATA"):
  555. ## writeLine(stdout, word)
  556. ##
  557. ## Results in:
  558. ##
  559. ## .. code-block::
  560. ## "this"
  561. ## "is"
  562. ## "corrupted"
  563. ##
  564. splitCommon(s, sep, maxsplit, sep.len)
  565. template rsplitCommon(s, sep, maxsplit, sepLen) =
  566. ## Common code for rsplit functions
  567. var
  568. last = s.len - 1
  569. first = last
  570. splits = maxsplit
  571. startPos = 0
  572. # go to -1 in order to get separators at the beginning
  573. while first >= -1:
  574. while first >= 0 and not stringHasSep(s, first, sep):
  575. dec(first)
  576. if splits == 0:
  577. # No more splits means set first to the beginning
  578. first = -1
  579. if first == -1:
  580. startPos = 0
  581. else:
  582. startPos = first + sepLen
  583. yield substr(s, startPos, last)
  584. if splits == 0: break
  585. dec(splits)
  586. dec(first)
  587. last = first
  588. iterator rsplit*(s: string, seps: set[char] = Whitespace,
  589. maxsplit: int = -1): string =
  590. ## Splits the string `s` into substrings from the right using a
  591. ## string separator. Works exactly the same as `split iterator
  592. ## <#split.i,string,char,int>`_ except in reverse order.
  593. ##
  594. ## .. code-block:: nim
  595. ## for piece in "foo bar".rsplit(WhiteSpace):
  596. ## echo piece
  597. ##
  598. ## Results in:
  599. ##
  600. ## .. code-block:: nim
  601. ## "bar"
  602. ## "foo"
  603. ##
  604. ## Substrings are separated from the right by the set of chars `seps`
  605. rsplitCommon(s, seps, maxsplit, 1)
  606. iterator rsplit*(s: string, sep: char,
  607. maxsplit: int = -1): string =
  608. ## Splits the string `s` into substrings from the right using a
  609. ## string separator. Works exactly the same as `split iterator
  610. ## <#split.i,string,char,int>`_ except in reverse order.
  611. ##
  612. ## .. code-block:: nim
  613. ## for piece in "foo:bar".rsplit(':'):
  614. ## echo piece
  615. ##
  616. ## Results in:
  617. ##
  618. ## .. code-block:: nim
  619. ## "bar"
  620. ## "foo"
  621. ##
  622. ## Substrings are separated from the right by the char `sep`
  623. rsplitCommon(s, sep, maxsplit, 1)
  624. iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
  625. keepSeparators: bool = false): string =
  626. ## Splits the string `s` into substrings from the right using a
  627. ## string separator. Works exactly the same as `split iterator
  628. ## <#split.i,string,string,int>`_ except in reverse order.
  629. ##
  630. ## .. code-block:: nim
  631. ## for piece in "foothebar".rsplit("the"):
  632. ## echo piece
  633. ##
  634. ## Results in:
  635. ##
  636. ## .. code-block:: nim
  637. ## "bar"
  638. ## "foo"
  639. ##
  640. ## Substrings are separated from the right by the string `sep`
  641. rsplitCommon(s, sep, maxsplit, sep.len)
  642. iterator splitLines*(s: string, keepEol = false): string =
  643. ## Splits the string `s` into its containing lines.
  644. ##
  645. ## Every `character literal <manual.html#character-literals>`_ newline
  646. ## combination (CR, LF, CR-LF) is supported. The result strings contain no
  647. ## trailing end of line characters unless parameter ``keepEol`` is set to
  648. ## ``true``.
  649. ##
  650. ## Example:
  651. ##
  652. ## .. code-block:: nim
  653. ## for line in splitLines("\nthis\nis\nan\n\nexample\n"):
  654. ## writeLine(stdout, line)
  655. ##
  656. ## Results in:
  657. ##
  658. ## .. code-block:: nim
  659. ## ""
  660. ## "this"
  661. ## "is"
  662. ## "an"
  663. ## ""
  664. ## "example"
  665. ## ""
  666. var first = 0
  667. var last = 0
  668. var eolpos = 0
  669. while true:
  670. while last < s.len and s[last] notin {'\c', '\l'}: inc(last)
  671. eolpos = last
  672. if last < s.len:
  673. if s[last] == '\l': inc(last)
  674. elif s[last] == '\c':
  675. inc(last)
  676. if last < s.len and s[last] == '\l': inc(last)
  677. yield substr(s, first, if keepEol: last-1 else: eolpos-1)
  678. # no eol characters consumed means that the string is over
  679. if eolpos == last:
  680. break
  681. first = last
  682. proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect,
  683. rtl, extern: "nsuSplitLines".} =
  684. ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a
  685. ## proc that returns a sequence of substrings.
  686. accResult(splitLines(s, keepEol=keepEol))
  687. proc countLines*(s: string): int {.noSideEffect,
  688. rtl, extern: "nsuCountLines".} =
  689. ## Returns the number of lines in the string `s`.
  690. ##
  691. ## This is the same as ``len(splitLines(s))``, but much more efficient
  692. ## because it doesn't modify the string creating temporal objects. Every
  693. ## `character literal <manual.html#character-literals>`_ newline combination
  694. ## (CR, LF, CR-LF) is supported.
  695. ##
  696. ## In this context, a line is any string seperated by a newline combination.
  697. ## A line can be an empty string.
  698. runnableExamples:
  699. doAssert countLines("First line\l and second line.") == 2
  700. result = 1
  701. var i = 0
  702. while i < s.len:
  703. case s[i]
  704. of '\c':
  705. if i+1 < s.len and s[i+1] == '\l': inc i
  706. inc result
  707. of '\l': inc result
  708. else: discard
  709. inc i
  710. proc split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[string] {.
  711. noSideEffect, rtl, extern: "nsuSplitCharSet".} =
  712. ## The same as the `split iterator <#split.i,string,set[char],int>`_, but is a
  713. ## proc that returns a sequence of substrings.
  714. runnableExamples:
  715. doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
  716. doAssert "".split({' '}) == @[""]
  717. accResult(split(s, seps, maxsplit))
  718. proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect,
  719. rtl, extern: "nsuSplitChar".} =
  720. ## The same as the `split iterator <#split.i,string,char,int>`_, but is a proc
  721. ## that returns a sequence of substrings.
  722. runnableExamples:
  723. doAssert "a,b,c".split(',') == @["a", "b", "c"]
  724. doAssert "".split(' ') == @[""]
  725. accResult(split(s, sep, maxsplit))
  726. proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect,
  727. rtl, extern: "nsuSplitString".} =
  728. ## Splits the string `s` into substrings using a string separator.
  729. ##
  730. ## Substrings are separated by the string `sep`. This is a wrapper around the
  731. ## `split iterator <#split.i,string,string,int>`_.
  732. runnableExamples:
  733. doAssert "a,b,c".split(",") == @["a", "b", "c"]
  734. doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"]
  735. doAssert "".split("Elon Musk") == @[""]
  736. doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"]
  737. doAssert "a largely spaced sentence".split(" ", maxsplit=1) == @["a", " largely spaced sentence"]
  738. doAssert(sep.len > 0)
  739. accResult(split(s, sep, maxsplit))
  740. proc rsplit*(s: string, seps: set[char] = Whitespace,
  741. maxsplit: int = -1): seq[string]
  742. {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} =
  743. ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a
  744. ## proc that returns a sequence of substrings.
  745. ##
  746. ## A possible common use case for `rsplit` is path manipulation,
  747. ## particularly on systems that don't use a common delimiter.
  748. ##
  749. ## For example, if a system had `#` as a delimiter, you could
  750. ## do the following to get the tail of the path:
  751. ##
  752. ## .. code-block:: nim
  753. ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1)
  754. ##
  755. ## Results in `tailSplit` containing:
  756. ##
  757. ## .. code-block:: nim
  758. ## @["Root#Object#Method", "Index"]
  759. ##
  760. accResult(rsplit(s, seps, maxsplit))
  761. result.reverse()
  762. proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string]
  763. {.noSideEffect, rtl, extern: "nsuRSplitChar".} =
  764. ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a proc
  765. ## that returns a sequence of substrings.
  766. ##
  767. ## A possible common use case for `rsplit` is path manipulation,
  768. ## particularly on systems that don't use a common delimiter.
  769. ##
  770. ## For example, if a system had `#` as a delimiter, you could
  771. ## do the following to get the tail of the path:
  772. ##
  773. ## .. code-block:: nim
  774. ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1)
  775. ##
  776. ## Results in `tailSplit` containing:
  777. ##
  778. ## .. code-block:: nim
  779. ## @["Root#Object#Method", "Index"]
  780. ##
  781. accResult(rsplit(s, sep, maxsplit))
  782. result.reverse()
  783. proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
  784. {.noSideEffect, rtl, extern: "nsuRSplitString".} =
  785. ## The same as the `rsplit iterator <#rsplit.i,string,string,int>`_, but is a proc
  786. ## that returns a sequence of substrings.
  787. ##
  788. ## A possible common use case for `rsplit` is path manipulation,
  789. ## particularly on systems that don't use a common delimiter.
  790. ##
  791. ## For example, if a system had `#` as a delimiter, you could
  792. ## do the following to get the tail of the path:
  793. ##
  794. ## .. code-block:: nim
  795. ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1)
  796. ##
  797. ## Results in `tailSplit` containing:
  798. ##
  799. ## .. code-block:: nim
  800. ## @["Root#Object#Method", "Index"]
  801. ##
  802. runnableExamples:
  803. doAssert "a largely spaced sentence".rsplit(" ", maxsplit=1) == @["a largely spaced", "sentence"]
  804. doAssert "a,b,c".rsplit(",") == @["a", "b", "c"]
  805. doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ", "plan ", "canal panama"]
  806. doAssert "".rsplit("Elon Musk") == @[""]
  807. doAssert "a largely spaced sentence".rsplit(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"]
  808. accResult(rsplit(s, sep, maxsplit))
  809. result.reverse()
  810. proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect,
  811. rtl, extern: "nsuToHex".} =
  812. ## Converts `x` to its hexadecimal representation.
  813. ##
  814. ## The resulting string will be exactly `len` characters long. No prefix like
  815. ## ``0x`` is generated. `x` is treated as an unsigned value.
  816. runnableExamples:
  817. doAssert toHex(1984, 6) == "0007C0"
  818. doAssert toHex(1984, 2) == "C0"
  819. const
  820. HexChars = "0123456789ABCDEF"
  821. var
  822. n = x
  823. result = newString(len)
  824. for j in countdown(len-1, 0):
  825. result[j] = HexChars[int(n and 0xF)]
  826. n = n shr 4
  827. # handle negative overflow
  828. if n == 0 and x < 0: n = -1
  829. proc toHex*[T: SomeInteger](x: T): string =
  830. ## Shortcut for ``toHex(x, T.sizeOf * 2)``
  831. runnableExamples:
  832. doAssert toHex(1984'i64) == "00000000000007C0"
  833. toHex(BiggestInt(x), T.sizeOf * 2)
  834. proc toHex*(s: string): string {.noSideEffect, rtl.} =
  835. ## Converts a bytes string to its hexadecimal representation.
  836. ##
  837. ## The output is twice the input long. No prefix like
  838. ## ``0x`` is generated.
  839. const HexChars = "0123456789ABCDEF"
  840. result = newString(s.len * 2)
  841. for pos, c in s:
  842. var n = ord(c)
  843. result[pos * 2 + 1] = HexChars[n and 0xF]
  844. n = n shr 4
  845. result[pos * 2] = HexChars[n]
  846. proc intToStr*(x: int, minchars: Positive = 1): string {.noSideEffect,
  847. rtl, extern: "nsuIntToStr".} =
  848. ## Converts `x` to its decimal representation.
  849. ##
  850. ## The resulting string will be minimally `minchars` characters long. This is
  851. ## achieved by adding leading zeros.
  852. runnableExamples:
  853. doAssert intToStr(1984) == "1984"
  854. doAssert intToStr(1984, 6) == "001984"
  855. result = $abs(x)
  856. for i in 1 .. minchars - len(result):
  857. result = '0' & result
  858. if x < 0:
  859. result = '-' & result
  860. proc parseInt*(s: string): int {.noSideEffect, procvar,
  861. rtl, extern: "nsuParseInt".} =
  862. ## Parses a decimal integer value contained in `s`.
  863. ##
  864. ## If `s` is not a valid integer, `ValueError` is raised.
  865. runnableExamples:
  866. doAssert parseInt("-0042") == -42
  867. let L = parseutils.parseInt(s, result, 0)
  868. if L != s.len or L == 0:
  869. raise newException(ValueError, "invalid integer: " & s)
  870. proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar,
  871. rtl, extern: "nsuParseBiggestInt".} =
  872. ## Parses a decimal integer value contained in `s`.
  873. ##
  874. ## If `s` is not a valid integer, `ValueError` is raised.
  875. let L = parseutils.parseBiggestInt(s, result, 0)
  876. if L != s.len or L == 0:
  877. raise newException(ValueError, "invalid integer: " & s)
  878. proc parseUInt*(s: string): uint {.noSideEffect, procvar,
  879. rtl, extern: "nsuParseUInt".} =
  880. ## Parses a decimal unsigned integer value contained in `s`.
  881. ##
  882. ## If `s` is not a valid integer, `ValueError` is raised.
  883. let L = parseutils.parseUInt(s, result, 0)
  884. if L != s.len or L == 0:
  885. raise newException(ValueError, "invalid unsigned integer: " & s)
  886. proc parseBiggestUInt*(s: string): BiggestUInt {.noSideEffect, procvar,
  887. rtl, extern: "nsuParseBiggestUInt".} =
  888. ## Parses a decimal unsigned integer value contained in `s`.
  889. ##
  890. ## If `s` is not a valid integer, `ValueError` is raised.
  891. let L = parseutils.parseBiggestUInt(s, result, 0)
  892. if L != s.len or L == 0:
  893. raise newException(ValueError, "invalid unsigned integer: " & s)
  894. proc parseFloat*(s: string): float {.noSideEffect, procvar,
  895. rtl, extern: "nsuParseFloat".} =
  896. ## Parses a decimal floating point value contained in `s`. If `s` is not
  897. ## a valid floating point number, `ValueError` is raised. ``NAN``,
  898. ## ``INF``, ``-INF`` are also supported (case insensitive comparison).
  899. runnableExamples:
  900. doAssert parseFloat("3.14") == 3.14
  901. doAssert parseFloat("inf") == 1.0/0
  902. let L = parseutils.parseFloat(s, result, 0)
  903. if L != s.len or L == 0:
  904. raise newException(ValueError, "invalid float: " & s)
  905. proc parseBinInt*(s: string): int {.noSideEffect, procvar,
  906. rtl, extern: "nsuParseBinInt".} =
  907. ## Parses a binary integer value contained in `s`.
  908. ##
  909. ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have
  910. ## one of the following optional prefixes: ``0b``, ``0B``. Underscores within
  911. ## `s` are ignored.
  912. let L = parseutils.parseBin(s, result, 0)
  913. if L != s.len or L == 0:
  914. raise newException(ValueError, "invalid binary integer: " & s)
  915. proc parseOctInt*(s: string): int {.noSideEffect,
  916. rtl, extern: "nsuParseOctInt".} =
  917. ## Parses an octal integer value contained in `s`.
  918. ##
  919. ## If `s` is not a valid oct integer, `ValueError` is raised. `s` can have one
  920. ## of the following optional prefixes: ``0o``, ``0O``. Underscores within
  921. ## `s` are ignored.
  922. let L = parseutils.parseOct(s, result, 0)
  923. if L != s.len or L == 0:
  924. raise newException(ValueError, "invalid oct integer: " & s)
  925. proc parseHexInt*(s: string): int {.noSideEffect, procvar,
  926. rtl, extern: "nsuParseHexInt".} =
  927. ## Parses a hexadecimal integer value contained in `s`.
  928. ##
  929. ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one
  930. ## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores
  931. ## within `s` are ignored.
  932. let L = parseutils.parseHex(s, result, 0)
  933. if L != s.len or L == 0:
  934. raise newException(ValueError, "invalid hex integer: " & s)
  935. proc generateHexCharToValueMap(): string =
  936. ## Generate a string to map a hex digit to uint value
  937. result = ""
  938. for inp in 0..255:
  939. let ch = chr(inp)
  940. let o =
  941. case ch:
  942. of '0'..'9': inp - ord('0')
  943. of 'a'..'f': inp - ord('a') + 10
  944. of 'A'..'F': inp - ord('A') + 10
  945. else: 17 # indicates an invalid hex char
  946. result.add chr(o)
  947. const hexCharToValueMap = generateHexCharToValueMap()
  948. proc parseHexStr*(s: string): string {.noSideEffect, procvar,
  949. rtl, extern: "nsuParseHexStr".} =
  950. ## Convert hex-encoded string to byte string, e.g.:
  951. ##
  952. ## .. code-block:: nim
  953. ## hexToStr("00ff") == "\0\255"
  954. ##
  955. ## Raises ``ValueError`` for an invalid hex values. The comparison is
  956. ## case-insensitive.
  957. if s.len mod 2 != 0:
  958. raise newException(ValueError, "Incorrect hex string len")
  959. result = newString(s.len div 2)
  960. var buf = 0
  961. for pos, c in s:
  962. let val = hexCharToValueMap[ord(c)].ord
  963. if val == 17:
  964. raise newException(ValueError, "Invalid hex char " & repr(c))
  965. if pos mod 2 == 0:
  966. buf = val
  967. else:
  968. result[pos div 2] = chr(val + buf shl 4)
  969. proc parseBool*(s: string): bool =
  970. ## Parses a value into a `bool`.
  971. ##
  972. ## If ``s`` is one of the following values: ``y, yes, true, 1, on``, then
  973. ## returns `true`. If ``s`` is one of the following values: ``n, no, false,
  974. ## 0, off``, then returns `false`. If ``s`` is something else a
  975. ## ``ValueError`` exception is raised.
  976. case normalize(s)
  977. of "y", "yes", "true", "1", "on": result = true
  978. of "n", "no", "false", "0", "off": result = false
  979. else: raise newException(ValueError, "cannot interpret as a bool: " & s)
  980. proc parseEnum*[T: enum](s: string): T =
  981. ## Parses an enum ``T``.
  982. ##
  983. ## Raises ``ValueError`` for an invalid value in `s`. The comparison is
  984. ## done in a style insensitive way.
  985. for e in low(T)..high(T):
  986. if cmpIgnoreStyle(s, $e) == 0:
  987. return e
  988. raise newException(ValueError, "invalid enum value: " & s)
  989. proc parseEnum*[T: enum](s: string, default: T): T =
  990. ## Parses an enum ``T``.
  991. ##
  992. ## Uses `default` for an invalid value in `s`. The comparison is done in a
  993. ## style insensitive way.
  994. for e in low(T)..high(T):
  995. if cmpIgnoreStyle(s, $e) == 0:
  996. return e
  997. result = default
  998. proc repeat*(c: char, count: Natural): string {.noSideEffect,
  999. rtl, extern: "nsuRepeatChar".} =
  1000. ## Returns a string of length `count` consisting only of
  1001. ## the character `c`. You can use this proc to left align strings. Example:
  1002. ##
  1003. ## .. code-block:: nim
  1004. ## proc tabexpand(indent: int, text: string, tabsize: int = 4) =
  1005. ## echo '\t'.repeat(indent div tabsize), ' '.repeat(indent mod tabsize),
  1006. ## text
  1007. ##
  1008. ## tabexpand(4, "At four")
  1009. ## tabexpand(5, "At five")
  1010. ## tabexpand(6, "At six")
  1011. result = newString(count)
  1012. for i in 0..count-1: result[i] = c
  1013. proc repeat*(s: string, n: Natural): string {.noSideEffect,
  1014. rtl, extern: "nsuRepeatStr".} =
  1015. ## Returns String `s` concatenated `n` times. Example:
  1016. ##
  1017. ## .. code-block:: nim
  1018. ## echo "+++ STOP ".repeat(4), "+++"
  1019. result = newStringOfCap(n * s.len)
  1020. for i in 1..n: result.add(s)
  1021. template spaces*(n: Natural): string = repeat(' ', n)
  1022. ## Returns a String with `n` space characters. You can use this proc
  1023. ## to left align strings. Example:
  1024. ##
  1025. ## .. code-block:: nim
  1026. ## let
  1027. ## width = 15
  1028. ## text1 = "Hello user!"
  1029. ## text2 = "This is a very long string"
  1030. ## echo text1 & spaces(max(0, width - text1.len)) & "|"
  1031. ## echo text2 & spaces(max(0, width - text2.len)) & "|"
  1032. proc align*(s: string, count: Natural, padding = ' '): string {.
  1033. noSideEffect, rtl, extern: "nsuAlignString".} =
  1034. ## Aligns a string `s` with `padding`, so that it is of length `count`.
  1035. ##
  1036. ## `padding` characters (by default spaces) are added before `s` resulting in
  1037. ## right alignment. If ``s.len >= count``, no spaces are added and `s` is
  1038. ## returned unchanged. If you need to left align a string use the `alignLeft
  1039. ## proc <#alignLeft>`_. Example:
  1040. ##
  1041. ## .. code-block:: nim
  1042. ## assert align("abc", 4) == " abc"
  1043. ## assert align("a", 0) == "a"
  1044. ## assert align("1232", 6) == " 1232"
  1045. ## assert align("1232", 6, '#') == "##1232"
  1046. if s.len < count:
  1047. result = newString(count)
  1048. let spaces = count - s.len
  1049. for i in 0..spaces-1: result[i] = padding
  1050. for i in spaces..count-1: result[i] = s[i-spaces]
  1051. else:
  1052. result = s
  1053. proc alignLeft*(s: string, count: Natural, padding = ' '): string {.noSideEffect.} =
  1054. ## Left-Aligns a string `s` with `padding`, so that it is of length `count`.
  1055. ##
  1056. ## `padding` characters (by default spaces) are added after `s` resulting in
  1057. ## left alignment. If ``s.len >= count``, no spaces are added and `s` is
  1058. ## returned unchanged. If you need to right align a string use the `align
  1059. ## proc <#align>`_. Example:
  1060. ##
  1061. ## .. code-block:: nim
  1062. ## assert alignLeft("abc", 4) == "abc "
  1063. ## assert alignLeft("a", 0) == "a"
  1064. ## assert alignLeft("1232", 6) == "1232 "
  1065. ## assert alignLeft("1232", 6, '#') == "1232##"
  1066. if s.len < count:
  1067. result = newString(count)
  1068. if s.len > 0:
  1069. result[0 .. (s.len - 1)] = s
  1070. for i in s.len ..< count:
  1071. result[i] = padding
  1072. else:
  1073. result = s
  1074. iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[
  1075. token: string, isSep: bool] =
  1076. ## Tokenizes the string `s` into substrings.
  1077. ##
  1078. ## Substrings are separated by a substring containing only `seps`.
  1079. ## Examples:
  1080. ##
  1081. ## .. code-block:: nim
  1082. ## for word in tokenize(" this is an example "):
  1083. ## writeLine(stdout, word)
  1084. ##
  1085. ## Results in:
  1086. ##
  1087. ## .. code-block:: nim
  1088. ## (" ", true)
  1089. ## ("this", false)
  1090. ## (" ", true)
  1091. ## ("is", false)
  1092. ## (" ", true)
  1093. ## ("an", false)
  1094. ## (" ", true)
  1095. ## ("example", false)
  1096. ## (" ", true)
  1097. var i = 0
  1098. while true:
  1099. var j = i
  1100. var isSep = j < s.len and s[j] in seps
  1101. while j < s.len and (s[j] in seps) == isSep: inc(j)
  1102. if j > i:
  1103. yield (substr(s, i, j-1), isSep)
  1104. else:
  1105. break
  1106. i = j
  1107. proc wordWrap*(s: string, maxLineWidth = 80,
  1108. splitLongWords = true,
  1109. seps: set[char] = Whitespace,
  1110. newLine = "\n"): string {.
  1111. noSideEffect, rtl, extern: "nsuWordWrap".} =
  1112. ## Word wraps `s`.
  1113. result = newStringOfCap(s.len + s.len shr 6)
  1114. var spaceLeft = maxLineWidth
  1115. var lastSep = ""
  1116. for word, isSep in tokenize(s, seps):
  1117. if isSep:
  1118. lastSep = word
  1119. spaceLeft = spaceLeft - len(word)
  1120. continue
  1121. if len(word) > spaceLeft:
  1122. if splitLongWords and len(word) > maxLineWidth:
  1123. result.add(substr(word, 0, spaceLeft-1))
  1124. var w = spaceLeft
  1125. var wordLeft = len(word) - spaceLeft
  1126. while wordLeft > 0:
  1127. result.add(newLine)
  1128. var L = min(maxLineWidth, wordLeft)
  1129. spaceLeft = maxLineWidth - L
  1130. result.add(substr(word, w, w+L-1))
  1131. inc(w, L)
  1132. dec(wordLeft, L)
  1133. else:
  1134. spaceLeft = maxLineWidth - len(word)
  1135. result.add(newLine)
  1136. result.add(word)
  1137. else:
  1138. spaceLeft = spaceLeft - len(word)
  1139. result.add(lastSep & word)
  1140. lastSep.setLen(0)
  1141. proc indent*(s: string, count: Natural, padding: string = " "): string
  1142. {.noSideEffect, rtl, extern: "nsuIndent".} =
  1143. ## Indents each line in ``s`` by ``count`` amount of ``padding``.
  1144. ##
  1145. ## **Note:** This does not preserve the new line characters used in ``s``.
  1146. runnableExamples:
  1147. doAssert indent("First line\c\l and second line.", 2) == " First line\l and second line."
  1148. result = ""
  1149. var i = 0
  1150. for line in s.splitLines():
  1151. if i != 0:
  1152. result.add("\n")
  1153. for j in 1..count:
  1154. result.add(padding)
  1155. result.add(line)
  1156. i.inc
  1157. proc unindent*(s: string, count: Natural, padding: string = " "): string
  1158. {.noSideEffect, rtl, extern: "nsuUnindent".} =
  1159. ## Unindents each line in ``s`` by ``count`` amount of ``padding``.
  1160. ## Sometimes called `dedent`:idx:
  1161. ##
  1162. ## **Note:** This does not preserve the new line characters used in ``s``.
  1163. runnableExamples:
  1164. doAssert unindent(" First line\l and second line", 3) == "First line\land second line"
  1165. result = ""
  1166. var i = 0
  1167. for line in s.splitLines():
  1168. if i != 0:
  1169. result.add("\n")
  1170. var indentCount = 0
  1171. for j in 0..<count.int:
  1172. indentCount.inc
  1173. if j + padding.len-1 >= line.len or line[j .. j + padding.len-1] != padding:
  1174. indentCount = j
  1175. break
  1176. result.add(line[indentCount*padding.len .. ^1])
  1177. i.inc
  1178. proc unindent*(s: string): string
  1179. {.noSideEffect, rtl, extern: "nsuUnindentAll".} =
  1180. ## Removes all indentation composed of whitespace from each line in ``s``.
  1181. ##
  1182. ## For example:
  1183. ##
  1184. ## .. code-block:: nim
  1185. ## const x = """
  1186. ## Hello
  1187. ## There
  1188. ## """.unindent()
  1189. ##
  1190. ## doAssert x == "Hello\nThere\n"
  1191. unindent(s, 1000) # TODO: Passing a 1000 is a bit hackish.
  1192. proc startsWith*(s, prefix: string): bool {.noSideEffect,
  1193. rtl, extern: "nsuStartsWith".} =
  1194. ## Returns true iff ``s`` starts with ``prefix``.
  1195. ##
  1196. ## If ``prefix == ""`` true is returned.
  1197. var i = 0
  1198. while true:
  1199. if i >= prefix.len: return true
  1200. if i >= s.len or s[i] != prefix[i]: return false
  1201. inc(i)
  1202. proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} =
  1203. ## Returns true iff ``s`` starts with ``prefix``.
  1204. result = s.len > 0 and s[0] == prefix
  1205. proc endsWith*(s, suffix: string): bool {.noSideEffect,
  1206. rtl, extern: "nsuEndsWith".} =
  1207. ## Returns true iff ``s`` ends with ``suffix``.
  1208. ##
  1209. ## If ``suffix == ""`` true is returned.
  1210. var i = 0
  1211. var j = len(s) - len(suffix)
  1212. while i+j <% s.len:
  1213. if s[i+j] != suffix[i]: return false
  1214. inc(i)
  1215. if i >= suffix.len: return true
  1216. proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} =
  1217. ## Returns true iff ``s`` ends with ``suffix``.
  1218. result = s.len > 0 and s[s.high] == suffix
  1219. proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect,
  1220. rtl, extern: "nsuContinuesWith".} =
  1221. ## Returns true iff ``s`` continues with ``substr`` at position ``start``.
  1222. ##
  1223. ## If ``substr == ""`` true is returned.
  1224. var i = 0
  1225. while true:
  1226. if i >= substr.len: return true
  1227. if i+start >= s.len or s[i+start] != substr[i]: return false
  1228. inc(i)
  1229. proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0)
  1230. {.noSideEffect, inline.} =
  1231. ## Adds a separator to `dest` only if its length is bigger than `startLen`.
  1232. ##
  1233. ## A shorthand for:
  1234. ##
  1235. ## .. code-block:: nim
  1236. ## if dest.len > startLen: add(dest, sep)
  1237. ##
  1238. ## This is often useful for generating some code where the items need to
  1239. ## be *separated* by `sep`. `sep` is only added if `dest` is longer than
  1240. ## `startLen`. The following example creates a string describing
  1241. ## an array of integers.
  1242. runnableExamples:
  1243. var arr = "["
  1244. for x in items([2, 3, 5, 7, 11]):
  1245. addSep(arr, startLen=len("["))
  1246. add(arr, $x)
  1247. add(arr, "]")
  1248. if dest.len > startLen: add(dest, sep)
  1249. proc allCharsInSet*(s: string, theSet: set[char]): bool =
  1250. ## Returns true iff each character of `s` is in the set `theSet`.
  1251. runnableExamples:
  1252. doAssert allCharsInSet("aeea", {'a', 'e'}) == true
  1253. doAssert allCharsInSet("", {'a', 'e'}) == true
  1254. for c in items(s):
  1255. if c notin theSet: return false
  1256. return true
  1257. proc abbrev*(s: string, possibilities: openArray[string]): int =
  1258. ## Returns the index of the first item in ``possibilities`` which starts with ``s``, if not ambiguous.
  1259. ##
  1260. ## Returns -1 if no item has been found and -2 if multiple items match.
  1261. runnableExamples:
  1262. doAssert abbrev("fac", ["college", "faculty", "industry"]) == 1
  1263. doAssert abbrev("foo", ["college", "faculty", "industry"]) == -1 # Not found
  1264. doAssert abbrev("fac", ["college", "faculty", "faculties"]) == -2 # Ambiguous
  1265. doAssert abbrev("college", ["college", "colleges", "industry"]) == 0
  1266. result = -1 # none found
  1267. for i in 0..possibilities.len-1:
  1268. if possibilities[i].startsWith(s):
  1269. if possibilities[i] == s:
  1270. # special case: exact match shouldn't be ambiguous
  1271. return i
  1272. if result >= 0: return -2 # ambiguous
  1273. result = i
  1274. # ---------------------------------------------------------------------------
  1275. proc join*(a: openArray[string], sep: string = ""): string {.
  1276. noSideEffect, rtl, extern: "nsuJoinSep".} =
  1277. ## Concatenates all strings in `a` separating them with `sep`.
  1278. runnableExamples:
  1279. doAssert join(["A", "B", "Conclusion"], " -> ") == "A -> B -> Conclusion"
  1280. if len(a) > 0:
  1281. var L = sep.len * (a.len-1)
  1282. for i in 0..high(a): inc(L, a[i].len)
  1283. result = newStringOfCap(L)
  1284. add(result, a[0])
  1285. for i in 1..high(a):
  1286. add(result, sep)
  1287. add(result, a[i])
  1288. else:
  1289. result = ""
  1290. proc join*[T: not string](a: openArray[T], sep: string = ""): string {.
  1291. noSideEffect, rtl.} =
  1292. ## Converts all elements in `a` to strings using `$` and concatenates them
  1293. ## with `sep`.
  1294. runnableExamples:
  1295. doAssert join([1, 2, 3], " -> ") == "1 -> 2 -> 3"
  1296. result = ""
  1297. for i, x in a:
  1298. if i > 0:
  1299. add(result, sep)
  1300. add(result, $x)
  1301. type
  1302. SkipTable* = array[char, int]
  1303. proc initSkipTable*(a: var SkipTable, sub: string)
  1304. {.noSideEffect, rtl, extern: "nsuInitSkipTable".} =
  1305. ## Preprocess table `a` for `sub`.
  1306. let m = len(sub)
  1307. var i = 0
  1308. while i <= 0xff-7:
  1309. a[chr(i + 0)] = m
  1310. a[chr(i + 1)] = m
  1311. a[chr(i + 2)] = m
  1312. a[chr(i + 3)] = m
  1313. a[chr(i + 4)] = m
  1314. a[chr(i + 5)] = m
  1315. a[chr(i + 6)] = m
  1316. a[chr(i + 7)] = m
  1317. i += 8
  1318. for i in 0 ..< m - 1:
  1319. a[sub[i]] = m - 1 - i
  1320. proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int
  1321. {.noSideEffect, rtl, extern: "nsuFindStrA".} =
  1322. ## Searches for `sub` in `s` inside range `start`..`last` using preprocessed table `a`.
  1323. ## If `last` is unspecified, it defaults to `s.high`.
  1324. ##
  1325. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1326. let
  1327. last = if last==0: s.high else: last
  1328. sLen = last - start + 1
  1329. subLast = sub.len - 1
  1330. if subLast == -1:
  1331. # this was an empty needle string,
  1332. # we count this as match in the first possible position:
  1333. return start
  1334. # This is an implementation of the Boyer-Moore Horspool algorithms
  1335. # https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
  1336. var skip = start
  1337. while last - skip >= subLast:
  1338. var i = subLast
  1339. while s[skip + i] == sub[i]:
  1340. if i == 0:
  1341. return skip
  1342. dec i
  1343. inc skip, a[s[skip + subLast]]
  1344. return -1
  1345. when not (defined(js) or defined(nimdoc) or defined(nimscript)):
  1346. proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
  1347. importc: "memchr", header: "<string.h>" .}
  1348. const hasCStringBuiltin = true
  1349. else:
  1350. const hasCStringBuiltin = false
  1351. proc find*(s: string, sub: char, start: Natural = 0, last = 0): int {.noSideEffect,
  1352. rtl, extern: "nsuFindChar".} =
  1353. ## Searches for `sub` in `s` inside range `start`..`last`.
  1354. ## If `last` is unspecified, it defaults to `s.high`.
  1355. ##
  1356. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1357. let last = if last==0: s.high else: last
  1358. when nimvm:
  1359. for i in int(start)..last:
  1360. if sub == s[i]: return i
  1361. else:
  1362. when hasCStringBuiltin:
  1363. let L = last-start+1
  1364. if L > 0:
  1365. let found = c_memchr(s[start].unsafeAddr, sub, L)
  1366. if not found.isNil:
  1367. return cast[ByteAddress](found) -% cast[ByteAddress](s.cstring)
  1368. else:
  1369. for i in int(start)..last:
  1370. if sub == s[i]: return i
  1371. return -1
  1372. proc find*(s, sub: string, start: Natural = 0, last = 0): int {.noSideEffect,
  1373. rtl, extern: "nsuFindStr".} =
  1374. ## Searches for `sub` in `s` inside range `start`..`last`.
  1375. ## If `last` is unspecified, it defaults to `s.high`.
  1376. ##
  1377. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1378. if sub.len > s.len: return -1
  1379. if sub.len == 1: return find(s, sub[0], start, last)
  1380. var a {.noinit.}: SkipTable
  1381. initSkipTable(a, sub)
  1382. result = find(a, s, sub, start, last)
  1383. proc find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.noSideEffect,
  1384. rtl, extern: "nsuFindCharSet".} =
  1385. ## Searches for `chars` in `s` inside range `start`..`last`.
  1386. ## If `last` is unspecified, it defaults to `s.high`.
  1387. ##
  1388. ## If `s` contains none of the characters in `chars`, -1 is returned.
  1389. let last = if last==0: s.high else: last
  1390. for i in int(start)..last:
  1391. if s[i] in chars: return i
  1392. return -1
  1393. proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} =
  1394. ## Searches for `sub` in `s` in reverse, starting at `start` and going
  1395. ## backwards to 0.
  1396. ##
  1397. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1398. if sub.len == 0:
  1399. return -1
  1400. let realStart = if start == -1: s.len else: start
  1401. for i in countdown(realStart-sub.len, 0):
  1402. for j in 0..sub.len-1:
  1403. result = i
  1404. if sub[j] != s[i+j]:
  1405. result = -1
  1406. break
  1407. if result != -1: return
  1408. return -1
  1409. proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect,
  1410. rtl.} =
  1411. ## Searches for `sub` in `s` in reverse starting at position `start`.
  1412. ##
  1413. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1414. let realStart = if start == -1: s.len-1 else: start
  1415. for i in countdown(realStart, 0):
  1416. if sub == s[i]: return i
  1417. return -1
  1418. proc rfind*(s: string, chars: set[char], start: int = -1): int {.noSideEffect.} =
  1419. ## Searches for `chars` in `s` in reverse starting at position `start`.
  1420. ##
  1421. ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
  1422. let realStart = if start == -1: s.len-1 else: start
  1423. for i in countdown(realStart, 0):
  1424. if s[i] in chars: return i
  1425. return -1
  1426. proc center*(s: string, width: int, fillChar: char = ' '): string {.
  1427. noSideEffect, rtl, extern: "nsuCenterString".} =
  1428. ## Return the contents of `s` centered in a string `width` long using
  1429. ## `fillChar` as padding.
  1430. ##
  1431. ## The original string is returned if `width` is less than or equal
  1432. ## to `s.len`.
  1433. if width <= s.len: return s
  1434. result = newString(width)
  1435. # Left padding will be one fillChar
  1436. # smaller if there are an odd number
  1437. # of characters
  1438. let
  1439. charsLeft = (width - s.len)
  1440. leftPadding = charsLeft div 2
  1441. for i in 0 ..< width:
  1442. if i >= leftPadding and i < leftPadding + s.len:
  1443. # we are where the string should be located
  1444. result[i] = s[i-leftPadding]
  1445. else:
  1446. # we are either before or after where
  1447. # the string s should go
  1448. result[i] = fillChar
  1449. proc count*(s: string, sub: string, overlapping: bool = false): int {.
  1450. noSideEffect, rtl, extern: "nsuCountString".} =
  1451. ## Count the occurrences of a substring `sub` in the string `s`.
  1452. ## Overlapping occurrences of `sub` only count when `overlapping`
  1453. ## is set to true.
  1454. doAssert sub.len > 0
  1455. var i = 0
  1456. while true:
  1457. i = s.find(sub, i)
  1458. if i < 0: break
  1459. if overlapping: inc i
  1460. else: i += sub.len
  1461. inc result
  1462. proc count*(s: string, sub: char): int {.noSideEffect,
  1463. rtl, extern: "nsuCountChar".} =
  1464. ## Count the occurrences of the character `sub` in the string `s`.
  1465. for c in s:
  1466. if c == sub: inc result
  1467. proc count*(s: string, subs: set[char]): int {.noSideEffect,
  1468. rtl, extern: "nsuCountCharSet".} =
  1469. ## Count the occurrences of the group of character `subs` in the string `s`.
  1470. doAssert card(subs) > 0
  1471. for c in s:
  1472. if c in subs: inc result
  1473. proc quoteIfContainsWhite*(s: string): string {.deprecated.} =
  1474. ## Returns ``'"' & s & '"'`` if `s` contains a space and does not
  1475. ## start with a quote, else returns `s`.
  1476. ##
  1477. ## **DEPRECATED** as it was confused for shell quoting function. For this
  1478. ## application use `osproc.quoteShell <osproc.html#quoteShell>`_.
  1479. if find(s, {' ', '\t'}) >= 0 and s[0] != '"': result = '"' & s & '"'
  1480. else: result = s
  1481. proc contains*(s: string, c: char): bool {.noSideEffect.} =
  1482. ## Same as ``find(s, c) >= 0``.
  1483. return find(s, c) >= 0
  1484. proc contains*(s, sub: string): bool {.noSideEffect.} =
  1485. ## Same as ``find(s, sub) >= 0``.
  1486. return find(s, sub) >= 0
  1487. proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} =
  1488. ## Same as ``find(s, chars) >= 0``.
  1489. return find(s, chars) >= 0
  1490. proc replace*(s, sub: string, by = ""): string {.noSideEffect,
  1491. rtl, extern: "nsuReplaceStr".} =
  1492. ## Replaces `sub` in `s` by the string `by`.
  1493. result = ""
  1494. let subLen = sub.len
  1495. if subLen == 0:
  1496. result = s
  1497. elif subLen == 1:
  1498. # when the pattern is a single char, we use a faster
  1499. # char-based search that doesn't need a skip table:
  1500. let c = sub[0]
  1501. let last = s.high
  1502. var i = 0
  1503. while true:
  1504. let j = find(s, c, i, last)
  1505. if j < 0: break
  1506. add result, substr(s, i, j - 1)
  1507. add result, by
  1508. i = j + subLen
  1509. # copy the rest:
  1510. add result, substr(s, i)
  1511. else:
  1512. var a {.noinit.}: SkipTable
  1513. initSkipTable(a, sub)
  1514. let last = s.high
  1515. var i = 0
  1516. while true:
  1517. let j = find(a, s, sub, i, last)
  1518. if j < 0: break
  1519. add result, substr(s, i, j - 1)
  1520. add result, by
  1521. i = j + subLen
  1522. # copy the rest:
  1523. add result, substr(s, i)
  1524. proc replace*(s: string, sub, by: char): string {.noSideEffect,
  1525. rtl, extern: "nsuReplaceChar".} =
  1526. ## Replaces `sub` in `s` by the character `by`.
  1527. ##
  1528. ## Optimized version of `replace <#replace,string,string>`_ for characters.
  1529. result = newString(s.len)
  1530. var i = 0
  1531. while i < s.len:
  1532. if s[i] == sub: result[i] = by
  1533. else: result[i] = s[i]
  1534. inc(i)
  1535. proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
  1536. rtl, extern: "nsuReplaceWord".} =
  1537. ## Replaces `sub` in `s` by the string `by`.
  1538. ##
  1539. ## Each occurrence of `sub` has to be surrounded by word boundaries
  1540. ## (comparable to ``\\w`` in regular expressions), otherwise it is not
  1541. ## replaced.
  1542. if sub.len == 0: return s
  1543. const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
  1544. var a {.noinit.}: SkipTable
  1545. result = ""
  1546. initSkipTable(a, sub)
  1547. var i = 0
  1548. let last = s.high
  1549. let sublen = sub.len
  1550. if sublen > 0:
  1551. while true:
  1552. var j = find(a, s, sub, i, last)
  1553. if j < 0: break
  1554. # word boundary?
  1555. if (j == 0 or s[j-1] notin wordChars) and
  1556. (j+sub.len >= s.len or s[j+sub.len] notin wordChars):
  1557. add result, substr(s, i, j - 1)
  1558. add result, by
  1559. i = j + sublen
  1560. else:
  1561. add result, substr(s, i, j)
  1562. i = j + 1
  1563. # copy the rest:
  1564. add result, substr(s, i)
  1565. proc multiReplace*(s: string, replacements: varargs[(string, string)]): string {.noSideEffect.} =
  1566. ## Same as replace, but specialized for doing multiple replacements in a single
  1567. ## pass through the input string.
  1568. ##
  1569. ## multiReplace performs all replacements in a single pass, this means it can be used
  1570. ## to swap the occurences of "a" and "b", for instance.
  1571. ##
  1572. ## If the resulting string is not longer than the original input string, only a single
  1573. ## memory allocation is required.
  1574. ##
  1575. ## The order of the replacements does matter. Earlier replacements are preferred over later
  1576. ## replacements in the argument list.
  1577. result = newStringOfCap(s.len)
  1578. var i = 0
  1579. var fastChk: set[char] = {}
  1580. for sub, by in replacements.items:
  1581. if sub.len > 0:
  1582. # Include first character of all replacements
  1583. fastChk.incl sub[0]
  1584. while i < s.len:
  1585. block sIteration:
  1586. # Assume most chars in s are not candidates for any replacement operation
  1587. if s[i] in fastChk:
  1588. for sub, by in replacements.items:
  1589. if sub.len > 0 and s.continuesWith(sub, i):
  1590. add result, by
  1591. inc(i, sub.len)
  1592. break sIteration
  1593. # No matching replacement found
  1594. # copy current character from s
  1595. add result, s[i]
  1596. inc(i)
  1597. proc delete*(s: var string, first, last: int) {.noSideEffect,
  1598. rtl, extern: "nsuDelete".} =
  1599. ## Deletes in `s` the characters at position `first` .. `last`.
  1600. ##
  1601. ## This modifies `s` itself, it does not return a copy.
  1602. var i = first
  1603. var j = last+1
  1604. var newLen = len(s)-j+i
  1605. while i < newLen:
  1606. s[i] = s[j]
  1607. inc(i)
  1608. inc(j)
  1609. setLen(s, newLen)
  1610. proc toOct*(x: BiggestInt, len: Positive): string {.noSideEffect,
  1611. rtl, extern: "nsuToOct".} =
  1612. ## Converts `x` into its octal representation.
  1613. ##
  1614. ## The resulting string is always `len` characters long. No leading ``0o``
  1615. ## prefix is generated.
  1616. var
  1617. mask: BiggestInt = 7
  1618. shift: BiggestInt = 0
  1619. assert(len > 0)
  1620. result = newString(len)
  1621. for j in countdown(len-1, 0):
  1622. result[j] = chr(int((x and mask) shr shift) + ord('0'))
  1623. shift = shift + 3
  1624. mask = mask shl 3
  1625. proc toBin*(x: BiggestInt, len: Positive): string {.noSideEffect,
  1626. rtl, extern: "nsuToBin".} =
  1627. ## Converts `x` into its binary representation.
  1628. ##
  1629. ## The resulting string is always `len` characters long. No leading ``0b``
  1630. ## prefix is generated.
  1631. var
  1632. mask: BiggestInt = 1
  1633. shift: BiggestInt = 0
  1634. assert(len > 0)
  1635. result = newString(len)
  1636. for j in countdown(len-1, 0):
  1637. result[j] = chr(int((x and mask) shr shift) + ord('0'))
  1638. shift = shift + 1
  1639. mask = mask shl 1
  1640. proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect,
  1641. rtl, extern: "nsuInsertSep".} =
  1642. ## Inserts the separator `sep` after `digits` digits from right to left.
  1643. ##
  1644. ## Even though the algorithm works with any string `s`, it is only useful
  1645. ## if `s` contains a number.
  1646. runnableExamples:
  1647. doAssert insertSep("1000000") == "1_000_000"
  1648. var L = (s.len-1) div digits + s.len
  1649. result = newString(L)
  1650. var j = 0
  1651. dec(L)
  1652. for i in countdown(len(s)-1, 0):
  1653. if j == digits:
  1654. result[L] = sep
  1655. dec(L)
  1656. j = 0
  1657. result[L] = s[i]
  1658. inc(j)
  1659. dec(L)
  1660. proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  1661. rtl, extern: "nsuEscape".} =
  1662. ## Escapes a string `s`. See `system.addEscapedChar <system.html#addEscapedChar>`_
  1663. ## for the escaping scheme.
  1664. ##
  1665. ## The resulting string is prefixed with `prefix` and suffixed with `suffix`.
  1666. ## Both may be empty strings.
  1667. result = newStringOfCap(s.len + s.len shr 2)
  1668. result.add(prefix)
  1669. for c in items(s):
  1670. case c
  1671. of '\0'..'\31', '\127'..'\255':
  1672. add(result, "\\x")
  1673. add(result, toHex(ord(c), 2))
  1674. of '\\': add(result, "\\\\")
  1675. of '\'': add(result, "\\'")
  1676. of '\"': add(result, "\\\"")
  1677. else: add(result, c)
  1678. add(result, suffix)
  1679. proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  1680. rtl, extern: "nsuUnescape".} =
  1681. ## Unescapes a string `s`.
  1682. ##
  1683. ## This complements `escape <#escape>`_ as it performs the opposite
  1684. ## operations.
  1685. ##
  1686. ## If `s` does not begin with ``prefix`` and end with ``suffix`` a
  1687. ## ValueError exception will be raised.
  1688. result = newStringOfCap(s.len)
  1689. var i = prefix.len
  1690. if not s.startsWith(prefix):
  1691. raise newException(ValueError,
  1692. "String does not start with: " & prefix)
  1693. while true:
  1694. if i >= s.len-suffix.len: break
  1695. if s[i] == '\\':
  1696. if i+1 >= s.len:
  1697. result.add('\\')
  1698. break
  1699. case s[i+1]:
  1700. of 'x':
  1701. inc i, 2
  1702. var c: int
  1703. i += parseutils.parseHex(s, c, i, maxLen=2)
  1704. result.add(chr(c))
  1705. dec i, 2
  1706. of '\\':
  1707. result.add('\\')
  1708. of '\'':
  1709. result.add('\'')
  1710. of '\"':
  1711. result.add('\"')
  1712. else:
  1713. result.add("\\" & s[i+1])
  1714. inc(i, 2)
  1715. else:
  1716. result.add(s[i])
  1717. inc(i)
  1718. if not s.endsWith(suffix):
  1719. raise newException(ValueError,
  1720. "String does not end in: " & suffix)
  1721. proc validIdentifier*(s: string): bool {.noSideEffect,
  1722. rtl, extern: "nsuValidIdentifier".} =
  1723. ## Returns true if `s` is a valid identifier.
  1724. ##
  1725. ## A valid identifier starts with a character of the set `IdentStartChars`
  1726. ## and is followed by any number of characters of the set `IdentChars`.
  1727. runnableExamples:
  1728. doAssert "abc_def08".validIdentifier
  1729. if s.len > 0 and s[0] in IdentStartChars:
  1730. for i in 1..s.len-1:
  1731. if s[i] notin IdentChars: return false
  1732. return true
# NOTE(review): the deprecation warning is switched off around this proc,
# presumably so that its recursive call to itself does not trigger it.
{.push warning[Deprecated]: off.}
proc editDistance*(a, b: string): int {.noSideEffect,
  rtl, extern: "nsuEditDistance",
  deprecated: "use editdistance.editDistanceAscii instead".} =
  ## Returns the edit distance between `a` and `b`.
  ##
  ## This uses the `Levenshtein`:idx: distance algorithm with only a linear
  ## memory overhead.
  ##
  ## The strings are compared ``char`` by ``char``. This proc is deprecated;
  ## its deprecation message points to ``editdistance.editDistanceAscii``.
  var len1 = a.len
  var len2 = b.len
  if len1 > len2:
    # make `b` the longer string
    return editDistance(b, a)

  # strip common prefix:
  # (`s` counts the skipped leading characters; both lengths shrink with it)
  var s = 0
  while s < len1 and a[s] == b[s]:
    inc(s)
    dec(len1)
    dec(len2)
  # strip common suffix:
  while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
    dec(len1)
    dec(len2)
  # trivial cases:
  # (one remainder is empty, so the distance is the length of the other)
  if len1 == 0: return len2
  if len2 == 0: return len1

  # another special case:
  # (a single character is left in `a`: the distance is `len2 - 1` if that
  # character occurs in the remainder of `b`, `len2` otherwise)
  if len1 == 1:
    for j in s..s+len2-1:
      if a[s] == b[j]: return len2 - 1
    return len2

  inc(len1)
  inc(len2)
  var half = len1 shr 1
  # initialize first row:
  #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int)))
  # (a single row of `len2` ints is the only working storage)
  var row: seq[int]
  newSeq(row, len2)
  var e = s + len2 - 1 # end marker
  for i in 1..len2 - half - 1: row[i] = i
  row[0] = len1 - half - 1
  for i in 1 .. len1 - 1:
    var char1 = a[i + s - 1]
    var char2p: int
    var D, x: int
    var p: int
    if i >= len1 - half:
      # skip the upper triangle:
      var offset = i - len1 + half
      char2p = offset
      p = offset
      var c3 = row[p] + ord(char1 != b[s + char2p])
      inc(p)
      inc(char2p)
      x = row[p] + 1
      D = x
      if x > c3: x = c3
      row[p] = x
      inc(p)
    else:
      p = 1
      char2p = 0
      D = i
      x = i
    if i <= half + 1:
      # skip the lower triangle:
      e = len2 + i - half - 2
    # main:
    # (each step keeps the smallest of three candidates: `c3`, `x + 1` and
    # `row[p] + 1`)
    while p <= e:
      dec(D)
      var c3 = D + ord(char1 != b[char2p + s])
      inc(char2p)
      inc(x)
      if x > c3: x = c3
      D = row[p] + 1
      if x > D: x = D
      row[p] = x
      inc(p)
    # lower triangle sentinel:
    if i <= half:
      dec(D)
      var c3 = D + ord(char1 != b[char2p + s])
      inc(x)
      if x > c3: x = c3
      row[p] = x
  # the distance is the row entry at the end marker
  result = row[e]
{.pop.}
# floating point formatting:

when not defined(js):
  # Binding to the variadic C function `sprintf` from <stdio.h>. It is not
  # declared when compiling for the JavaScript backend.
  proc c_sprintf(buf, frmt: cstring): cint {.header: "<stdio.h>",
                                     importc: "sprintf", varargs, noSideEffect.}
type
  FloatFormatMode* = enum ## the different modes of floating point formatting
    ffDefault,         ## use the shorter floating point notation
    ffDecimal,         ## use decimal floating point notation
    ffScientific       ## use scientific notation (using ``e`` character)

# `TFloatFormat` is kept as a deprecated alias of `FloatFormatMode`.
{.deprecated: [TFloatFormat: FloatFormatMode].}
proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault,
                         precision: range[-1..32] = 16;
                         decimalSep = '.'): string {.
                         noSideEffect, rtl, extern: "nsu$1".} =
  ## Converts a floating point value `f` to a string.
  ##
  ## If ``format == ffDecimal`` then precision is the number of digits to
  ## be printed after the decimal point.
  ## If ``format == ffScientific`` then precision is the number of digits
  ## printed after the decimal point of the mantissa (for example precision 2
  ## formats 123.456 as ``1.23e+02``).
  ## If ``format == ffDefault`` then, outside the JavaScript backend,
  ## precision is passed to C's ``%g`` conversion; on the JavaScript backend
  ## it is ignored and ``toString()`` is used.
  ##
  ## If ``precision == -1``, no precision is passed to ``sprintf`` so its own
  ## default applies; the JavaScript backend uses 6 in that case.
  ##
  ## Every ``.`` or ``,`` in the formatted text is replaced by `decimalSep`.
  when defined(js):
    var precision = precision
    if precision == -1:
      # use the same default precision as c_sprintf
      precision = 6
    var res: cstring
    case format
    of ffDefault:
      {.emit: "`res` = `f`.toString();".}
    of ffDecimal:
      {.emit: "`res` = `f`.toFixed(`precision`);".}
    of ffScientific:
      {.emit: "`res` = `f`.toExponential(`precision`);".}
    result = $res
    if 1.0 / f == -Inf:
      # JavaScript removes the "-" from negative Zero, add it back here
      result = "-" & $res
    for i in 0 ..< result.len:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if result[i] in {'.', ','}: result[i] = decimalsep
  else:
    # printf conversion character for each format mode
    const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e']
    var
      frmtstr {.noinit.}: array[0..5, char]
      buf {.noinit.}: array[0..2500, char]
      L: cint
    frmtstr[0] = '%'
    if precision >= 0:
      # builds "%#.*<c>": the precision is passed as an extra sprintf argument
      frmtstr[1] = '#'
      frmtstr[2] = '.'
      frmtstr[3] = '*'
      frmtstr[4] = floatFormatToChar[format]
      frmtstr[5] = '\0'
      when defined(nimNoArrayToCstringConversion):
        L = c_sprintf(addr buf, addr frmtstr, precision, f)
      else:
        L = c_sprintf(buf, frmtstr, precision, f)
    else:
      # builds "%<c>" without an explicit precision
      frmtstr[1] = floatFormatToChar[format]
      frmtstr[2] = '\0'
      when defined(nimNoArrayToCstringConversion):
        L = c_sprintf(addr buf, addr frmtstr, f)
      else:
        L = c_sprintf(buf, frmtstr, f)
    # `L` is the value returned by sprintf; it is used as the result length
    result = newString(L)
    for i in 0 ..< L:
      # Depending on the locale either dot or comma is produced,
      # but nothing else is possible:
      if buf[i] in {'.', ','}: result[i] = decimalsep
      else: result[i] = buf[i]
    when defined(windows):
      # VS pre 2015 violates the C standard: "The exponent always contains at
      # least two digits, and only as many more digits as necessary to
      # represent the exponent." [C11 §7.21.6.1]
      # The following post-processing fixes this behavior.
      # (only the "+0dd" form is rewritten here, by dropping the '0')
      if result.len > 4 and result[^4] == '+' and result[^3] == '0':
        result[^3] = result[^2]
        result[^2] = result[^1]
        result.setLen(result.len - 1)
  1904. proc formatFloat*(f: float, format: FloatFormatMode = ffDefault,
  1905. precision: range[-1..32] = 16; decimalSep = '.'): string {.
  1906. noSideEffect, rtl, extern: "nsu$1".} =
  1907. ## Converts a floating point value `f` to a string.
  1908. ##
  1909. ## If ``format == ffDecimal`` then precision is the number of digits to
  1910. ## be printed after the decimal point.
  1911. ## If ``format == ffScientific`` then precision is the maximum number
  1912. ## of significant digits to be printed.
  1913. ## `precision`'s default value is the maximum number of meaningful digits
  1914. ## after the decimal point for Nim's ``float`` type.
  1915. ##
  1916. ## If ``precision == -1``, it tries to format it nicely.
  1917. runnableExamples:
  1918. let x = 123.456
  1919. doAssert x.formatFloat() == "123.4560000000000"
  1920. doAssert x.formatFloat(ffDecimal, 4) == "123.4560"
  1921. doAssert x.formatFloat(ffScientific, 2) == "1.23e+02"
  1922. result = formatBiggestFloat(f, format, precision, decimalSep)
  1923. proc trimZeros*(x: var string) {.noSideEffect.} =
  1924. ## Trim trailing zeros from a formatted floating point
  1925. ## value (`x`). Modifies the passed value.
  1926. var spl: seq[string]
  1927. if x.contains('.') or x.contains(','):
  1928. if x.contains('e'):
  1929. spl = x.split('e')
  1930. x = spl[0]
  1931. while x[x.high] == '0':
  1932. x.setLen(x.len-1)
  1933. if x[x.high] in [',', '.']:
  1934. x.setLen(x.len-1)
  1935. if spl.len > 0:
  1936. x &= "e" & spl[1]
type
  BinaryPrefixMode* = enum ## the different names for binary prefixes
    bpIEC,                 ## use the IEC/ISO standard prefixes such as kibi
    bpColloquial           ## use the colloquial kilo, mega etc
  1941. proc formatSize*(bytes: int64,
  1942. decimalSep = '.',
  1943. prefix = bpIEC,
  1944. includeSpace = false): string {.noSideEffect.} =
  1945. ## Rounds and formats `bytes`.
  1946. ##
  1947. ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be
  1948. ## formatted as 1KiB. Set prefix to `bpColloquial` to use the colloquial
  1949. ## names from the SI standard (e.g. k for 1000 being reused as 1024).
  1950. ##
  1951. ## `includeSpace` can be set to true to include the (SI preferred) space
  1952. ## between the number and the unit (e.g. 1 KiB).
  1953. runnableExamples:
  1954. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
  1955. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  1956. doAssert formatSize(4096, includeSpace=true) == "4 KiB"
  1957. doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
  1958. doAssert formatSize(4096) == "4KiB"
  1959. doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  1960. const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
  1961. const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
  1962. var
  1963. xb: int64 = bytes
  1964. fbytes: float
  1965. last_xb: int64 = bytes
  1966. matchedIndex: int
  1967. prefixes: array[9, string]
  1968. if prefix == bpColloquial:
  1969. prefixes = collPrefixes
  1970. else:
  1971. prefixes = iecPrefixes
  1972. # Iterate through prefixes seeing if value will be greater than
  1973. # 0 in each case
  1974. for index in 1..<prefixes.len:
  1975. last_xb = xb
  1976. xb = bytes div (1'i64 shl (index*10))
  1977. matchedIndex = index
  1978. if xb == 0:
  1979. xb = last_xb
  1980. matchedIndex = index - 1
  1981. break
  1982. # xb has the integer number for the latest value; index should be correct
  1983. fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
  1984. result = formatFloat(fbytes, format=ffDecimal, precision=3, decimalSep=decimalSep)
  1985. result.trimZeros()
  1986. if includeSpace:
  1987. result &= " "
  1988. result &= prefixes[matchedIndex]
  1989. result &= "B"
  1990. proc formatEng*(f: BiggestFloat,
  1991. precision: range[0..32] = 10,
  1992. trim: bool = true,
  1993. siPrefix: bool = false,
  1994. unit: string = "",
  1995. decimalSep = '.',
  1996. useUnitSpace = false): string {.noSideEffect.} =
  1997. ## Converts a floating point value `f` to a string using engineering notation.
  1998. ##
  1999. ## Numbers in of the range -1000.0<f<1000.0 will be formatted without an
  2000. ## exponent. Numbers outside of this range will be formatted as a
  2001. ## significand in the range -1000.0<f<1000.0 and an exponent that will always
  2002. ## be an integer multiple of 3, corresponding with the SI prefix scale k, M,
  2003. ## G, T etc for numbers with an absolute value greater than 1 and m, μ, n, p
  2004. ## etc for numbers with an absolute value less than 1.
  2005. ##
  2006. ## The default configuration (`trim=true` and `precision=10`) shows the
  2007. ## **shortest** form that precisely (up to a maximum of 10 decimal places)
  2008. ## displays the value. For example, 4.100000 will be displayed as 4.1 (which
  2009. ## is mathematically identical) whereas 4.1000003 will be displayed as
  2010. ## 4.1000003.
  2011. ##
  2012. ## If `trim` is set to true, trailing zeros will be removed; if false, the
  2013. ## number of digits specified by `precision` will always be shown.
  2014. ##
  2015. ## `precision` can be used to set the number of digits to be shown after the
  2016. ## decimal point or (if `trim` is true) the maximum number of digits to be
  2017. ## shown.
  2018. ##
  2019. ## .. code-block:: nim
  2020. ##
  2021. ## formatEng(0, 2, trim=false) == "0.00"
  2022. ## formatEng(0, 2) == "0"
  2023. ## formatEng(0.053, 0) == "53e-3"
  2024. ## formatEng(52731234, 2) == "52.73e6"
  2025. ## formatEng(-52731234, 2) == "-52.73e6"
  2026. ##
  2027. ## If `siPrefix` is set to true, the number will be displayed with the SI
  2028. ## prefix corresponding to the exponent. For example 4100 will be displayed
  2029. ## as "4.1 k" instead of "4.1e3". Note that `u` is used for micro- in place
  2030. ## of the greek letter mu (μ) as per ISO 2955. Numbers with an absolute
  2031. ## value outside of the range 1e-18<f<1000e18 (1a<f<1000E) will be displayed
  2032. ## with an exponent rather than an SI prefix, regardless of whether
  2033. ## `siPrefix` is true.
  2034. ##
  2035. ## If `useUnitSpace` is true, the provided unit will be appended to the string
  2036. ## (with a space as required by the SI standard). This behaviour is slightly
  2037. ## different to appending the unit to the result as the location of the space
  2038. ## is altered depending on whether there is an exponent.
  2039. ##
  2040. ## .. code-block:: nim
  2041. ##
  2042. ## formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
  2043. ## formatEng(4.1, siPrefix=true, unit="V") == "4.1 V"
  2044. ## formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
  2045. ## formatEng(4100, siPrefix=true) == "4.1 k"
  2046. ## formatEng(4.1, siPrefix=true, unit="") == "4.1 " # Space with unit=""
  2047. ## formatEng(4100, siPrefix=true, unit="") == "4.1 k"
  2048. ## formatEng(4100) == "4.1e3"
  2049. ## formatEng(4100, unit="V") == "4.1e3 V"
  2050. ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true
  2051. ##
  2052. ## `decimalSep` is used as the decimal separator.
  2053. var
  2054. absolute: BiggestFloat
  2055. significand: BiggestFloat
  2056. fexponent: BiggestFloat
  2057. exponent: int
  2058. splitResult: seq[string]
  2059. suffix: string = ""
  2060. proc getPrefix(exp: int): char =
  2061. ## Get the SI prefix for a given exponent
  2062. ##
  2063. ## Assumes exponent is a multiple of 3; returns ' ' if no prefix found
  2064. const siPrefixes = ['a','f','p','n','u','m',' ','k','M','G','T','P','E']
  2065. var index: int = (exp div 3) + 6
  2066. result = ' '
  2067. if index in low(siPrefixes)..high(siPrefixes):
  2068. result = siPrefixes[index]
  2069. # Most of the work is done with the sign ignored, so get the absolute value
  2070. absolute = abs(f)
  2071. significand = f
  2072. if absolute == 0.0:
  2073. # Simple case: just format it and force the exponent to 0
  2074. exponent = 0
  2075. result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
  2076. else:
  2077. # Find the best exponent that's a multiple of 3
  2078. fexponent = floor(log10(absolute))
  2079. fexponent = 3.0 * floor(fexponent / 3.0)
  2080. # Adjust the significand for the new exponent
  2081. significand /= pow(10.0, fexponent)
  2082. # Adjust the significand and check whether it has affected
  2083. # the exponent
  2084. absolute = abs(significand)
  2085. if absolute >= 1000.0:
  2086. significand *= 0.001
  2087. fexponent += 3
  2088. # Components of the result:
  2089. result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
  2090. exponent = fexponent.int()
  2091. splitResult = result.split('.')
  2092. result = splitResult[0]
  2093. # result should have at most one decimal character
  2094. if splitResult.len() > 1:
  2095. # If trim is set, we get rid of trailing zeros. Don't use trimZeros here as
  2096. # we can be a bit more efficient through knowledge that there will never be
  2097. # an exponent in this part.
  2098. if trim:
  2099. while splitResult[1].endsWith("0"):
  2100. # Trim last character
  2101. splitResult[1].setLen(splitResult[1].len-1)
  2102. if splitResult[1].len() > 0:
  2103. result &= decimalSep & splitResult[1]
  2104. else:
  2105. result &= decimalSep & splitResult[1]
  2106. # Combine the results accordingly
  2107. if siPrefix and exponent != 0:
  2108. var p = getPrefix(exponent)
  2109. if p != ' ':
  2110. suffix = " " & p
  2111. exponent = 0 # Exponent replaced by SI prefix
  2112. if suffix == "" and useUnitSpace:
  2113. suffix = " "
  2114. suffix &= unit
  2115. if exponent != 0:
  2116. result &= "e" & $exponent
  2117. result &= suffix
  2118. proc findNormalized(x: string, inArray: openArray[string]): int =
  2119. var i = 0
  2120. while i < high(inArray):
  2121. if cmpIgnoreStyle(x, inArray[i]) == 0: return i
  2122. inc(i, 2) # incrementing by 1 would probably lead to a
  2123. # security hole...
  2124. return -1
  2125. proc invalidFormatString() {.noinline.} =
  2126. raise newException(ValueError, "invalid format string")
  2127. proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.
  2128. noSideEffect, rtl, extern: "nsuAddf".} =
  2129. ## The same as ``add(s, formatstr % a)``, but more efficient.
  2130. const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  2131. var i = 0
  2132. var num = 0
  2133. while i < len(formatstr):
  2134. if formatstr[i] == '$' and i+1 < len(formatstr):
  2135. case formatstr[i+1]
  2136. of '#':
  2137. if num > a.high: invalidFormatString()
  2138. add s, a[num]
  2139. inc i, 2
  2140. inc num
  2141. of '$':
  2142. add s, '$'
  2143. inc(i, 2)
  2144. of '1'..'9', '-':
  2145. var j = 0
  2146. inc(i) # skip $
  2147. var negative = formatstr[i] == '-'
  2148. if negative: inc i
  2149. while i < formatstr.len and formatstr[i] in Digits:
  2150. j = j * 10 + ord(formatstr[i]) - ord('0')
  2151. inc(i)
  2152. let idx = if not negative: j-1 else: a.len-j
  2153. if idx < 0 or idx > a.high: invalidFormatString()
  2154. add s, a[idx]
  2155. of '{':
  2156. var j = i+2
  2157. var k = 0
  2158. var negative = formatstr[j] == '-'
  2159. if negative: inc j
  2160. var isNumber = 0
  2161. while j < formatstr.len and formatstr[j] notin {'\0', '}'}:
  2162. if formatstr[j] in Digits:
  2163. k = k * 10 + ord(formatstr[j]) - ord('0')
  2164. if isNumber == 0: isNumber = 1
  2165. else:
  2166. isNumber = -1
  2167. inc(j)
  2168. if isNumber == 1:
  2169. let idx = if not negative: k-1 else: a.len-k
  2170. if idx < 0 or idx > a.high: invalidFormatString()
  2171. add s, a[idx]
  2172. else:
  2173. var x = findNormalized(substr(formatstr, i+2, j-1), a)
  2174. if x >= 0 and x < high(a): add s, a[x+1]
  2175. else: invalidFormatString()
  2176. i = j+1
  2177. of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
  2178. var j = i+1
  2179. while j < formatstr.len and formatstr[j] in PatternChars: inc(j)
  2180. var x = findNormalized(substr(formatstr, i+1, j-1), a)
  2181. if x >= 0 and x < high(a): add s, a[x+1]
  2182. else: invalidFormatString()
  2183. i = j
  2184. else:
  2185. invalidFormatString()
  2186. else:
  2187. add s, formatstr[i]
  2188. inc(i)
  2189. proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect,
  2190. rtl, extern: "nsuFormatOpenArray".} =
  2191. ## Interpolates a format string with the values from `a`.
  2192. ##
  2193. ## The `substitution`:idx: operator performs string substitutions in
  2194. ## `formatstr` and returns a modified `formatstr`. This is often called
  2195. ## `string interpolation`:idx:.
  2196. ##
  2197. ## This is best explained by an example:
  2198. ##
  2199. ## .. code-block:: nim
  2200. ## "$1 eats $2." % ["The cat", "fish"]
  2201. ##
  2202. ## Results in:
  2203. ##
  2204. ## .. code-block:: nim
  2205. ## "The cat eats fish."
  2206. ##
  2207. ## The substitution variables (the thing after the ``$``) are enumerated
  2208. ## from 1 to ``a.len``.
  2209. ## To produce a verbatim ``$``, use ``$$``.
  2210. ## The notation ``$#`` can be used to refer to the next substitution
  2211. ## variable:
  2212. ##
  2213. ## .. code-block:: nim
  2214. ## "$# eats $#." % ["The cat", "fish"]
  2215. ##
  2216. ## Substitution variables can also be words (that is
  2217. ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even
  2218. ## indices are keys and with odd indices are the corresponding values.
  2219. ## An example:
  2220. ##
  2221. ## .. code-block:: nim
  2222. ## "$animal eats $food." % ["animal", "The cat", "food", "fish"]
  2223. ##
  2224. ## Results in:
  2225. ##
  2226. ## .. code-block:: nim
  2227. ## "The cat eats fish."
  2228. ##
  2229. ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is
  2230. ## raised if an ill-formed format string has been passed to the `%` operator.
  2231. result = newStringOfCap(formatstr.len + a.len shl 4)
  2232. addf(result, formatstr, a)
  2233. proc `%` *(formatstr, a: string): string {.noSideEffect,
  2234. rtl, extern: "nsuFormatSingleElem".} =
  2235. ## This is the same as ``formatstr % [a]``.
  2236. result = newStringOfCap(formatstr.len + a.len)
  2237. addf(result, formatstr, [a])
  2238. proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect,
  2239. rtl, extern: "nsuFormatVarargs".} =
  2240. ## This is the same as ``formatstr % a`` except that it supports
  2241. ## auto stringification.
  2242. result = newStringOfCap(formatstr.len + a.len)
  2243. addf(result, formatstr, a)
  2244. {.pop.}
  2245. proc removeSuffix*(s: var string, chars: set[char] = Newlines) {.
  2246. rtl, extern: "nsuRemoveSuffixCharSet".} =
  2247. ## Removes all characters from `chars` from the end of the string `s`
  2248. ## (in-place).
  2249. runnableExamples:
  2250. var userInput = "Hello World!*~\r\n"
  2251. userInput.removeSuffix
  2252. doAssert userInput == "Hello World!*~"
  2253. userInput.removeSuffix({'~', '*'})
  2254. doAssert userInput == "Hello World!"
  2255. var otherInput = "Hello!?!"
  2256. otherInput.removeSuffix({'!', '?'})
  2257. doAssert otherInput == "Hello"
  2258. if s.len == 0: return
  2259. var last = s.high
  2260. while last > -1 and s[last] in chars: last -= 1
  2261. s.setLen(last + 1)
  2262. proc removeSuffix*(s: var string, c: char) {.
  2263. rtl, extern: "nsuRemoveSuffixChar".} =
  2264. ## Removes all occurrences of a single character (in-place) from the end
  2265. ## of a string.
  2266. ##
  2267. runnableExamples:
  2268. var table = "users"
  2269. table.removeSuffix('s')
  2270. doAssert table == "user"
  2271. var dots = "Trailing dots......."
  2272. dots.removeSuffix('.')
  2273. doAssert dots == "Trailing dots"
  2274. removeSuffix(s, chars = {c})
  2275. proc removeSuffix*(s: var string, suffix: string) {.
  2276. rtl, extern: "nsuRemoveSuffixString".} =
  2277. ## Remove the first matching suffix (in-place) from a string.
  2278. runnableExamples:
  2279. var answers = "yeses"
  2280. answers.removeSuffix("es")
  2281. doAssert answers == "yes"
  2282. var newLen = s.len
  2283. if s.endsWith(suffix):
  2284. newLen -= len(suffix)
  2285. s.setLen(newLen)
  2286. proc removePrefix*(s: var string, chars: set[char] = Newlines) {.
  2287. rtl, extern: "nsuRemovePrefixCharSet".} =
  2288. ## Removes all characters from `chars` from the start of the string `s`
  2289. ## (in-place).
  2290. ##
  2291. runnableExamples:
  2292. var userInput = "\r\n*~Hello World!"
  2293. userInput.removePrefix
  2294. doAssert userInput == "*~Hello World!"
  2295. userInput.removePrefix({'~', '*'})
  2296. doAssert userInput == "Hello World!"
  2297. var otherInput = "?!?Hello!?!"
  2298. otherInput.removePrefix({'!', '?'})
  2299. doAssert otherInput == "Hello!?!"
  2300. var start = 0
  2301. while start < s.len and s[start] in chars: start += 1
  2302. if start > 0: s.delete(0, start - 1)
  2303. proc removePrefix*(s: var string, c: char) {.
  2304. rtl, extern: "nsuRemovePrefixChar".} =
  2305. ## Removes all occurrences of a single character (in-place) from the start
  2306. ## of a string.
  2307. ##
  2308. runnableExamples:
  2309. var ident = "pControl"
  2310. ident.removePrefix('p')
  2311. doAssert ident == "Control"
  2312. removePrefix(s, chars = {c})
  2313. proc removePrefix*(s: var string, prefix: string) {.
  2314. rtl, extern: "nsuRemovePrefixString".} =
  2315. ## Remove the first matching prefix (in-place) from a string.
  2316. ##
  2317. runnableExamples:
  2318. var answers = "yesyes"
  2319. answers.removePrefix("yes")
  2320. doAssert answers == "yes"
  2321. if s.startsWith(prefix):
  2322. s.delete(0, prefix.len - 1)
  2323. proc stripLineEnd*(s: var string) =
  2324. ## Returns ``s`` stripped from one of these suffixes:
  2325. ## ``\r, \n, \r\n, \f, \v`` (at most once instance).
  2326. ## For example, can be useful in conjunction with ``osproc.execCmdEx``.
  2327. ## aka: `chomp`:idx:
  2328. runnableExamples:
  2329. var s = "foo\n\n"
  2330. s.stripLineEnd
  2331. doAssert s == "foo\n"
  2332. s = "foo\r\n"
  2333. s.stripLineEnd
  2334. doAssert s == "foo"
  2335. if s.len > 0:
  2336. case s[^1]
  2337. of '\n':
  2338. if s.len > 1 and s[^2] == '\r':
  2339. s.setLen s.len-2
  2340. else:
  2341. s.setLen s.len-1
  2342. of '\r', '\v', '\f':
  2343. s.setLen s.len-1
  2344. else:
  2345. discard
  2346. when isMainModule:
  2347. proc nonStaticTests =
  2348. doAssert formatBiggestFloat(1234.567, ffDecimal, -1) == "1234.567000"
  2349. when not defined(js):
  2350. doAssert formatBiggestFloat(1234.567, ffDecimal, 0) == "1235." # <=== bug 8242
  2351. doAssert formatBiggestFloat(1234.567, ffDecimal, 1) == "1234.6"
  2352. doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001"
  2353. doAssert formatBiggestFloat(0.00000000001, ffScientific, 1, ',') in
  2354. ["1,0e-11", "1,0e-011"]
  2355. # bug #6589
  2356. when not defined(js):
  2357. doAssert formatFloat(123.456, ffScientific, precision = -1) == "1.234560e+02"
  2358. doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  2359. doAssert "${1}12 ${-1}$2" % ["a", "b"] == "a12 bb"
  2360. block: # formatSize tests
  2361. when not defined(js):
  2362. doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" # <=== bug #8231
  2363. doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
  2364. doAssert formatSize(4096) == "4KiB"
  2365. doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
  2366. doAssert formatSize(4096, includeSpace=true) == "4 KiB"
  2367. doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
  2368. block: # formatEng tests
  2369. doAssert formatEng(0, 2, trim=false) == "0.00"
  2370. doAssert formatEng(0, 2) == "0"
  2371. doAssert formatEng(53, 2, trim=false) == "53.00"
  2372. doAssert formatEng(0.053, 2, trim=false) == "53.00e-3"
  2373. doAssert formatEng(0.053, 4, trim=false) == "53.0000e-3"
  2374. doAssert formatEng(0.053, 4, trim=true) == "53e-3"
  2375. doAssert formatEng(0.053, 0) == "53e-3"
  2376. doAssert formatEng(52731234) == "52.731234e6"
  2377. doAssert formatEng(-52731234) == "-52.731234e6"
  2378. doAssert formatEng(52731234, 1) == "52.7e6"
  2379. doAssert formatEng(-52731234, 1) == "-52.7e6"
  2380. doAssert formatEng(52731234, 1, decimalSep=',') == "52,7e6"
  2381. doAssert formatEng(-52731234, 1, decimalSep=',') == "-52,7e6"
  2382. doAssert formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
  2383. doAssert formatEng(4.1, siPrefix=true, unit="V", useUnitSpace=true) == "4.1 V"
  2384. doAssert formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
  2385. doAssert formatEng(4100, siPrefix=true) == "4.1 k"
  2386. doAssert formatEng(4.1, siPrefix=true, unit="", useUnitSpace=true) == "4.1 " # Includes space
  2387. doAssert formatEng(4100, siPrefix=true, unit="") == "4.1 k"
  2388. doAssert formatEng(4100) == "4.1e3"
  2389. doAssert formatEng(4100, unit="V", useUnitSpace=true) == "4.1e3 V"
  2390. doAssert formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 "
  2391. # Don't use SI prefix as number is too big
  2392. doAssert formatEng(3.1e22, siPrefix=true, unit="a", useUnitSpace=true) == "31e21 a"
  2393. # Don't use SI prefix as number is too small
  2394. doAssert formatEng(3.1e-25, siPrefix=true, unit="A", useUnitSpace=true) == "310e-27 A"
  proc staticTests =
    ## Assertions that must hold both at run time and inside the compile-time
    ## VM: the module calls this proc directly and again from a `static:`
    ## block.
    ##
    ## Literals whose whitespace is significant are written as single-line
    ## strings with explicit `\n` escapes, so padding and indentation stay
    ## visible and cannot be silently altered by reformatting.

    # align / alignLeft pad up to the requested width and never truncate.
    doAssert align("abc", 4) == " abc"
    doAssert align("a", 0) == "a"
    doAssert align("1232", 6) == "  1232"
    doAssert align("1232", 6, '#') == "##1232"
    doAssert alignLeft("abc", 4) == "abc "
    doAssert alignLeft("a", 0) == "a"
    doAssert alignLeft("1232", 6) == "1232  "
    doAssert alignLeft("1232", 6, '#') == "1232##"

    # wordWrap: first without, then with splitting of over-long words.
    let
      inp = " this is a long text -- muchlongerthan10chars and here\nit goes"
      outp = " this is a\nlong text\n--\nmuchlongerthan10chars\n" &
             "and here\nit goes"
    doAssert wordWrap(inp, 10, false) == outp

    let
      longInp =
        """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
      longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\n" &
                 "ntoEight\nSeparate\nPartsNow"
    doAssert wordWrap(longInp, 8, true) == longOutp

    # `%` with named placeholders supplied as key/value pairs.
    doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
             "The cat eats fish."

    # replaceWord removes whole-word matches only; the blanks that surrounded
    # a removed word are both kept. An empty pattern changes nothing.
    doAssert "-ld a-ldz -ld".replaceWord("-ld") == " a-ldz "
    doAssert "-lda-ldz -ld abc".replaceWord("-ld") == "-lda-ldz  abc"
    doAssert "-lda-ldz -ld abc".replaceWord("") == "-lda-ldz -ld abc"
    doAssert "oo".replace("", "abc") == "oo"

    # parseEnum accepts "enu_D" for `enuD` and falls back to the supplied
    # default for an unknown name.
    type MyEnum = enum enA, enB, enC, enuD, enE
    doAssert parseEnum[MyEnum]("enu_D") == enuD
    doAssert parseEnum("invalid enum value", enC) == enC

    # center: with an odd amount of padding the extra fill char goes right.
    doAssert center("foo", 13) == "     foo     "
    doAssert center("foo", 0) == "foo"
    doAssert center("foo", 3, fillChar = 'a') == "foo"
    doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t"

    # count: substring (non-overlapping by default), single char, char set.
    doAssert count("foofoofoo", "foofoo") == 1
    doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
    doAssert count("foofoofoo", 'f') == 3
    doAssert count("foofoofoobar", {'f', 'b'}) == 4

    # strip: default whitespace, custom char sets, one-sided stripping.
    doAssert strip(" foofoofoo ") == "foofoofoo"
    doAssert strip("sfoofoofoos", chars = {'s'}) == "foofoofoo"
    doAssert strip("barfoofoofoobar", chars = {'b', 'a', 'r'}) == "foofoofoo"
    doAssert strip("stripme but don't strip this stripme",
                   chars = {'s', 't', 'r', 'i', 'p', 'm', 'e'}) ==
             " but don't strip this "
    doAssert strip("sfoofoofoos", leading = false, chars = {'s'}) ==
             "sfoofoofoo"
    doAssert strip("sfoofoofoos", trailing = false, chars = {'s'}) ==
             "foofoofoos"

    doAssert " foo\n bar".indent(4, "Q") == "QQQQ foo\nQQQQ bar"

    # multiReplace: the result of one replacement is not fed to another.
    doAssert "abba".multiReplace(("a", "b"), ("b", "a")) == "baab"
    doAssert "Hello World.".multiReplace(("ello", "ELLO"),
                                         ("World.", "PEOPLE!")) ==
             "HELLO PEOPLE!"
    doAssert "aaaa".multiReplace(("a", "aa"), ("aa", "bb")) == "aaaaaaaa"

    # ASCII character classification.
    doAssert isAlphaAscii('r')
    doAssert isAlphaAscii('A')
    doAssert(not isAlphaAscii('$'))

    doAssert isAlphaNumeric('3')
    doAssert isAlphaNumeric('R')
    doAssert(not isAlphaNumeric('!'))

    doAssert isDigit('3')
    doAssert(not isDigit('a'))
    doAssert(not isDigit('%'))

    doAssert isSpaceAscii('\t')
    doAssert isSpaceAscii('\l')
    doAssert(not isSpaceAscii('A'))

    doAssert(isNilOrWhitespace(""))
    doAssert(isNilOrWhitespace(" "))
    doAssert(isNilOrWhitespace("\t\l \v\r\f"))
    doAssert(not isNilOrWhitespace("ABc \td"))

    doAssert isLowerAscii('a')
    doAssert isLowerAscii('z')
    doAssert(not isLowerAscii('A'))
    doAssert(not isLowerAscii('5'))
    doAssert(not isLowerAscii('&'))
    doAssert(not isLowerAscii(' '))

    doAssert isUpperAscii('A')
    doAssert(not isUpperAscii('b'))
    doAssert(not isUpperAscii('5'))
    doAssert(not isUpperAscii('%'))

    # rsplit: char-set, single-char and string separators; `maxsplit` is
    # counted from the right-hand end.
    doAssert rsplit("foo bar", seps = Whitespace) == @["foo", "bar"]
    doAssert rsplit(" foo bar", seps = Whitespace, maxsplit = 1) ==
             @[" foo", "bar"]
    doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) ==
             @[" foo bar", ""]
    doAssert rsplit(":foo:bar", sep = ':') == @["", "foo", "bar"]
    doAssert rsplit(":foo:bar", sep = ':', maxsplit = 2) == @["", "foo", "bar"]
    doAssert rsplit(":foo:bar", sep = ':', maxsplit = 3) == @["", "foo", "bar"]
    doAssert rsplit("foothebar", sep = "the") == @["foo", "bar"]

    doAssert(unescape(r"\x013", "", "") == "\x013")

    # join: with and without separator, for strings and for integers.
    doAssert join(["foo", "bar", "baz"]) == "foobarbaz"
    doAssert join(@["foo", "bar", "baz"], ", ") == "foo, bar, baz"
    doAssert join([1, 2, 3]) == "123"
    doAssert join(@[1, 2, 3], ", ") == "1, 2, 3"

    # unindent: custom padding strings first, then plain-space indentation
    # removed with different `count` limits.
    doAssert "~~!!foo\n~~!!bar\n~~!!baz".unindent(2, "~~!!") == "foo\nbar\nbaz"
    doAssert "~~!!foo\n~~!!bar\n~~!!baz".unindent(2, "~~!!aa") ==
             "~~!!foo\n~~!!bar\n~~!!baz"
    doAssert "~~foo\n~~ bar\n~~ baz".unindent(4, "~") == "foo\n bar\n baz"
    doAssert "foo\nbar\n    baz\n  ".unindent(4) == "foo\nbar\nbaz\n"
    doAssert "foo\n    bar\n    baz\n  ".unindent(2) == "foo\n  bar\n  baz\n"
    doAssert "foo\n    bar\n    baz\n  ".unindent(100) == "foo\nbar\nbaz\n"
    doAssert "foo\n    foo\n    bar\n  ".unindent() == "foo\nfoo\nbar\n"

    # split keeps empty fields (note the two trailing blanks in `s`);
    # splitWhitespace drops them.
    let s = " this is an example  "
    let s2 = ":this;is;an:example;;"

    doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
    doAssert s2.split(seps = {':', ';'}) ==
             @["", "this", "is", "an", "example", "", ""]
    doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example  "]
    doAssert s.split(' ', maxsplit = 1) == @["", "this is an example  "]
    doAssert s.split(" ", maxsplit = 4) == @["", "this", "is", "an", "example  "]

    doAssert s.splitWhitespace() == @["this", "is", "an", "example"]
    doAssert s.splitWhitespace(maxsplit = 1) == @["this", "is an example  "]
    doAssert s.splitWhitespace(maxsplit = 2) == @["this", "is", "an example  "]
    doAssert s.splitWhitespace(maxsplit = 3) ==
             @["this", "is", "an", "example  "]
    doAssert s.splitWhitespace(maxsplit = 4) == @["this", "is", "an", "example"]

    block: # startsWith / endsWith char tests
      let s = "abcdef"
      doAssert s.startsWith('a')
      doAssert s.startsWith('b') == false
      doAssert s.endsWith('f')
      doAssert s.endsWith('a') == false
      doAssert s.endsWith('\0') == false
  2524. #echo("strutils tests passed")
  # Run-time pass: the run-time-only checks first, then the shared checks.
  nonStaticTests()
  staticTests()
  # Compile-time pass: evaluate the shared checks again inside the VM.
  static:
    staticTests()