parseutils.nim 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains helpers for parsing tokens, numbers, integers, floats,
  10. ## identifiers, etc.
  11. ##
  12. ## To unpack raw bytes look at the `streams <streams.html>`_ module.
  13. ##
  ## .. code-block:: nim
  ##    :test:
  ##
  ##    let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"]
  ##    var outp: seq[string]
  ##
  ##    for log in logs:
  ##      var res: string
  ##      if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10
  ##        outp.add(res & " - " & captureBetween(log, ' ', '_'))
  ##    doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"]
  ##
  ## .. code-block:: nim
  ##    :test:
  ##    from std/strutils import Digits, parseInt
  ##
  ##    let
  ##      input1 = "2019 school start"
  ##      input2 = "3 years back"
  ##      startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019
  ##      yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3
  ##      examYear = parseInt(startYear) + parseInt(yearsBack)
  ##    doAssert "Examination is in " & $examYear == "Examination is in 2022"
  37. ##
  38. ## **See also:**
  39. ## * `strutils module<strutils.html>`_ for combined and identical parsing proc's
  40. ## * `json module<json.html>`_ for a JSON parser
  41. ## * `parsecfg module<parsecfg.html>`_ for a configuration file parser
  42. ## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value) parser
  43. ## * `parseopt module<parseopt.html>`_ for a command line parser
  44. ## * `parsexml module<parsexml.html>`_ for a XML / HTML parser
  45. ## * `other parsers<lib.html#pure-libraries-parsers>`_ for other parsers
  46. {.push debugger: off.} # the user does not want to trace a part
  47. # of the standard library!
  48. include "system/inclrtl"
  const
    # Character classes used by the scanners below (copied from strutils).
    Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'}
    IdentStartChars = {'a'..'z', 'A'..'Z', '_'}
    # An identifier may continue with anything that can start one, or a digit.
    IdentChars = IdentStartChars + {'0'..'9'}
  proc toLower(c: char): char {.inline.} =
    # Maps 'A'..'Z' onto 'a'..'z'; every other character is returned as is.
    case c
    of 'A'..'Z': chr(ord(c) + (ord('a') - ord('A')))
    else: c
  proc parseBin*[T: SomeInteger](s: string, number: var T, start = 0,
                                 maxLen = 0): int {.noSideEffect.} =
    ## Parses a binary number and stores its value in ``number``.
    ##
    ## An optional ``0b``/``0B`` prefix is skipped and ``_`` characters between
    ## digits are ignored.
    ##
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## If error, the value of ``number`` is not changed.
    ##
    ## If ``maxLen == 0``, the parsing continues until the first non-bin
    ## character or to the end of the string. Otherwise, no more than ``maxLen``
    ## characters are parsed starting from the ``start`` position.
    ##
    ## It does not check for overflow. If the value represented by the string is
    ## too big to fit into ``number``, only the value of last fitting characters
    ## will be stored in ``number`` without producing an error.
    runnableExamples:
      var num: int
      doAssert parseBin("0100_1110_0110_1001_1110_1101", num) == 29
      doAssert num == 5138925
      doAssert parseBin("3", num) == 0
      var num8: int8
      doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8) == 32
      doAssert num8 == 0b1110_1101'i8
      doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8, 3, 9) == 9
      doAssert num8 == 0b0100_1110'i8
      var num8u: uint8
      doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8u) == 32
      doAssert num8u == 237
      var num64: int64
      doAssert parseBin("0100111001101001111011010100111001101001", num64) == 40
      doAssert num64 == 336784608873

    # One past the last index that may be inspected.
    let stop =
      if maxLen == 0: s.len
      else: min(s.len, start + maxLen)
    var pos = start
    if pos + 1 < stop and s[pos] == '0' and s[pos + 1] in {'b', 'B'}:
      inc pos, 2
    var
      acc = T(0)
      sawDigit = false
    while pos < stop:
      let ch = s[pos]
      if ch in {'0', '1'}:
        # Bits shifted out at the top are silently dropped (no overflow check).
        acc = (acc shl 1) or T(ord(ch) - ord('0'))
        sawDigit = true
      elif ch != '_':
        break
      inc pos
    if sawDigit:
      number = acc
      result = pos - start
  proc parseOct*[T: SomeInteger](s: string, number: var T, start = 0,
                                 maxLen = 0): int {.noSideEffect.} =
    ## Parses an octal number and stores its value in ``number``.
    ##
    ## An optional ``0o``/``0O`` prefix is skipped and ``_`` characters between
    ## digits are ignored.
    ##
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## If error, the value of ``number`` is not changed.
    ##
    ## If ``maxLen == 0``, the parsing continues until the first non-oct
    ## character or to the end of the string. Otherwise, no more than ``maxLen``
    ## characters are parsed starting from the ``start`` position.
    ##
    ## It does not check for overflow. If the value represented by the string is
    ## too big to fit into ``number``, only the value of last fitting characters
    ## will be stored in ``number`` without producing an error.
    runnableExamples:
      var num: int
      doAssert parseOct("0o23464755", num) == 10
      doAssert num == 5138925
      doAssert parseOct("8", num) == 0
      var num8: int8
      doAssert parseOct("0o_1464_755", num8) == 11
      doAssert num8 == -19
      doAssert parseOct("0o_1464_755", num8, 3, 3) == 3
      doAssert num8 == 102
      var num8u: uint8
      doAssert parseOct("1464755", num8u) == 7
      doAssert num8u == 237
      var num64: int64
      doAssert parseOct("2346475523464755", num64) == 16
      doAssert num64 == 86216859871725

    # One past the last index that may be inspected.
    let stop =
      if maxLen == 0: s.len
      else: min(s.len, start + maxLen)
    var pos = start
    if pos + 1 < stop and s[pos] == '0' and s[pos + 1] in {'o', 'O'}:
      inc pos, 2
    var
      acc = T(0)
      sawDigit = false
    while pos < stop:
      let ch = s[pos]
      if ch in {'0'..'7'}:
        # Three bits per octal digit; excess high bits are silently dropped.
        acc = (acc shl 3) or T(ord(ch) - ord('0'))
        sawDigit = true
      elif ch != '_':
        break
      inc pos
    if sawDigit:
      number = acc
      result = pos - start
  proc parseHex*[T: SomeInteger](s: string, number: var T, start = 0,
                                 maxLen = 0): int {.noSideEffect.} =
    ## Parses a hexadecimal number and stores its value in ``number``.
    ##
    ## An optional ``0x``/``0X`` or ``#`` prefix is skipped and ``_`` characters
    ## between digits are ignored.
    ##
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## If error, the value of ``number`` is not changed.
    ##
    ## If ``maxLen == 0``, the parsing continues until the first non-hex
    ## character or to the end of the string. Otherwise, no more than ``maxLen``
    ## characters are parsed starting from the ``start`` position.
    ##
    ## It does not check for overflow. If the value represented by the string is
    ## too big to fit into ``number``, only the value of last fitting characters
    ## will be stored in ``number`` without producing an error.
    runnableExamples:
      var num: int
      doAssert parseHex("4E_69_ED", num) == 8
      doAssert num == 5138925
      doAssert parseHex("X", num) == 0
      doAssert parseHex("#ABC", num) == 4
      var num8: int8
      doAssert parseHex("0x_4E_69_ED", num8) == 11
      doAssert num8 == 0xED'i8
      doAssert parseHex("0x_4E_69_ED", num8, 3, 2) == 2
      doAssert num8 == 0x4E'i8
      var num8u: uint8
      doAssert parseHex("0x_4E_69_ED", num8u) == 11
      doAssert num8u == 237
      var num64: int64
      doAssert parseHex("4E69ED4E69ED", num64) == 12
      doAssert num64 == 86216859871725

    # One past the last index that may be inspected.
    let stop =
      if maxLen == 0: s.len
      else: min(s.len, start + maxLen)
    var pos = start
    if pos + 1 < stop and s[pos] == '0' and s[pos + 1] in {'x', 'X'}:
      inc pos, 2
    elif pos < stop and s[pos] == '#':
      inc pos
    var
      acc = T(0)
      sawDigit = false
    while pos < stop:
      let ch = s[pos]
      var nibble = -1 # stays negative for the ignored '_' separator
      case ch
      of '0'..'9': nibble = ord(ch) - ord('0')
      of 'a'..'f': nibble = ord(ch) - ord('a') + 10
      of 'A'..'F': nibble = ord(ch) - ord('A') + 10
      of '_': discard
      else: break
      if nibble >= 0:
        # Four bits per hex digit; excess high bits are silently dropped.
        acc = (acc shl 4) or T(nibble)
        sawDigit = true
      inc pos
    if sawDigit:
      number = acc
      result = pos - start
  proc parseIdent*(s: string, ident: var string, start = 0): int =
    ## Parses an identifier beginning at ``s[start]`` and stores it in ``ident``.
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## If error, the value of `ident` is not changed.
    runnableExamples:
      var res: string
      doAssert parseIdent("Hello World", res, 0) == 5
      doAssert res == "Hello"
      doAssert parseIdent("Hello World", res, 1) == 4
      doAssert res == "ello"
      doAssert parseIdent("Hello World", res, 6) == 5
      doAssert res == "World"

    # No identifier unless the very first character may start one.
    if start >= s.len or s[start] notin IdentStartChars:
      return 0
    var stop = start + 1
    while stop < s.len and s[stop] in IdentChars:
      inc stop
    ident = substr(s, start, stop - 1)
    result = stop - start
  proc parseIdent*(s: string, start = 0): string =
    ## Parses an identifier beginning at ``s[start]`` and returns it, or an
    ## empty string in case of an error.
    runnableExamples:
      doAssert parseIdent("Hello World", 0) == "Hello"
      doAssert parseIdent("Hello World", 1) == "ello"
      doAssert parseIdent("Hello World", 5) == ""
      doAssert parseIdent("Hello World", 6) == "World"

    # No identifier unless the very first character may start one.
    if start >= s.len or s[start] notin IdentStartChars:
      return ""
    var stop = start + 1
    while stop < s.len and s[stop] in IdentChars:
      inc stop
    result = substr(s, start, stop - 1)
  proc parseChar*(s: string, c: var char, start = 0): int =
    ## Parses the single character ``s[start]``, stores it in `c` and
    ## returns 1.
    ## In case of error (if start >= s.len) it returns 0
    ## and the value of `c` is unchanged.
    runnableExamples:
      var c: char
      doAssert "nim".parseChar(c, 3) == 0
      doAssert c == '\0'
      doAssert "nim".parseChar(c, 0) == 1
      doAssert c == 'n'

    if start >= s.len:
      return 0
    c = s[start]
    return 1
  proc skipWhitespace*(s: string, start = 0): int {.inline.} =
    ## Skips the whitespace starting at ``s[start]``. Returns the number of
    ## skipped characters (0 if ``s[start]`` is not whitespace or ``start`` is
    ## at or past the end of `s`).
    runnableExamples:
      doAssert skipWhitespace("Hello World", 0) == 0
      doAssert skipWhitespace(" Hello World", 0) == 1
      doAssert skipWhitespace("Hello World", 5) == 1
      # two consecutive blanks between the words:
      doAssert skipWhitespace("Hello  World", 5) == 2

    result = 0
    while start + result < s.len and s[start + result] in Whitespace:
      inc result
  proc skip*(s, token: string, start = 0): int {.inline.} =
    ## Skips the `token` starting at ``s[start]``. Returns the length of `token`
    ## or 0 if there was no `token` at ``s[start]``. The comparison is
    ## case sensitive.
    runnableExamples:
      doAssert skip("2019-01-22", "2019", 0) == 4
      doAssert skip("2019-01-22", "19", 0) == 0
      doAssert skip("2019-01-22", "19", 2) == 2
      doAssert skip("CAPlow", "CAP", 0) == 3
      doAssert skip("CAPlow", "cap", 0) == 0

    # Length of the common prefix of `token` and `s[start .. ^1]`.
    var matched = 0
    while matched < token.len and start + matched < s.len and
        s[start + matched] == token[matched]:
      inc matched
    # Only a complete match of `token` counts.
    result = if matched == token.len: matched else: 0
  proc skipIgnoreCase*(s, token: string, start = 0): int =
    ## Same as `skip` but ASCII letter case is ignored for token matching.
    runnableExamples:
      doAssert skipIgnoreCase("CAPlow", "CAP", 0) == 3
      doAssert skipIgnoreCase("CAPlow", "cap", 0) == 3

    # Length of the case-insensitive common prefix.
    var matched = 0
    while matched < token.len and start + matched < s.len and
        toLower(s[start + matched]) == toLower(token[matched]):
      inc matched
    # Only a complete match of `token` counts.
    result = if matched == token.len: matched else: 0
  proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} =
    ## Skips all characters, beginning at ``s[start]``, until one char from the
    ## set `until` is found or the end is reached.
    ## Returns number of characters skipped.
    runnableExamples:
      doAssert skipUntil("Hello World", {'W', 'e'}, 0) == 1
      doAssert skipUntil("Hello World", {'W'}, 0) == 6
      doAssert skipUntil("Hello World", {'W', 'd'}, 0) == 6

    var pos = start
    while pos < s.len and s[pos] notin until:
      inc pos
    result = pos - start
  proc skipUntil*(s: string, until: char, start = 0): int {.inline.} =
    ## Skips all characters, beginning at ``s[start]``, until the char `until`
    ## is found or the end is reached.
    ## Returns number of characters skipped.
    runnableExamples:
      doAssert skipUntil("Hello World", 'o', 0) == 4
      doAssert skipUntil("Hello World", 'o', 4) == 0
      doAssert skipUntil("Hello World", 'W', 0) == 6
      doAssert skipUntil("Hello World", 'w', 0) == 11

    var pos = start
    while pos < s.len and s[pos] != until:
      inc pos
    result = pos - start
  proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} =
    ## Skips characters, beginning at ``s[start]``, for as long as they are
    ## members of the set `toSkip`.
    ## Returns number of characters skipped.
    runnableExamples:
      doAssert skipWhile("Hello World", {'H', 'e'}) == 2
      doAssert skipWhile("Hello World", {'e'}) == 0
      doAssert skipWhile("Hello World", {'W', 'o', 'r'}, 6) == 3

    var pos = start
    while pos < s.len and s[pos] in toSkip:
      inc pos
    result = pos - start
  proc fastSubstr(s: string; token: var string; start, length: int) =
    # Overwrites `token` with the `length` characters of `s` that begin at
    # index `start`. `token` is resized and filled in place.
    token.setLen(length)
    for offset, slot in mpairs(token):
      slot = s[start + offset]
  proc parseUntil*(s: string, token: var string, until: set[char],
                   start = 0): int {.inline.} =
    ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## A token consists of the characters notin `until`.
    runnableExamples:
      var myToken: string
      doAssert parseUntil("Hello World", myToken, {'W', 'o', 'r'}) == 4
      doAssert myToken == "Hell"
      doAssert parseUntil("Hello World", myToken, {'W', 'r'}) == 6
      doAssert myToken == "Hello "
      doAssert parseUntil("Hello World", myToken, {'W', 'r'}, 3) == 3
      doAssert myToken == "lo "

    # Measure the token first, then copy it into `token` in place.
    result = skipUntil(s, until, start)
    fastSubstr(s, token, start, result)
  proc parseUntil*(s: string, token: var string, until: char,
                   start = 0): int {.inline.} =
    ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## A token consists of any character that is not the `until` character.
    runnableExamples:
      var myToken: string
      doAssert parseUntil("Hello World", myToken, 'W') == 6
      doAssert myToken == "Hello "
      doAssert parseUntil("Hello World", myToken, 'o') == 4
      doAssert myToken == "Hell"
      doAssert parseUntil("Hello World", myToken, 'o', 2) == 2
      doAssert myToken == "ll"

    # Measure the token first, then copy it into `token` in place.
    result = skipUntil(s, until, start)
    fastSubstr(s, token, start, result)
  proc parseUntil*(s: string, token: var string, until: string,
                   start = 0): int {.inline.} =
    ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## A token consists of any character that comes before the `until` token.
    ##
    ## An empty `until` never matches, so everything up to the end of `s` is
    ## parsed; when compiled with Nim <= 1.0 an empty `until` instead yields an
    ## empty ``token`` and 0.
    runnableExamples:
      var myToken: string
      doAssert parseUntil("Hello World", myToken, "Wor") == 6
      doAssert myToken == "Hello "
      doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4
      doAssert myToken == "llo "

    when (NimMajor, NimMinor) <= (1, 0):
      if until.len == 0:
        token.setLen(0)
        return 0
    var pos = start
    block search:
      while pos < s.len:
        if until.len > 0 and s[pos] == until[0]:
          # First character matches: count how much of `until` follows.
          var matched = 1
          while matched < until.len and pos + matched < s.len and
              s[pos + matched] == until[matched]:
            inc matched
          if matched >= until.len:
            break search
        inc pos
    result = pos - start
    fastSubstr(s, token, start, result)
  proc parseWhile*(s: string, token: var string, validChars: set[char],
                   start = 0): int {.inline.} =
    ## Parses a token beginning at ``s[start]`` and stores it in ``token``.
    ## Returns the number of the parsed characters or 0 in case of an error.
    ## A token consists of the characters in `validChars`.
    runnableExamples:
      var myToken: string
      doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 0) == 0
      doAssert myToken.len() == 0
      doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 6) == 3
      doAssert myToken == "Wor"

    # Measure the token first, then copy it into `token` in place.
    result = skipWhile(s, validChars, start)
    fastSubstr(s, token, start, result)
  proc captureBetween*(s: string, first: char, second = '\0', start = 0): string =
    ## Finds the first occurrence of ``first`` at or after ``start``, then
    ## returns everything after it up to (not including) ``second``, or up to
    ## the end of `s` if ``second`` does not occur. If ``second`` is '\0',
    ## then ``first`` is used as the closing character as well.
    runnableExamples:
      doAssert captureBetween("Hello World", 'e') == "llo World"
      doAssert captureBetween("Hello World", 'e', 'r') == "llo Wo"
      doAssert captureBetween("Hello World", 'l', start = 6) == "d"

    let closing = if second == '\0': first else: second
    # Index of the character right after the opening delimiter.
    let bodyStart = start + skipUntil(s, first, start) + 1
    result = ""
    discard parseUntil(s, result, closing, bodyStart)
  proc integerOutOfRangeError() {.noinline.} =
    # Shared raise site for all integer parsers in this module.
    const msg = "Parsed integer outside of valid range"
    raise newException(ValueError, msg)
  # See #6752
  when defined(js):
    {.push overflowChecks: off.}

  proc rawParseInt(s: string, b: var BiggestInt, start = 0): int =
    # Parses an optionally signed decimal integer at `s[start]` into `b` and
    # returns the number of characters consumed (0 if there is no digit).
    # Underscores after a digit are skipped. Calls `integerOutOfRangeError`
    # when the value does not fit into `BiggestInt`.
    var pos = start
    # The value is accumulated as a non-positive number and multiplied by
    # `factor` at the end: -1 turns it positive, 1 keeps it negative.
    var factor: BiggestInt = -1
    if pos < s.len:
      case s[pos]
      of '+':
        inc pos
      of '-':
        inc pos
        factor = 1
      else:
        discard
    if pos >= s.len or s[pos] notin {'0'..'9'}:
      return 0
    b = 0
    while pos < s.len and s[pos] in {'0'..'9'}:
      let c = ord(s[pos]) - ord('0')
      # Would `b * 10 - c` drop below low(BiggestInt)?
      if b < (low(BiggestInt) + c) div 10:
        integerOutOfRangeError()
      b = b * 10 - c
      inc pos
      while pos < s.len and s[pos] == '_':
        inc pos # underscores are allowed and ignored
    # low(BiggestInt) has no positive counterpart.
    if factor == -1 and b == low(BiggestInt):
      integerOutOfRangeError()
    b = b * factor
    result = pos - start

  when defined(js):
    {.pop.} # overflowChecks: off
  proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.
    rtl, extern: "npuParseBiggestInt", noSideEffect, raises: [ValueError].} =
    ## Parses an integer starting at `start` and stores the value into `number`.
    ## Result is the number of processed chars or 0 if there is no integer
    ## (in which case `number` is left untouched).
    ## `ValueError` is raised if the parsed integer is out of the valid range.
    runnableExamples:
      var res: BiggestInt
      doAssert parseBiggestInt("9223372036854775807", res, 0) == 19
      doAssert res == 9223372036854775807

    # Parse into a temporary so that `number` is not written to when an
    # overflow exception is raised.
    var parsed: BiggestInt = 0
    result = rawParseInt(s, parsed, start)
    if result != 0:
      number = parsed
  proc parseInt*(s: string, number: var int, start = 0): int {.
    rtl, extern: "npuParseInt", noSideEffect, raises: [ValueError].} =
    ## Parses an integer starting at `start` and stores the value into `number`.
    ## Result is the number of processed chars or 0 if there is no integer
    ## (in which case `number` is left untouched).
    ## `ValueError` is raised if the parsed integer is out of the valid range.
    runnableExamples:
      var res: int
      doAssert parseInt("2019", res, 0) == 4
      doAssert res == 2019
      doAssert parseInt("2019", res, 2) == 2
      doAssert res == 19

    var wide: BiggestInt = 0
    result = parseBiggestInt(s, wide, start)
    when sizeof(int) <= 4:
      # `int` is narrower than `BiggestInt` here, so re-check the range.
      if wide > high(int) or wide < low(int):
        integerOutOfRangeError()
    if result != 0:
      number = int(wide)
  proc parseSaturatedNatural*(s: string, b: var int, start = 0): int {.
      raises: [].} =
    ## Parses a natural number (with an optional leading ``+``) into ``b``.
    ## This cannot raise an overflow error; ``b`` is set to ``high(int)`` on
    ## overflow instead.
    ## The number of processed characters is returned, or 0 if there is no
    ## digit to parse (``b`` is then left untouched).
    ## This is usually what you really want to use instead of `parseInt`:idx:.
    runnableExamples:
      var res = 0
      discard parseSaturatedNatural("848", res)
      doAssert res == 848

    var pos = start
    if pos < s.len and s[pos] == '+':
      inc pos
    if pos < s.len and s[pos] in {'0'..'9'}:
      b = 0
      while pos < s.len and s[pos] in {'0'..'9'}:
        let digit = ord(s[pos]) - ord('0')
        # Clamp instead of overflowing; once clamped, `b` stays at high(int).
        b =
          if b <= (high(int) - digit) div 10: b * 10 + digit
          else: high(int)
        inc pos
        while pos < s.len and s[pos] == '_':
          inc pos # underscores are allowed and ignored
      result = pos - start
  proc rawParseUInt(s: string, b: var BiggestUInt, start = 0): int =
    # Parses an unsigned decimal integer (optional leading '+') at `s[start]`
    # into `b` and returns the number of characters consumed, or 0 if there is
    # no digit. Underscores after a digit are skipped. `b` is only written
    # after a successful parse.
    #
    # Calls `integerOutOfRangeError` when a '-' is directly followed by a
    # digit, or when the value does not fit into `BiggestUInt`.
    var
      res = 0.BiggestUInt
      i = start
    if i < s.len - 1 and s[i] == '-' and s[i + 1] in {'0'..'9'}:
      integerOutOfRangeError()
    if i < s.len and s[i] == '+': inc(i) # a leading '+' is allowed
    if i < s.len and s[i] in {'0'..'9'}:
      while i < s.len and s[i] in {'0'..'9'}:
        let digit = (ord(s[i]) - ord('0')).BiggestUInt
        # Unsigned arithmetic wraps around, so the overflow test must happen
        # *before* the multiplication: comparing the wrapped result against
        # the previous value misses wraps that land above it (for example
        # 27670116110564327424 would silently parse as 2^63).
        if res > (high(BiggestUInt) - digit) div 10:
          integerOutOfRangeError()
        res = res * 10 + digit
        inc(i)
        while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored
      b = res
      result = i - start
  proc parseBiggestUInt*(s: string, number: var BiggestUInt, start = 0): int {.
    rtl, extern: "npuParseBiggestUInt", noSideEffect, raises: [ValueError].} =
    ## Parses an unsigned integer starting at `start` and stores the value
    ## into `number`.
    ## Result is the number of processed chars or 0 if there is no integer
    ## (in which case `number` is left untouched).
    ## `ValueError` is raised if the parsed integer is out of the valid range.
    runnableExamples:
      var res: BiggestUInt
      doAssert parseBiggestUInt("12", res, 0) == 2
      doAssert res == 12
      doAssert parseBiggestUInt("1111111111111111111", res, 0) == 19
      doAssert res == 1111111111111111111'u64

    # Parse into a temporary so that `number` is not written to when an
    # overflow exception is raised.
    var parsed: BiggestUInt = 0
    result = rawParseUInt(s, parsed, start)
    if result != 0:
      number = parsed
  proc parseUInt*(s: string, number: var uint, start = 0): int {.
    rtl, extern: "npuParseUInt", noSideEffect, raises: [ValueError].} =
    ## Parses an unsigned integer starting at `start` and stores the value
    ## into `number`.
    ## Result is the number of processed chars or 0 if there is no integer
    ## (in which case `number` is left untouched).
    ## `ValueError` is raised if the parsed integer is out of the valid range.
    runnableExamples:
      var res: uint
      doAssert parseUInt("3450", res) == 4
      doAssert res == 3450
      doAssert parseUInt("3450", res, 2) == 2
      doAssert res == 50

    var wide: BiggestUInt = 0
    result = parseBiggestUInt(s, wide, start)
    when sizeof(BiggestUInt) > sizeof(uint) and sizeof(uint) <= 4:
      # `uint` is narrower than `BiggestUInt` here, so re-check the range.
      if wide > 0xFFFF_FFFF'u64:
        integerOutOfRangeError()
    if result != 0:
      number = uint(wide)
  proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {.
    magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect.}
    ## Parses a float starting at `start` and stores the value into `number`.
    ## Result is the number of processed chars, or 0 if a parsing error
    ## occurred.
    ##
    ## This is only a declaration; the implementation is supplied through the
    ## ``magic``/``importc`` pragmas.
  proc parseFloat*(s: string, number: var float, start = 0): int {.
    rtl, extern: "npuParseFloat", noSideEffect.} =
    ## Parses a float starting at `start` and stores the value into `number`.
    ## Result is the number of processed chars or 0 if a parsing error
    ## occurred (in which case `number` is left untouched).
    runnableExamples:
      var res: float
      doAssert parseFloat("32", res, 0) == 2
      doAssert res == 32.0
      doAssert parseFloat("32.57", res, 0) == 5
      doAssert res == 32.57
      doAssert parseFloat("32.57", res, 3) == 2
      doAssert res == 57.00

    # Delegate to the `BiggestFloat` parser and copy the value on success.
    var parsed: BiggestFloat = 0.0
    result = parseBiggestFloat(s, parsed, start)
    if result != 0:
      number = parsed
  type
    InterpolatedKind* = enum ## Tells which part of an interpolated string
                             ## `interpolatedFragments` has yielded; the
                             ## members are illustrated with the string
                             ## "str$$$var${expr}".
      ikStr,                 ## the ``str`` part (plain text)
      ikDollar,              ## the escaped ``$`` part (written as ``$$``)
      ikVar,                 ## the ``var`` part (written as ``$var``)
      ikExpr                 ## the ``expr`` part (written as ``${expr}``)
  iterator interpolatedFragments*(s: string): tuple[kind: InterpolatedKind,
      value: string] =
    ## Tokenizes the string `s` into substrings for interpolation purposes.
    ##
    ## Yields ``(kind, value)`` pairs: plain text (``ikStr``), ``$$``
    ## (``ikDollar``, value ``$``), ``$name`` (``ikVar``, value ``name``) and
    ## ``${...}`` (``ikExpr``, value is the text between the braces; nested
    ## braces are allowed).
    ##
    ## Raises `ValueError` if a ``${`` has no matching ``}`` or if a ``$`` is
    ## not followed by ``{``, ``$`` or an identifier start character.
    runnableExamples:
      var outp: seq[tuple[kind: InterpolatedKind, value: string]]
      for k, v in interpolatedFragments(" $this is ${an example} $$"):
        outp.add (k, v)
      doAssert outp == @[(ikStr, " "),
                         (ikVar, "this"),
                         (ikStr, " is "),
                         (ikExpr, "an example"),
                         (ikStr, " "),
                         (ikDollar, "$")]

    var fragStart = 0
    while true:
      var
        fragEnd = fragStart # becomes one past the end of the raw fragment
        kind = ikStr
      if fragEnd < s.len and s[fragEnd] == '$':
        # The character after '$' decides what kind of fragment this is.
        let hasNext = fragEnd + 1 < s.len
        if hasNext and s[fragEnd + 1] == '{':
          inc fragEnd, 2
          var
            depth = 0
            closed = false
          while fragEnd < s.len and not closed:
            let ch = s[fragEnd]
            if ch == '{':
              inc depth
            elif ch == '}':
              if depth == 0: closed = true
              else: dec depth
            inc fragEnd
          if not closed:
            raise newException(ValueError,
              "Expected closing '}': " & substr(s, fragStart, s.high))
          inc fragStart, 2 # skip ${
          kind = ikExpr
        elif hasNext and s[fragEnd + 1] in IdentStartChars:
          inc fragEnd, 2
          while fragEnd < s.len and s[fragEnd] in IdentChars:
            inc fragEnd
          inc fragStart # skip $
          kind = ikVar
        elif hasNext and s[fragEnd + 1] == '$':
          inc fragEnd, 2
          inc fragStart # skip $
          kind = ikDollar
        else:
          raise newException(ValueError,
            "Unable to parse a variable name at " & substr(s, fragStart, s.high))
      else:
        # Plain text runs up to the next '$' or the end of the string.
        while fragEnd < s.len and s[fragEnd] != '$':
          inc fragEnd
      if fragEnd <= fragStart:
        break
      # do not copy the trailing } for ikExpr:
      let lastIdx = if kind == ikExpr: fragEnd - 2 else: fragEnd - 1
      yield (kind, substr(s, fragStart, lastIdx))
      fragStart = fragEnd
  630. {.pop.}