strscans.nim 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2016 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ##[
  10. This module contains a `scanf`:idx: macro that can be used for extracting
  11. substrings from an input string. This is often easier than regular expressions.
  12. Some examples as an appetizer:
  13. ```nim
  14. # check if input string matches a triple of integers:
  15. const input = "(1,2,4)"
  16. var x, y, z: int
  17. if scanf(input, "($i,$i,$i)", x, y, z):
  18. echo "matches and x is ", x, " y is ", y, " z is ", z
  19. # check if input string matches an ISO date followed by an identifier followed
  20. # by whitespace and a floating point number:
  21. var year, month, day: int
  22. var identifier: string
  23. var myfloat: float
  24. if scanf(input, "$i-$i-$i $w$s$f", year, month, day, identifier, myfloat):
  25. echo "yes, we have a match!"
  26. ```
  27. As can be seen from the examples, strings are matched verbatim except for
  28. substrings starting with ``$``. These constructions are available:
  29. ================= ========================================================
  30. ``$b`` Matches a binary integer. This uses ``parseutils.parseBin``.
  31. ``$o`` Matches an octal integer. This uses ``parseutils.parseOct``.
  32. ``$i`` Matches a decimal integer. This uses ``parseutils.parseInt``.
  33. ``$h`` Matches a hex integer. This uses ``parseutils.parseHex``.
  34. ``$f`` Matches a floating-point number. Uses ``parseFloat``.
  35. ``$w`` Matches an ASCII identifier: ``[A-Za-z_][A-Za-z_0-9]*``.
  36. ``$c`` Matches a single ASCII character.
  37. ``$s`` Skips optional whitespace.
  38. ``$$`` Matches a single dollar sign.
  39. ``$.`` Matches if the end of the input string has been reached.
  40. ``$*`` Matches until the token following the ``$*`` was found.
  41. The match is allowed to be of 0 length.
  42. ``$+`` Matches until the token following the ``$+`` was found.
  43. The match must consist of at least one char.
  44. ``${foo}`` User defined matcher. Uses the proc ``foo`` to perform
  45. the match. See below for more details.
  46. ``$[foo]`` Call user defined proc ``foo`` to **skip** some optional
  47. parts in the input string. See below for more details.
  48. ================= ========================================================
  49. Even though ``$*`` and ``$+`` look similar to the regular expressions ``.*``
  50. and ``.+``, they work quite differently. There is no non-deterministic
  51. state machine involved and the matches are non-greedy. ``[$*]``
  52. matches ``[xyz]`` via ``parseutils.parseUntil``.
  53. Furthermore no backtracking is performed, if parsing fails after a value
  54. has already been bound to a matched subexpression this value is not restored
  55. to its original value. This rarely causes problems in practice and if it does
  56. for you, it's easy enough to bind to a temporary variable first.
  57. Startswith vs full match
  58. ========================
  59. ``scanf`` returns true if the input string **starts with** the specified
  60. pattern. If instead it should only return true if there is also nothing
  61. left in the input, append ``$.`` to your pattern.
  62. User definable matchers
  63. =======================
  64. One very nice advantage over regular expressions is that ``scanf`` is
  65. extensible with ordinary Nim procs. The proc is either enclosed in ``${}``
  66. or in ``$[]``. ``${}`` matches and binds the result
  67. to a variable (that was passed to the ``scanf`` macro) while ``$[]`` merely
  68. matches optional tokens without any result binding.
  69. In this example, we define a helper proc ``someSep`` that skips some separators
  70. which we then use in our scanf pattern to help us in the matching process:
  71. ```nim
  72. proc someSep(input: string; start: int; seps: set[char] = {':','-','.'}): int =
  73. # Note: The parameters and return value must match to what ``scanf`` requires
  74. result = 0
  75. while start+result < input.len and input[start+result] in seps: inc result
  76. if scanf(input, "$w$[someSep]$w", key, value):
  77. ...
  78. ```
  79. It is also possible to pass arguments to a user definable matcher:
  80. ```nim
  81. proc ndigits(input: string; intVal: var int; start: int; n: int): int =
  82. # matches exactly ``n`` digits. Matchers need to return 0 if nothing
  83. # matched or otherwise the number of processed chars.
  84. var x = 0
  85. var i = 0
  86. while i < n and i+start < input.len and input[i+start] in {'0'..'9'}:
  87. x = x * 10 + input[i+start].ord - '0'.ord
  88. inc i
  89. # only overwrite if we had a match
  90. if i == n:
  91. result = n
  92. intVal = x
  93. # match an ISO date extracting year, month, day at the same time.
  94. # Also ensure the input ends after the ISO date:
  95. var year, month, day: int
  96. if scanf("2013-01-03", "${ndigits(4)}-${ndigits(2)}-${ndigits(2)}$.", year, month, day):
  97. ...
  98. ```
  99. The scanp macro
  100. ===============
  101. This module also implements a ``scanp`` macro, whose syntax somewhat resembles
  102. an EBNF or PEG grammar, except that it uses Nim's expression syntax and so has
  103. to use prefix instead of postfix operators.
  104. ============== ===============================================================
  105. ``(E)`` Grouping
  106. ``*E`` Zero or more
  107. ``+E`` One or more
  108. ``?E`` Zero or One
  109. ``E{n,m}`` From ``n`` up to ``m`` times ``E``
  110. ``~E`` Not predicate
  111. ``a ^* b`` Shortcut for ``?(a *(b a))``. Usually used for separators.
  112. ``a ^+ b`` Shortcut for ``(a *(b a))``. Usually used for separators.
  113. ``'a'`` Matches a single character
  114. ``{'a'..'b'}`` Matches a character set
  115. ``"s"`` Matches a string
  116. ``E -> a`` Bind matching to some action
  117. ``$_`` Access the currently matched character
  118. ============== ===============================================================
  119. Note that unordered or ordered choice operators (``/``, ``|``) are
  120. not implemented.
  121. Simple example that parses the ``/etc/passwd`` file line by line:
  122. ```nim
  123. const
  124. etc_passwd = """root:x:0:0:root:/root:/bin/bash
  125. daemon:x:1:1:daemon:/usr/sbin:/bin/sh
  126. bin:x:2:2:bin:/bin:/bin/sh
  127. sys:x:3:3:sys:/dev:/bin/sh
  128. nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
  129. messagebus:x:103:107::/var/run/dbus:/bin/false
  130. """
  131. proc parsePasswd(content: string): seq[string] =
  132. result = @[]
  133. var idx = 0
  134. while true:
  135. var entry = ""
  136. if scanp(content, idx, +(~{'\L', '\0'} -> entry.add($_)), '\L'):
  137. result.add entry
  138. else:
  139. break
  140. ```
  141. The ``scanp`` maps the grammar code into Nim code that performs the parsing.
  142. The parsing is performed with the help of 3 helper templates that can be
  143. implemented for a custom type.
  144. These templates need to be named ``atom`` and ``nxt``. ``atom`` should be
  145. overloaded to handle both `char` and `set[char]`.
  146. ```nim
  147. import std/streams
  148. template atom(input: Stream; idx: int; c: char): bool =
  149. ## Used in scanp for the matching of atoms (usually chars).
  150. peekChar(input) == c
  151. template atom(input: Stream; idx: int; s: set[char]): bool =
  152. peekChar(input) in s
  153. template nxt(input: Stream; idx, step: int = 1) =
  154. inc(idx, step)
  155. setPosition(input, idx)
  156. if scanp(content, idx, +( ~{'\L', '\0'} -> entry.add(peekChar($input))), '\L'):
  157. result.add entry
  158. ```
  159. Calling ordinary Nim procs inside the macro is possible:
  160. ```nim
  161. proc digits(s: string; intVal: var int; start: int): int =
  162. var x = 0
  163. while result+start < s.len and s[result+start] in {'0'..'9'} and s[result+start] != ':':
  164. x = x * 10 + s[result+start].ord - '0'.ord
  165. inc result
  166. intVal = x
  167. proc extractUsers(content: string): seq[string] =
  168. # Extracts the username and home directory
  169. # of each entry (with UID greater than 1000)
  170. const
  171. digits = {'0'..'9'}
  172. result = @[]
  173. var idx = 0
  174. while true:
  175. var login = ""
  176. var uid = 0
  177. var homedir = ""
  178. if scanp(content, idx, *(~ {':', '\0'}) -> login.add($_), ':', * ~ ':', ':',
  179. digits($input, uid, $index), ':', *`digits`, ':', * ~ ':', ':',
  180. *('/', * ~{':', '/'}) -> homedir.add($_), ':', *('/', * ~{'\L', '/'}), '\L'):
  181. if uid >= 1000:
  182. result.add login & " " & homedir
  183. else:
  184. break
  185. ```
  186. When used for matching, keep in mind that, just like with ``scanf``, no
  187. backtracking is performed.
  188. ```nim
  189. proc skipUntil(s: string; until: string; unless = '\0'; start: int): int =
  190. # Skips all characters until the string `until` is found. Returns 0
  191. # if the char `unless` is found first or the end is reached.
  192. var i = start
  193. var u = 0
  194. while true:
  195. if i >= s.len or s[i] == unless:
  196. return 0
  197. elif s[i] == until[0]:
  198. u = 1
  199. while i+u < s.len and u < until.len and s[i+u] == until[u]:
  200. inc u
  201. if u >= until.len: break
  202. inc(i)
  203. result = i+u-start
  204. iterator collectLinks(s: string): string =
  205. const quote = {'\'', '"'}
  206. var idx, old = 0
  207. var res = ""
  208. while idx < s.len:
  209. old = idx
  210. if scanp(s, idx, "<a", skipUntil($input, "href=", '>', $index),
  211. `quote`, *( ~`quote`) -> res.add($_)):
  212. yield res
  213. res = ""
  214. idx = old + 1
  215. for r in collectLinks(body):
  216. echo r
  217. ```
  218. In this example both macros are combined seamlessly in order to maximise
  219. efficiency and perform different checks.
  220. ```nim
  221. iterator parseIps*(soup: string): string =
  222. ## ipv4 only!
  223. const digits = {'0'..'9'}
  224. var a, b, c, d: int
  225. var buf = ""
  226. var idx = 0
  227. while idx < soup.len:
  228. if scanp(soup, idx, (`digits`{1,3}, '.', `digits`{1,3}, '.',
  229. `digits`{1,3}, '.', `digits`{1,3}) -> buf.add($_)):
  230. discard buf.scanf("$i.$i.$i.$i", a, b, c, d)
  231. if (a >= 0 and a <= 254) and
  232. (b >= 0 and b <= 254) and
  233. (c >= 0 and c <= 254) and
  234. (d >= 0 and d <= 254):
  235. yield buf
  236. buf.setLen(0) # need to clear `buf` each time, cause it might contain garbage
  237. idx.inc
  238. ```
  239. ]##
  240. import macros, parseutils
  241. import std/private/since
  242. when defined(nimPreviewSlimSystem):
  243. import std/assertions
  244. proc conditionsToIfChain(n, idx, res: NimNode; start: int): NimNode =
  245. assert n.kind == nnkStmtList
  246. if start >= n.len: return newAssignment(res, newLit true)
  247. var ifs: NimNode = nil
  248. if n[start+1].kind == nnkEmpty:
  249. ifs = conditionsToIfChain(n, idx, res, start+3)
  250. else:
  251. ifs = newIfStmt((n[start+1],
  252. newTree(nnkStmtList, newCall(bindSym"inc", idx, n[start+2]),
  253. conditionsToIfChain(n, idx, res, start+3))))
  254. result = newTree(nnkStmtList, n[start], ifs)
  255. proc notZero(x: NimNode): NimNode = newCall(bindSym"!=", x, newLit 0)
  256. proc buildUserCall(x: string; args: varargs[NimNode]): NimNode =
  257. let y = parseExpr(x)
  258. result = newTree(nnkCall)
  259. if y.kind in nnkCallKinds: result.add y[0]
  260. else: result.add y
  261. for a in args: result.add a
  262. if y.kind in nnkCallKinds:
  263. for i in 1..<y.len: result.add y[i]
  264. macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): bool =
  265. ## See top level documentation of this module about how ``scanf`` works.
  266. template matchBind(parser) {.dirty.} =
  267. var resLen = genSym(nskLet, "resLen")
  268. conds.add newLetStmt(resLen, newCall(bindSym(parser), inp, results[i], idx))
  269. conds.add resLen.notZero
  270. conds.add resLen
  271. template at(s: string; i: int): char = (if i < s.len: s[i] else: '\0')
  272. template matchError() =
  273. error("type mismatch between pattern '$" & pattern[p] & "' (position: " & $p &
  274. ") and " & $getTypeInst(results[i]) & " var '" & repr(results[i]) & "'")
  275. var i = 0
  276. var p = 0
  277. var idx = genSym(nskVar, "idx")
  278. var res = genSym(nskVar, "res")
  279. let inp = genSym(nskLet, "inp")
  280. result = newTree(nnkStmtListExpr, newLetStmt(inp, input),
  281. newVarStmt(idx, newLit 0), newVarStmt(res, newLit false))
  282. var conds = newTree(nnkStmtList)
  283. var fullMatch = false
  284. while p < pattern.len:
  285. if pattern[p] == '$':
  286. inc p
  287. case pattern[p]
  288. of '$':
  289. var resLen = genSym(nskLet, "resLen")
  290. conds.add newLetStmt(resLen, newCall(bindSym"skip", inp,
  291. newLit($pattern[p]), idx))
  292. conds.add resLen.notZero
  293. conds.add resLen
  294. of 'w':
  295. if i < results.len and getType(results[i]).typeKind == ntyString:
  296. matchBind "parseIdent"
  297. else:
  298. matchError
  299. inc i
  300. of 'c':
  301. if i < results.len and getType(results[i]).typeKind == ntyChar:
  302. matchBind "parseChar"
  303. else:
  304. matchError
  305. inc i
  306. of 'b':
  307. if i < results.len and getType(results[i]).typeKind == ntyInt:
  308. matchBind "parseBin"
  309. else:
  310. matchError
  311. inc i
  312. of 'o':
  313. if i < results.len and getType(results[i]).typeKind == ntyInt:
  314. matchBind "parseOct"
  315. else:
  316. matchError
  317. inc i
  318. of 'i':
  319. if i < results.len and getType(results[i]).typeKind == ntyInt:
  320. matchBind "parseInt"
  321. else:
  322. matchError
  323. inc i
  324. of 'h':
  325. if i < results.len and getType(results[i]).typeKind == ntyInt:
  326. matchBind "parseHex"
  327. else:
  328. matchError
  329. inc i
  330. of 'f':
  331. if i < results.len and getType(results[i]).typeKind == ntyFloat:
  332. matchBind "parseFloat"
  333. else:
  334. matchError
  335. inc i
  336. of 's':
  337. conds.add newCall(bindSym"inc", idx,
  338. newCall(bindSym"skipWhitespace", inp, idx))
  339. conds.add newEmptyNode()
  340. conds.add newEmptyNode()
  341. of '.':
  342. if p == pattern.len-1:
  343. fullMatch = true
  344. else:
  345. error("invalid format string")
  346. of '*', '+':
  347. if i < results.len and getType(results[i]).typeKind == ntyString:
  348. var min = ord(pattern[p] == '+')
  349. var q = p+1
  350. var token = ""
  351. while q < pattern.len and pattern[q] != '$':
  352. token.add pattern[q]
  353. inc q
  354. var resLen = genSym(nskLet, "resLen")
  355. conds.add newLetStmt(resLen, newCall(bindSym"parseUntil", inp,
  356. results[i], newLit(token), idx))
  357. conds.add newCall(bindSym">=", resLen, newLit min)
  358. conds.add resLen
  359. else:
  360. matchError
  361. inc i
  362. of '{':
  363. inc p
  364. var nesting = 0
  365. let start = p
  366. while true:
  367. case pattern.at(p)
  368. of '{': inc nesting
  369. of '}':
  370. if nesting == 0: break
  371. dec nesting
  372. of '\0': error("expected closing '}'")
  373. else: discard
  374. inc p
  375. let expr = pattern.substr(start, p-1)
  376. if i < results.len:
  377. var resLen = genSym(nskLet, "resLen")
  378. conds.add newLetStmt(resLen, buildUserCall(expr, inp, results[i], idx))
  379. conds.add newCall(bindSym"!=", resLen, newLit 0)
  380. conds.add resLen
  381. else:
  382. error("no var given for $" & expr & " (position: " & $p & ")")
  383. inc i
  384. of '[':
  385. inc p
  386. var nesting = 0
  387. let start = p
  388. while true:
  389. case pattern.at(p)
  390. of '[': inc nesting
  391. of ']':
  392. if nesting == 0: break
  393. dec nesting
  394. of '\0': error("expected closing ']'")
  395. else: discard
  396. inc p
  397. let expr = pattern.substr(start, p-1)
  398. conds.add newCall(bindSym"inc", idx, buildUserCall(expr, inp, idx))
  399. conds.add newEmptyNode()
  400. conds.add newEmptyNode()
  401. else: error("invalid format string")
  402. inc p
  403. else:
  404. var token = ""
  405. while p < pattern.len and pattern[p] != '$':
  406. token.add pattern[p]
  407. inc p
  408. var resLen = genSym(nskLet, "resLen")
  409. conds.add newLetStmt(resLen, newCall(bindSym"skip", inp, newLit(token), idx))
  410. conds.add resLen.notZero
  411. conds.add resLen
  412. result.add conditionsToIfChain(conds, idx, res, 0)
  413. if fullMatch:
  414. result.add newCall(bindSym"and", res,
  415. newCall(bindSym">=", idx, newCall(bindSym"len", inp)))
  416. else:
  417. result.add res
  418. macro scanTuple*(input: untyped; pattern: static[string]; matcherTypes: varargs[untyped]): untyped {.since: (1, 5).}=
  419. ## Works identically as scanf, but instead of predeclaring variables it returns a tuple.
  420. ## Tuple is started with a bool which indicates if the scan was successful
  421. ## followed by the requested data.
  422. ## If using a user defined matcher, provide the types in order they appear after pattern:
  423. ## `line.scanTuple("${yourMatcher()}", int)`
  424. runnableExamples:
  425. let (success, year, month, day, time) = scanTuple("1000-01-01 00:00:00", "$i-$i-$i$s$+")
  426. if success:
  427. assert year == 1000
  428. assert month == 1
  429. assert day == 1
  430. assert time == "00:00:00"
  431. var
  432. p = 0
  433. userMatches = 0
  434. arguments: seq[NimNode]
  435. result = newStmtList()
  436. template addVar(typ: string) =
  437. let varIdent = ident("temp" & $arguments.len)
  438. result.add(newNimNode(nnkVarSection).add(newIdentDefs(varIdent, ident(typ), newEmptyNode())))
  439. arguments.add(varIdent)
  440. while p < pattern.len:
  441. if pattern[p] == '$':
  442. inc p
  443. case pattern[p]
  444. of 'w', '*', '+':
  445. addVar("string")
  446. of 'c':
  447. addVar("char")
  448. of 'b', 'o', 'i', 'h':
  449. addVar("int")
  450. of 'f':
  451. addVar("float")
  452. of '{':
  453. if userMatches < matcherTypes.len:
  454. let varIdent = ident("temp" & $arguments.len)
  455. result.add(newNimNode(nnkVarSection).add(newIdentDefs(varIdent, matcherTypes[userMatches], newEmptyNode())))
  456. arguments.add(varIdent)
  457. inc userMatches
  458. else: discard
  459. inc p
  460. result.add nnkTupleConstr.newTree(newCall(ident("scanf"), input, newStrLitNode(pattern)))
  461. for arg in arguments:
  462. result[^1][0].add arg
  463. result[^1].add arg
  464. result = newBlockStmt(result)
  465. template atom*(input: string; idx: int; c: char): bool =
  466. ## Used in scanp for the matching of atoms (usually chars).
  467. ## EOF is matched as ``'\0'``.
  468. (idx < input.len and input[idx] == c) or (idx == input.len and c == '\0')
  469. template atom*(input: string; idx: int; s: set[char]): bool =
  470. (idx < input.len and input[idx] in s) or (idx == input.len and '\0' in s)
  471. template hasNxt*(input: string; idx: int): bool = idx < input.len
  472. #template prepare*(input: string): int = 0
  473. template success*(x: int): bool = x != 0
  474. template nxt*(input: string; idx, step: int = 1) = inc(idx, step)
  475. macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool =
  476. ## See top level documentation of this module about how ``scanp`` works.
  477. type StmtTriple = tuple[init, cond, action: NimNode]
  478. template interf(x): untyped = bindSym(x, brForceOpen)
  479. proc toIfChain(n: seq[StmtTriple]; idx, res: NimNode; start: int): NimNode =
  480. if start >= n.len: return newAssignment(res, newLit true)
  481. var ifs: NimNode = nil
  482. if n[start].cond.kind == nnkEmpty:
  483. ifs = toIfChain(n, idx, res, start+1)
  484. else:
  485. ifs = newIfStmt((n[start].cond,
  486. newTree(nnkStmtList, n[start].action,
  487. toIfChain(n, idx, res, start+1))))
  488. result = newTree(nnkStmtList, n[start].init, ifs)
  489. proc attach(x, attached: NimNode): NimNode =
  490. if attached == nil: x
  491. else: newStmtList(attached, x)
  492. proc placeholder(n, x, j: NimNode): NimNode =
  493. if n.kind == nnkPrefix and n[0].eqIdent("$"):
  494. let n1 = n[1]
  495. if n1.eqIdent"_" or n1.eqIdent"current":
  496. result = newTree(nnkBracketExpr, x, j)
  497. elif n1.eqIdent"input":
  498. result = x
  499. elif n1.eqIdent"i" or n1.eqIdent"index":
  500. result = j
  501. else:
  502. error("unknown pattern " & repr(n))
  503. else:
  504. result = copyNimNode(n)
  505. for i in 0 ..< n.len:
  506. result.add placeholder(n[i], x, j)
  507. proc atm(it, input, idx, attached: NimNode): StmtTriple =
  508. template `!!`(x): untyped = attach(x, attached)
  509. case it.kind
  510. of nnkIdent:
  511. var resLen = genSym(nskLet, "resLen")
  512. result = (newLetStmt(resLen, newCall(it, input, idx)),
  513. newCall(interf"success", resLen),
  514. !!newCall(interf"nxt", input, idx, resLen))
  515. of nnkCallKinds:
  516. # *{'A'..'Z'} !! s.add(!_)
  517. template buildWhile(input, idx, init, cond, action): untyped =
  518. mixin hasNxt
  519. while hasNxt(input, idx):
  520. init
  521. if not cond: break
  522. action
  523. # (x) a # bind action a to (x)
  524. if it[0].kind in {nnkPar, nnkTupleConstr} and it.len == 2:
  525. result = atm(it[0], input, idx, placeholder(it[1], input, idx))
  526. elif it.kind == nnkInfix and it[0].eqIdent"->":
  527. # bind matching to some action:
  528. result = atm(it[1], input, idx, placeholder(it[2], input, idx))
  529. elif it.kind == nnkInfix and it[0].eqIdent"as":
  530. let cond = if it[1].kind in nnkCallKinds: placeholder(it[1], input, idx)
  531. else: newCall(it[1], input, idx)
  532. result = (newLetStmt(it[2], cond),
  533. newCall(interf"success", it[2]),
  534. !!newCall(interf"nxt", input, idx, it[2]))
  535. elif it.kind == nnkPrefix and it[0].eqIdent"*":
  536. let (init, cond, action) = atm(it[1], input, idx, attached)
  537. result = (getAst(buildWhile(input, idx, init, cond, action)),
  538. newEmptyNode(), newEmptyNode())
  539. elif it.kind == nnkPrefix and it[0].eqIdent"+":
  540. # x+ is the same as xx*
  541. result = atm(newTree(nnkTupleConstr, it[1], newTree(nnkPrefix, ident"*", it[1])),
  542. input, idx, attached)
  543. elif it.kind == nnkPrefix and it[0].eqIdent"?":
  544. # optional.
  545. let (init, cond, action) = atm(it[1], input, idx, attached)
  546. if cond.kind == nnkEmpty:
  547. error("'?' operator applied to a non-condition")
  548. else:
  549. result = (newTree(nnkStmtList, init, newIfStmt((cond, action))),
  550. newEmptyNode(), newEmptyNode())
  551. elif it.kind == nnkPrefix and it[0].eqIdent"~":
  552. # not operator
  553. let (init, cond, action) = atm(it[1], input, idx, attached)
  554. if cond.kind == nnkEmpty:
  555. error("'~' operator applied to a non-condition")
  556. else:
  557. result = (init, newCall(bindSym"not", cond), action)
  558. elif it.kind == nnkInfix and it[0].eqIdent"|":
  559. let a = atm(it[1], input, idx, attached)
  560. let b = atm(it[2], input, idx, attached)
  561. if a.cond.kind == nnkEmpty or b.cond.kind == nnkEmpty:
  562. error("'|' operator applied to a non-condition")
  563. else:
  564. result = (newStmtList(a.init, newIfStmt((a.cond, a.action),
  565. (newTree(nnkStmtListExpr, b.init, b.cond), b.action))),
  566. newEmptyNode(), newEmptyNode())
  567. elif it.kind == nnkInfix and it[0].eqIdent"^*":
  568. # a ^* b is rewritten to: (a *(b a))?
  569. #exprList = expr ^+ comma
  570. template tmp(a, b): untyped = ?(a, *(b, a))
  571. result = atm(getAst(tmp(it[1], it[2])), input, idx, attached)
  572. elif it.kind == nnkInfix and it[0].eqIdent"^+":
  573. # a ^* b is rewritten to: (a +(b a))?
  574. template tmp(a, b): untyped = (a, *(b, a))
  575. result = atm(getAst(tmp(it[1], it[2])), input, idx, attached)
  576. elif it.kind == nnkCommand and it.len == 2 and it[0].eqIdent"pred":
  577. # enforce that the wrapped call is interpreted as a predicate, not a non-terminal:
  578. result = (newEmptyNode(), placeholder(it[1], input, idx), newEmptyNode())
  579. else:
  580. var resLen = genSym(nskLet, "resLen")
  581. result = (newLetStmt(resLen, placeholder(it, input, idx)),
  582. newCall(interf"success", resLen),
  583. !!newCall(interf"nxt", input, idx, resLen))
  584. of nnkStrLit..nnkTripleStrLit:
  585. var resLen = genSym(nskLet, "resLen")
  586. result = (newLetStmt(resLen, newCall(interf"skip", input, it, idx)),
  587. newCall(interf"success", resLen),
  588. !!newCall(interf"nxt", input, idx, resLen))
  589. of nnkCurly, nnkAccQuoted, nnkCharLit:
  590. result = (newEmptyNode(), newCall(interf"atom", input, idx, it),
  591. !!newCall(interf"nxt", input, idx))
  592. of nnkCurlyExpr:
  593. if it.len == 3 and it[1].kind == nnkIntLit and it[2].kind == nnkIntLit:
  594. var h = newTree(nnkTupleConstr, it[0])
  595. for count in 2i64 .. it[1].intVal: h.add(it[0])
  596. for count in it[1].intVal .. it[2].intVal-1:
  597. h.add(newTree(nnkPrefix, ident"?", it[0]))
  598. result = atm(h, input, idx, attached)
  599. elif it.len == 2 and it[1].kind == nnkIntLit:
  600. var h = newTree(nnkTupleConstr, it[0])
  601. for count in 2i64 .. it[1].intVal: h.add(it[0])
  602. result = atm(h, input, idx, attached)
  603. else:
  604. error("invalid pattern")
  605. of nnkPar, nnkTupleConstr:
  606. if it.len == 1 and it.kind == nnkPar:
  607. result = atm(it[0], input, idx, attached)
  608. else:
  609. # concatenation:
  610. var conds: seq[StmtTriple] = @[]
  611. for x in it: conds.add atm(x, input, idx, attached)
  612. var res = genSym(nskVar, "res")
  613. result = (newStmtList(newVarStmt(res, newLit false),
  614. toIfChain(conds, idx, res, 0)), res, newEmptyNode())
  615. else:
  616. error("invalid pattern")
  617. #var idx = genSym(nskVar, "idx")
  618. var res = genSym(nskVar, "res")
  619. result = newTree(nnkStmtListExpr, #newVarStmt(idx, newCall(interf"prepare", input)),
  620. newVarStmt(res, newLit false))
  621. var conds: seq[StmtTriple] = @[]
  622. for it in pattern:
  623. conds.add atm(it, input, idx, nil)
  624. result.add toIfChain(conds, idx, res, 0)
  625. result.add res
  626. when defined(debugScanp):
  627. echo repr result