strscans.nim 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2016 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ##[
  10. This module contains a `scanf`:idx: macro that can be used for extracting
  11. substrings from an input string. This is often easier than regular expressions.
  12. Some examples as an appetizer:
  13. .. code-block:: nim
  14. # check if input string matches a triple of integers:
  15. const input = "(1,2,4)"
  16. var x, y, z: int
  17. if scanf(input, "($i,$i,$i)", x, y, z):
  18. echo "matches and x is ", x, " y is ", y, " z is ", z
  19. # check if input string matches an ISO date followed by an identifier followed
  20. # by whitespace and a floating point number:
  21. var year, month, day: int
  22. var identifier: string
  23. var myfloat: float
  24. if scanf(input, "$i-$i-$i $w$s$f", year, month, day, identifier, myfloat):
  25. echo "yes, we have a match!"
  26. As can be seen from the examples, strings are matched verbatim except for
  27. substrings starting with ``$``. These constructions are available:
  28. ================= ========================================================
  29. ``$b`` Matches a binary integer. This uses ``parseutils.parseBin``.
  30. ``$o`` Matches an octal integer. This uses ``parseutils.parseOct``.
  31. ``$i`` Matches a decimal integer. This uses ``parseutils.parseInt``.
  32. ``$h`` Matches a hex integer. This uses ``parseutils.parseHex``.
  33. ``$f`` Matches a floating-point number. Uses ``parseFloat``.
  34. ``$w`` Matches an ASCII identifier: ``[A-Za-z_][A-Za-z_0-9]*``.
  35. ``$c`` Matches a single ASCII character.
  36. ``$s`` Skips optional whitespace.
  37. ``$$`` Matches a single dollar sign.
  38. ``$.`` Matches if the end of the input string has been reached.
  39. ``$*`` Matches until the token following the ``$*`` was found.
  40. The match is allowed to be of 0 length.
  41. ``$+`` Matches until the token following the ``$+`` was found.
  42. The match must consist of at least one char.
  43. ``${foo}`` User defined matcher. Uses the proc ``foo`` to perform
  44. the match. See below for more details.
  45. ``$[foo]`` Call user defined proc ``foo`` to **skip** some optional
  46. parts in the input string. See below for more details.
  47. ================= ========================================================
  48. Even though ``$*`` and ``$+`` look similar to the regular expressions ``.*``
  49. and ``.+``, they work quite differently. There is no non-deterministic
  50. state machine involved and the matches are non-greedy. ``[$*]``
  51. matches ``[xyz]`` via ``parseutils.parseUntil``.
  52. Furthermore no backtracking is performed, if parsing fails after a value
  53. has already been bound to a matched subexpression this value is not restored
  54. to its original value. This rarely causes problems in practice and if it does
  55. for you, it's easy enough to bind to a temporary variable first.
  56. Startswith vs full match
  57. ========================
  58. ``scanf`` returns true if the input string **starts with** the specified
  59. pattern. If instead it should only return true if there is also nothing
  60. left in the input, append ``$.`` to your pattern.
  61. User definable matchers
  62. =======================
  63. One very nice advantage over regular expressions is that ``scanf`` is
  64. extensible with ordinary Nim procs. The proc is either enclosed in ``${}``
  65. or in ``$[]``. ``${}`` matches and binds the result
  66. to a variable (that was passed to the ``scanf`` macro) while ``$[]`` merely
  67. matches optional tokens without any result binding.
  68. In this example, we define a helper proc ``someSep`` that skips some separators
  69. which we then use in our scanf pattern to help us in the matching process:
  70. .. code-block:: nim
  71. proc someSep(input: string; start: int; seps: set[char] = {':','-','.'}): int =
  72. # Note: The parameters and return value must match to what ``scanf`` requires
  73. result = 0
  74. while start+result < input.len and input[start+result] in seps: inc result
  75. if scanf(input, "$w$[someSep]$w", key, value):
  76. ...
  77. It is also possible to pass arguments to a user definable matcher:
  78. .. code-block:: nim
  79. proc ndigits(input: string; intVal: var int; start: int; n: int): int =
  80. # matches exactly ``n`` digits. Matchers need to return 0 if nothing
  81. # matched or otherwise the number of processed chars.
  82. var x = 0
  83. var i = 0
  84. while i < n and i+start < input.len and input[i+start] in {'0'..'9'}:
  85. x = x * 10 + input[i+start].ord - '0'.ord
  86. inc i
  87. # only overwrite if we had a match
  88. if i == n:
  89. result = n
  90. intVal = x
  91. # match an ISO date extracting year, month, day at the same time.
  92. # Also ensure the input ends after the ISO date:
  93. var year, month, day: int
  94. if scanf("2013-01-03", "${ndigits(4)}-${ndigits(2)}-${ndigits(2)}$.", year, month, day):
  95. ...
  96. The scanp macro
  97. ===============
  98. This module also implements a ``scanp`` macro, whose syntax somewhat resembles
  99. an EBNF or PEG grammar, except that it uses Nim's expression syntax and so has
  100. to use prefix instead of postfix operators.
  101. ============== ===============================================================
  102. ``(E)`` Grouping
  103. ``*E`` Zero or more
  104. ``+E`` One or more
  105. ``?E`` Zero or One
  106. ``E{n,m}`` From ``n`` up to ``m`` times ``E``
  107. ``~E`` Not predicate
  108. ``a ^* b`` Shortcut for ``?(a *(b a))``. Usually used for separators.
  109. ``a ^+ b`` Shortcut for ``(a *(b a))``. Usually used for separators.
  110. ``'a'`` Matches a single character
  111. ``{'a'..'b'}`` Matches a character set
  112. ``"s"`` Matches a string
  113. ``E -> a`` Bind matching to some action
  114. ``$_`` Access the currently matched character
  115. ============== ===============================================================
  116. Note that unordered or ordered choice operators (``/``, ``|``) are
  117. not implemented.
  118. Simple example that parses the ``/etc/passwd`` file line by line:
  119. .. code-block:: nim
  120. const
  121. etc_passwd = """root:x:0:0:root:/root:/bin/bash
  122. daemon:x:1:1:daemon:/usr/sbin:/bin/sh
  123. bin:x:2:2:bin:/bin:/bin/sh
  124. sys:x:3:3:sys:/dev:/bin/sh
  125. nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
  126. messagebus:x:103:107::/var/run/dbus:/bin/false
  127. """
  128. proc parsePasswd(content: string): seq[string] =
  129. result = @[]
  130. var idx = 0
  131. while true:
  132. var entry = ""
  133. if scanp(content, idx, +(~{'\L', '\0'} -> entry.add($_)), '\L'):
  134. result.add entry
  135. else:
  136. break
  137. The ``scanp`` maps the grammar code into Nim code that performs the parsing.
  138. The parsing is performed with the help of 3 helper templates that can be
  139. implemented for a custom type.
  140. These templates need to be named ``atom`` and ``nxt``. ``atom`` should be
  141. overloaded to handle both single characters and sets of characters.
  142. .. code-block:: nim
  143. import std/streams
  144. template atom(input: Stream; idx: int; c: char): bool =
  145. ## Used in scanp for the matching of atoms (usually chars).
  146. peekChar(input) == c
  147. template atom(input: Stream; idx: int; s: set[char]): bool =
  148. peekChar(input) in s
  149. template nxt(input: Stream; idx, step: int = 1) =
  150. inc(idx, step)
  151. setPosition(input, idx)
  152. if scanp(content, idx, +( ~{'\L', '\0'} -> entry.add(peekChar($input))), '\L'):
  153. result.add entry
  154. Calling ordinary Nim procs inside the macro is possible:
  155. .. code-block:: nim
  156. proc digits(s: string; intVal: var int; start: int): int =
  157. var x = 0
  158. while result+start < s.len and s[result+start] in {'0'..'9'} and s[result+start] != ':':
  159. x = x * 10 + s[result+start].ord - '0'.ord
  160. inc result
  161. intVal = x
  162. proc extractUsers(content: string): seq[string] =
  163. # Extracts the username and home directory
  164. # of each entry (with UID greater than 1000)
  165. const
  166. digits = {'0'..'9'}
  167. result = @[]
  168. var idx = 0
  169. while true:
  170. var login = ""
  171. var uid = 0
  172. var homedir = ""
  173. if scanp(content, idx, *(~ {':', '\0'}) -> login.add($_), ':', * ~ ':', ':',
  174. digits($input, uid, $index), ':', *`digits`, ':', * ~ ':', ':',
  175. *('/', * ~{':', '/'}) -> homedir.add($_), ':', *('/', * ~{'\L', '/'}), '\L'):
  176. if uid >= 1000:
  177. result.add login & " " & homedir
  178. else:
  179. break
  180. When used for matching, keep in mind that, just like with ``scanf``, no
  181. backtracking is performed.
  182. .. code-block:: nim
  183. proc skipUntil(s: string; until: string; unless = '\0'; start: int): int =
  184. # Skips all characters until the string `until` is found. Returns 0
  185. # if the char `unless` is found first or the end is reached.
  186. var i = start
  187. var u = 0
  188. while true:
  189. if i >= s.len or s[i] == unless:
  190. return 0
  191. elif s[i] == until[0]:
  192. u = 1
  193. while i+u < s.len and u < until.len and s[i+u] == until[u]:
  194. inc u
  195. if u >= until.len: break
  196. inc(i)
  197. result = i+u-start
  198. iterator collectLinks(s: string): string =
  199. const quote = {'\'', '"'}
  200. var idx, old = 0
  201. var res = ""
  202. while idx < s.len:
  203. old = idx
  204. if scanp(s, idx, "<a", skipUntil($input, "href=", '>', $index),
  205. `quote`, *( ~`quote`) -> res.add($_)):
  206. yield res
  207. res = ""
  208. idx = old + 1
  209. for r in collectLinks(body):
  210. echo r
  211. In this example both macros are combined seamlessly in order to maximise
  212. efficiency and perform different checks.
  213. .. code-block:: nim
  214. iterator parseIps*(soup: string): string =
  215. ## ipv4 only!
  216. const digits = {'0'..'9'}
  217. var a, b, c, d: int
  218. var buf = ""
  219. var idx = 0
  220. while idx < soup.len:
  221. if scanp(soup, idx, (`digits`{1,3}, '.', `digits`{1,3}, '.',
  222. `digits`{1,3}, '.', `digits`{1,3}) -> buf.add($_)):
  223. discard buf.scanf("$i.$i.$i.$i", a, b, c, d)
  224. if (a >= 0 and a <= 254) and
  225. (b >= 0 and b <= 254) and
  226. (c >= 0 and c <= 254) and
  227. (d >= 0 and d <= 254):
  228. yield buf
  229. buf.setLen(0) # need to clear `buf` each time, cause it might contain garbage
  230. idx.inc
  231. ]##
  232. import macros, parseutils
  233. import std/private/since
  234. when defined(nimPreviewSlimSystem):
  235. import std/assertions
  236. proc conditionsToIfChain(n, idx, res: NimNode; start: int): NimNode =
  237. assert n.kind == nnkStmtList
  238. if start >= n.len: return newAssignment(res, newLit true)
  239. var ifs: NimNode = nil
  240. if n[start+1].kind == nnkEmpty:
  241. ifs = conditionsToIfChain(n, idx, res, start+3)
  242. else:
  243. ifs = newIfStmt((n[start+1],
  244. newTree(nnkStmtList, newCall(bindSym"inc", idx, n[start+2]),
  245. conditionsToIfChain(n, idx, res, start+3))))
  246. result = newTree(nnkStmtList, n[start], ifs)
  247. proc notZero(x: NimNode): NimNode = newCall(bindSym"!=", x, newLit 0)
  248. proc buildUserCall(x: string; args: varargs[NimNode]): NimNode =
  249. let y = parseExpr(x)
  250. result = newTree(nnkCall)
  251. if y.kind in nnkCallKinds: result.add y[0]
  252. else: result.add y
  253. for a in args: result.add a
  254. if y.kind in nnkCallKinds:
  255. for i in 1..<y.len: result.add y[i]
  256. macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): bool =
  257. ## See top level documentation of this module about how ``scanf`` works.
  258. template matchBind(parser) {.dirty.} =
  259. var resLen = genSym(nskLet, "resLen")
  260. conds.add newLetStmt(resLen, newCall(bindSym(parser), inp, results[i], idx))
  261. conds.add resLen.notZero
  262. conds.add resLen
  263. template at(s: string; i: int): char = (if i < s.len: s[i] else: '\0')
  264. template matchError() =
  265. error("type mismatch between pattern '$" & pattern[p] & "' (position: " & $p &
  266. ") and " & $getTypeInst(results[i]) & " var '" & repr(results[i]) & "'")
  267. var i = 0
  268. var p = 0
  269. var idx = genSym(nskVar, "idx")
  270. var res = genSym(nskVar, "res")
  271. let inp = genSym(nskLet, "inp")
  272. result = newTree(nnkStmtListExpr, newLetStmt(inp, input),
  273. newVarStmt(idx, newLit 0), newVarStmt(res, newLit false))
  274. var conds = newTree(nnkStmtList)
  275. var fullMatch = false
  276. while p < pattern.len:
  277. if pattern[p] == '$':
  278. inc p
  279. case pattern[p]
  280. of '$':
  281. var resLen = genSym(nskLet, "resLen")
  282. conds.add newLetStmt(resLen, newCall(bindSym"skip", inp,
  283. newLit($pattern[p]), idx))
  284. conds.add resLen.notZero
  285. conds.add resLen
  286. of 'w':
  287. if i < results.len and getType(results[i]).typeKind == ntyString:
  288. matchBind "parseIdent"
  289. else:
  290. matchError
  291. inc i
  292. of 'c':
  293. if i < results.len and getType(results[i]).typeKind == ntyChar:
  294. matchBind "parseChar"
  295. else:
  296. matchError
  297. inc i
  298. of 'b':
  299. if i < results.len and getType(results[i]).typeKind == ntyInt:
  300. matchBind "parseBin"
  301. else:
  302. matchError
  303. inc i
  304. of 'o':
  305. if i < results.len and getType(results[i]).typeKind == ntyInt:
  306. matchBind "parseOct"
  307. else:
  308. matchError
  309. inc i
  310. of 'i':
  311. if i < results.len and getType(results[i]).typeKind == ntyInt:
  312. matchBind "parseInt"
  313. else:
  314. matchError
  315. inc i
  316. of 'h':
  317. if i < results.len and getType(results[i]).typeKind == ntyInt:
  318. matchBind "parseHex"
  319. else:
  320. matchError
  321. inc i
  322. of 'f':
  323. if i < results.len and getType(results[i]).typeKind == ntyFloat:
  324. matchBind "parseFloat"
  325. else:
  326. matchError
  327. inc i
  328. of 's':
  329. conds.add newCall(bindSym"inc", idx,
  330. newCall(bindSym"skipWhitespace", inp, idx))
  331. conds.add newEmptyNode()
  332. conds.add newEmptyNode()
  333. of '.':
  334. if p == pattern.len-1:
  335. fullMatch = true
  336. else:
  337. error("invalid format string")
  338. of '*', '+':
  339. if i < results.len and getType(results[i]).typeKind == ntyString:
  340. var min = ord(pattern[p] == '+')
  341. var q = p+1
  342. var token = ""
  343. while q < pattern.len and pattern[q] != '$':
  344. token.add pattern[q]
  345. inc q
  346. var resLen = genSym(nskLet, "resLen")
  347. conds.add newLetStmt(resLen, newCall(bindSym"parseUntil", inp,
  348. results[i], newLit(token), idx))
  349. conds.add newCall(bindSym">=", resLen, newLit min)
  350. conds.add resLen
  351. else:
  352. matchError
  353. inc i
  354. of '{':
  355. inc p
  356. var nesting = 0
  357. let start = p
  358. while true:
  359. case pattern.at(p)
  360. of '{': inc nesting
  361. of '}':
  362. if nesting == 0: break
  363. dec nesting
  364. of '\0': error("expected closing '}'")
  365. else: discard
  366. inc p
  367. let expr = pattern.substr(start, p-1)
  368. if i < results.len:
  369. var resLen = genSym(nskLet, "resLen")
  370. conds.add newLetStmt(resLen, buildUserCall(expr, inp, results[i], idx))
  371. conds.add newCall(bindSym"!=", resLen, newLit 0)
  372. conds.add resLen
  373. else:
  374. error("no var given for $" & expr & " (position: " & $p & ")")
  375. inc i
  376. of '[':
  377. inc p
  378. var nesting = 0
  379. let start = p
  380. while true:
  381. case pattern.at(p)
  382. of '[': inc nesting
  383. of ']':
  384. if nesting == 0: break
  385. dec nesting
  386. of '\0': error("expected closing ']'")
  387. else: discard
  388. inc p
  389. let expr = pattern.substr(start, p-1)
  390. conds.add newCall(bindSym"inc", idx, buildUserCall(expr, inp, idx))
  391. conds.add newEmptyNode()
  392. conds.add newEmptyNode()
  393. else: error("invalid format string")
  394. inc p
  395. else:
  396. var token = ""
  397. while p < pattern.len and pattern[p] != '$':
  398. token.add pattern[p]
  399. inc p
  400. var resLen = genSym(nskLet, "resLen")
  401. conds.add newLetStmt(resLen, newCall(bindSym"skip", inp, newLit(token), idx))
  402. conds.add resLen.notZero
  403. conds.add resLen
  404. result.add conditionsToIfChain(conds, idx, res, 0)
  405. if fullMatch:
  406. result.add newCall(bindSym"and", res,
  407. newCall(bindSym">=", idx, newCall(bindSym"len", inp)))
  408. else:
  409. result.add res
  410. macro scanTuple*(input: untyped; pattern: static[string]; matcherTypes: varargs[untyped]): untyped {.since: (1, 5).}=
  411. ## Works identically as scanf, but instead of predeclaring variables it returns a tuple.
  412. ## Tuple is started with a bool which indicates if the scan was successful
  413. ## followed by the requested data.
  414. ## If using a user defined matcher, provide the types in order they appear after pattern:
  415. ## `line.scanTuple("${yourMatcher()}", int)`
  416. runnableExamples:
  417. let (success, year, month, day, time) = scanTuple("1000-01-01 00:00:00", "$i-$i-$i$s$+")
  418. if success:
  419. assert year == 1000
  420. assert month == 1
  421. assert day == 1
  422. assert time == "00:00:00"
  423. var
  424. p = 0
  425. userMatches = 0
  426. arguments: seq[NimNode]
  427. result = newStmtList()
  428. template addVar(typ: string) =
  429. let varIdent = ident("temp" & $arguments.len)
  430. result.add(newNimNode(nnkVarSection).add(newIdentDefs(varIdent, ident(typ), newEmptyNode())))
  431. arguments.add(varIdent)
  432. while p < pattern.len:
  433. if pattern[p] == '$':
  434. inc p
  435. case pattern[p]
  436. of 'w', '*', '+':
  437. addVar("string")
  438. of 'c':
  439. addVar("char")
  440. of 'b', 'o', 'i', 'h':
  441. addVar("int")
  442. of 'f':
  443. addVar("float")
  444. of '{':
  445. if userMatches < matcherTypes.len:
  446. let varIdent = ident("temp" & $arguments.len)
  447. result.add(newNimNode(nnkVarSection).add(newIdentDefs(varIdent, matcherTypes[userMatches], newEmptyNode())))
  448. arguments.add(varIdent)
  449. inc userMatches
  450. else: discard
  451. inc p
  452. result.add nnkTupleConstr.newTree(newCall(ident("scanf"), input, newStrLitNode(pattern)))
  453. for arg in arguments:
  454. result[^1][0].add arg
  455. result[^1].add arg
  456. result = newBlockStmt(result)
  457. template atom*(input: string; idx: int; c: char): bool =
  458. ## Used in scanp for the matching of atoms (usually chars).
  459. ## EOF is matched as ``'\0'``.
  460. (idx < input.len and input[idx] == c) or (idx == input.len and c == '\0')
  461. template atom*(input: string; idx: int; s: set[char]): bool =
  462. (idx < input.len and input[idx] in s) or (idx == input.len and '\0' in s)
  463. template hasNxt*(input: string; idx: int): bool = idx < input.len
  464. #template prepare*(input: string): int = 0
  465. template success*(x: int): bool = x != 0
  466. template nxt*(input: string; idx, step: int = 1) = inc(idx, step)
  467. macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool =
  468. ## See top level documentation of this module about how ``scanp`` works.
  469. type StmtTriple = tuple[init, cond, action: NimNode]
  470. template interf(x): untyped = bindSym(x, brForceOpen)
  471. proc toIfChain(n: seq[StmtTriple]; idx, res: NimNode; start: int): NimNode =
  472. if start >= n.len: return newAssignment(res, newLit true)
  473. var ifs: NimNode = nil
  474. if n[start].cond.kind == nnkEmpty:
  475. ifs = toIfChain(n, idx, res, start+1)
  476. else:
  477. ifs = newIfStmt((n[start].cond,
  478. newTree(nnkStmtList, n[start].action,
  479. toIfChain(n, idx, res, start+1))))
  480. result = newTree(nnkStmtList, n[start].init, ifs)
  481. proc attach(x, attached: NimNode): NimNode =
  482. if attached == nil: x
  483. else: newStmtList(attached, x)
  484. proc placeholder(n, x, j: NimNode): NimNode =
  485. if n.kind == nnkPrefix and n[0].eqIdent("$"):
  486. let n1 = n[1]
  487. if n1.eqIdent"_" or n1.eqIdent"current":
  488. result = newTree(nnkBracketExpr, x, j)
  489. elif n1.eqIdent"input":
  490. result = x
  491. elif n1.eqIdent"i" or n1.eqIdent"index":
  492. result = j
  493. else:
  494. error("unknown pattern " & repr(n))
  495. else:
  496. result = copyNimNode(n)
  497. for i in 0 ..< n.len:
  498. result.add placeholder(n[i], x, j)
  499. proc atm(it, input, idx, attached: NimNode): StmtTriple =
  500. template `!!`(x): untyped = attach(x, attached)
  501. case it.kind
  502. of nnkIdent:
  503. var resLen = genSym(nskLet, "resLen")
  504. result = (newLetStmt(resLen, newCall(it, input, idx)),
  505. newCall(interf"success", resLen),
  506. !!newCall(interf"nxt", input, idx, resLen))
  507. of nnkCallKinds:
  508. # *{'A'..'Z'} !! s.add(!_)
  509. template buildWhile(input, idx, init, cond, action): untyped =
  510. mixin hasNxt
  511. while hasNxt(input, idx):
  512. init
  513. if not cond: break
  514. action
  515. # (x) a # bind action a to (x)
  516. if it[0].kind in {nnkPar, nnkTupleConstr} and it.len == 2:
  517. result = atm(it[0], input, idx, placeholder(it[1], input, idx))
  518. elif it.kind == nnkInfix and it[0].eqIdent"->":
  519. # bind matching to some action:
  520. result = atm(it[1], input, idx, placeholder(it[2], input, idx))
  521. elif it.kind == nnkInfix and it[0].eqIdent"as":
  522. let cond = if it[1].kind in nnkCallKinds: placeholder(it[1], input, idx)
  523. else: newCall(it[1], input, idx)
  524. result = (newLetStmt(it[2], cond),
  525. newCall(interf"success", it[2]),
  526. !!newCall(interf"nxt", input, idx, it[2]))
  527. elif it.kind == nnkPrefix and it[0].eqIdent"*":
  528. let (init, cond, action) = atm(it[1], input, idx, attached)
  529. result = (getAst(buildWhile(input, idx, init, cond, action)),
  530. newEmptyNode(), newEmptyNode())
  531. elif it.kind == nnkPrefix and it[0].eqIdent"+":
  532. # x+ is the same as xx*
  533. result = atm(newTree(nnkTupleConstr, it[1], newTree(nnkPrefix, ident"*", it[1])),
  534. input, idx, attached)
  535. elif it.kind == nnkPrefix and it[0].eqIdent"?":
  536. # optional.
  537. let (init, cond, action) = atm(it[1], input, idx, attached)
  538. if cond.kind == nnkEmpty:
  539. error("'?' operator applied to a non-condition")
  540. else:
  541. result = (newTree(nnkStmtList, init, newIfStmt((cond, action))),
  542. newEmptyNode(), newEmptyNode())
  543. elif it.kind == nnkPrefix and it[0].eqIdent"~":
  544. # not operator
  545. let (init, cond, action) = atm(it[1], input, idx, attached)
  546. if cond.kind == nnkEmpty:
  547. error("'~' operator applied to a non-condition")
  548. else:
  549. result = (init, newCall(bindSym"not", cond), action)
  550. elif it.kind == nnkInfix and it[0].eqIdent"|":
  551. let a = atm(it[1], input, idx, attached)
  552. let b = atm(it[2], input, idx, attached)
  553. if a.cond.kind == nnkEmpty or b.cond.kind == nnkEmpty:
  554. error("'|' operator applied to a non-condition")
  555. else:
  556. result = (newStmtList(a.init, newIfStmt((a.cond, a.action),
  557. (newTree(nnkStmtListExpr, b.init, b.cond), b.action))),
  558. newEmptyNode(), newEmptyNode())
  559. elif it.kind == nnkInfix and it[0].eqIdent"^*":
  560. # a ^* b is rewritten to: (a *(b a))?
  561. #exprList = expr ^+ comma
  562. template tmp(a, b): untyped = ?(a, *(b, a))
  563. result = atm(getAst(tmp(it[1], it[2])), input, idx, attached)
  564. elif it.kind == nnkInfix and it[0].eqIdent"^+":
  565. # a ^* b is rewritten to: (a +(b a))?
  566. template tmp(a, b): untyped = (a, *(b, a))
  567. result = atm(getAst(tmp(it[1], it[2])), input, idx, attached)
  568. elif it.kind == nnkCommand and it.len == 2 and it[0].eqIdent"pred":
  569. # enforce that the wrapped call is interpreted as a predicate, not a non-terminal:
  570. result = (newEmptyNode(), placeholder(it[1], input, idx), newEmptyNode())
  571. else:
  572. var resLen = genSym(nskLet, "resLen")
  573. result = (newLetStmt(resLen, placeholder(it, input, idx)),
  574. newCall(interf"success", resLen),
  575. !!newCall(interf"nxt", input, idx, resLen))
  576. of nnkStrLit..nnkTripleStrLit:
  577. var resLen = genSym(nskLet, "resLen")
  578. result = (newLetStmt(resLen, newCall(interf"skip", input, it, idx)),
  579. newCall(interf"success", resLen),
  580. !!newCall(interf"nxt", input, idx, resLen))
  581. of nnkCurly, nnkAccQuoted, nnkCharLit:
  582. result = (newEmptyNode(), newCall(interf"atom", input, idx, it),
  583. !!newCall(interf"nxt", input, idx))
  584. of nnkCurlyExpr:
  585. if it.len == 3 and it[1].kind == nnkIntLit and it[2].kind == nnkIntLit:
  586. var h = newTree(nnkTupleConstr, it[0])
  587. for count in 2i64 .. it[1].intVal: h.add(it[0])
  588. for count in it[1].intVal .. it[2].intVal-1:
  589. h.add(newTree(nnkPrefix, ident"?", it[0]))
  590. result = atm(h, input, idx, attached)
  591. elif it.len == 2 and it[1].kind == nnkIntLit:
  592. var h = newTree(nnkTupleConstr, it[0])
  593. for count in 2i64 .. it[1].intVal: h.add(it[0])
  594. result = atm(h, input, idx, attached)
  595. else:
  596. error("invalid pattern")
  597. of nnkPar, nnkTupleConstr:
  598. if it.len == 1 and it.kind == nnkPar:
  599. result = atm(it[0], input, idx, attached)
  600. else:
  601. # concatenation:
  602. var conds: seq[StmtTriple] = @[]
  603. for x in it: conds.add atm(x, input, idx, attached)
  604. var res = genSym(nskVar, "res")
  605. result = (newStmtList(newVarStmt(res, newLit false),
  606. toIfChain(conds, idx, res, 0)), res, newEmptyNode())
  607. else:
  608. error("invalid pattern")
  609. #var idx = genSym(nskVar, "idx")
  610. var res = genSym(nskVar, "res")
  611. result = newTree(nnkStmtListExpr, #newVarStmt(idx, newCall(interf"prepare", input)),
  612. newVarStmt(res, newLit false))
  613. var conds: seq[StmtTriple] = @[]
  614. for it in pattern:
  615. conds.add atm(it, input, idx, nil)
  616. result.add toIfChain(conds, idx, res, 0)
  617. result.add res
  618. when defined(debugScanp):
  619. echo repr result