# tpegs.nim -- tests for std/pegs
  1. discard """
  2. matrix: "--mm:refc; --mm:orc; --exceptions:goto"
  3. targets: "c cpp js"
  4. output: '''
  5. PEG AST traversal output
  6. ------------------------
  7. pkNonTerminal: Sum @(2, 3)
  8. pkSequence: (Product (('+' / '-') Product)*)
  9. pkNonTerminal: Product @(3, 7)
  10. pkSequence: (Value (('*' / '/') Value)*)
  11. pkNonTerminal: Value @(4, 5)
  12. pkOrderedChoice: (([0-9] [0-9]*) / ('(' Expr ')'))
  13. pkSequence: ([0-9] [0-9]*)
  14. pkCharChoice: [0-9]
  15. pkGreedyRepSet: [0-9]*
  16. pkSequence: ('(' Expr ')')
  17. pkChar: '('
  18. pkNonTerminal: Expr @(1, 4)
  19. pkNonTerminal: Sum @(2, 3)
  20. pkChar: ')'
  21. pkGreedyRep: (('*' / '/') Value)*
  22. pkSequence: (('*' / '/') Value)
  23. pkOrderedChoice: ('*' / '/')
  24. pkChar: '*'
  25. pkChar: '/'
  26. pkNonTerminal: Value @(4, 5)
  27. pkGreedyRep: (('+' / '-') Product)*
  28. pkSequence: (('+' / '-') Product)
  29. pkOrderedChoice: ('+' / '-')
  30. pkChar: '+'
  31. pkChar: '-'
  32. pkNonTerminal: Product @(3, 7)
  33. Event parser output
  34. -------------------
  35. @[5.0]
  36. +
  37. @[5.0, 3.0]
  38. @[8.0]
  39. /
  40. @[8.0, 2.0]
  41. @[4.0]
  42. -
  43. @[4.0, 7.0]
  44. -*
  45. @[4.0, 7.0, 22.0]
  46. @[4.0, 154.0]
  47. -
  48. @[-150.0]
  49. '''
  50. """
  51. when defined(nimHasEffectsOf):
  52. {.experimental: "strictEffects".}
  53. import std/[strutils, streams, pegs, assertions]
  54. const
  55. indent = " "
  56. let
  57. pegAst = """
  58. Expr <- Sum
  59. Sum <- Product (('+' / '-')Product)*
  60. Product <- Value (('*' / '/')Value)*
  61. Value <- [0-9]+ / '(' Expr ')'
  62. """.peg
  63. txt = "(5+3)/2-7*22"
  64. block:
  65. var
  66. outp = newStringStream()
  67. processed: seq[string] = @[]
  68. proc prt(outp: Stream, kind: PegKind, s: string; level: int = 0) =
  69. outp.writeLine indent.repeat(level) & "$1: $2" % [$kind, s]
  70. proc recLoop(p: Peg, level: int = 0) =
  71. case p.kind
  72. of pkEmpty..pkWhitespace:
  73. discard
  74. of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle:
  75. outp.prt(p.kind, $p, level)
  76. of pkChar, pkGreedyRepChar:
  77. outp.prt(p.kind, $p, level)
  78. of pkCharChoice, pkGreedyRepSet:
  79. outp.prt(p.kind, $p, level)
  80. of pkNonTerminal:
  81. outp.prt(p.kind,
  82. "$1 @($3, $4)" % [p.nt.name, $p.nt.rule.kind, $p.nt.line, $p.nt.col], level)
  83. if not(p.nt.name in processed):
  84. processed.add p.nt.name
  85. p.nt.rule.recLoop level+1
  86. of pkBackRef..pkBackRefIgnoreStyle:
  87. outp.prt(p.kind, $p, level)
  88. else:
  89. outp.prt(p.kind, $p, level)
  90. for s in items(p):
  91. s.recLoop level+1
  92. pegAst.recLoop
  93. echo "PEG AST traversal output"
  94. echo "------------------------"
  95. echo outp.data
  96. block:
  97. var
  98. pStack {.threadvar.}: seq[string]
  99. valStack {.threadvar.}: seq[float]
  100. opStack {.threadvar.}: string
  101. let
  102. parseArithExpr = pegAst.eventParser:
  103. pkNonTerminal:
  104. enter:
  105. pStack.add p.nt.name
  106. leave:
  107. pStack.setLen pStack.high
  108. if length > 0:
  109. let matchStr = s.substr(start, start+length-1)
  110. case p.nt.name
  111. of "Value":
  112. try:
  113. valStack.add matchStr.parseFloat
  114. echo valStack
  115. except ValueError:
  116. discard
  117. of "Sum", "Product":
  118. try:
  119. let val {.used.} = matchStr.parseFloat
  120. except ValueError:
  121. if valStack.len > 1 and opStack.len > 0:
  122. valStack[^2] = case opStack[^1]
  123. of '+': valStack[^2] + valStack[^1]
  124. of '-': valStack[^2] - valStack[^1]
  125. of '*': valStack[^2] * valStack[^1]
  126. else: valStack[^2] / valStack[^1]
  127. valStack.setLen valStack.high
  128. echo valStack
  129. opStack.setLen opStack.high
  130. echo opStack
  131. pkChar:
  132. leave:
  133. if length == 1 and "Value" != pStack[^1]:
  134. let matchChar = s[start]
  135. opStack.add matchChar
  136. echo opStack
  137. echo "Event parser output"
  138. echo "-------------------"
  139. let pLen = parseArithExpr(txt)
  140. doAssert txt.len == pLen
  141. import std/importutils
  142. block:
  143. proc pegsTest() =
  144. privateAccess(NonTerminal)
  145. privateAccess(Captures)
  146. if "test" =~ peg"s <- {{\ident}}": # bug #19104
  147. doAssert matches[0] == "test"
  148. doAssert matches[1] == "test", $matches[1]
  149. doAssert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27"
  150. doAssert match("(a b c)", peg"'(' @ ')'")
  151. doAssert match("W_HI_Le", peg"\y 'while'")
  152. doAssert(not match("W_HI_L", peg"\y 'while'"))
  153. doAssert(not match("W_HI_Le", peg"\y v'while'"))
  154. doAssert match("W_HI_Le", peg"y'while'")
  155. doAssert($ +digits == $peg"\d+")
  156. doAssert "0158787".match(peg"\d+")
  157. doAssert "ABC 0232".match(peg"\w+\s+\d+")
  158. doAssert "ABC".match(peg"\d+ / \w+")
  159. var accum: seq[string] = @[]
  160. for word in split("00232this02939is39an22example111", peg"\d+"):
  161. accum.add(word)
  162. doAssert(accum == @["this", "is", "an", "example"])
  163. doAssert matchLen("key", ident) == 3
  164. var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident)
  165. doAssert matchLen("key1= cal9", pattern) == 11
  166. var ws = newNonTerminal("ws", 1, 1)
  167. ws.rule = *whitespace
  168. var expr = newNonTerminal("expr", 1, 1)
  169. expr.rule = sequence(capture(ident), *sequence(
  170. nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr)))
  171. var c: Captures = default(Captures)
  172. var s = "a+b + c +d+e+f"
  173. doAssert rawMatch(s, expr.rule, 0, c) == len(s)
  174. var a = ""
  175. for i in 0..c.ml-1:
  176. a.add(substr(s, c.matches[i][0], c.matches[i][1]))
  177. doAssert a == "abcdef"
  178. #echo expr.rule
  179. #const filename = "lib/devel/peg/grammar.txt"
  180. #var grammar = parsePeg(newFileStream(filename, fmRead), filename)
  181. #echo "a <- [abc]*?".match(grammar)
  182. doAssert find("_____abc_______", term("abc"), 2) == 5
  183. doAssert match("_______ana", peg"A <- 'ana' / . A")
  184. doAssert match("abcs%%%", peg"A <- ..A / .A / '%'")
  185. var matches: array[0..MaxSubpatterns-1, string] = default(array[0..MaxSubpatterns-1, string])
  186. if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
  187. doAssert matches[0] == "abc"
  188. else:
  189. doAssert false
  190. var g2 = peg"""S <- A B / C D
  191. A <- 'a'+
  192. B <- 'b'+
  193. C <- 'c'+
  194. D <- 'd'+
  195. """
  196. doAssert($g2 == "((A B) / (C D))")
  197. doAssert match("cccccdddddd", g2)
  198. doAssert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
  199. "var1<-keykey; var2<-key2key2")
  200. doAssert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
  201. "$1<-$2$2; $1<-$2$2")
  202. doAssert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")
  203. if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
  204. doAssert matches[0] == "a"
  205. else:
  206. doAssert false
  207. if match("abcdefg", peg"c {d} ef {g}", matches, 2):
  208. doAssert matches[0] == "d"
  209. doAssert matches[1] == "g"
  210. else:
  211. doAssert false
  212. accum = @[]
  213. for x in findAll("abcdef", peg".", 3):
  214. accum.add(x)
  215. doAssert(accum == @["d", "e", "f"])
  216. for x in findAll("abcdef", peg"^{.}", 3):
  217. doAssert x == "d"
  218. if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
  219. doAssert matches[0] == "f"
  220. doAssert matches[1] == "a, b"
  221. else:
  222. doAssert false
  223. doAssert match("eine übersicht und außerdem", peg"(\letter \white*)+")
  224. # ß is not a lower cased letter?!
  225. doAssert match("eine übersicht und auerdem", peg"(\lower \white*)+")
  226. doAssert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+")
  227. doAssert(not match("456678", peg"(\letter)+"))
  228. doAssert("var1 = key; var2 = key2".replacef(
  229. peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") ==
  230. "var1<-keykey;var2<-key2key2")
  231. doAssert match("prefix/start", peg"^start$", 7)
  232. if "foo" =~ peg"{'a'}?.*":
  233. doAssert matches[0].len == 0
  234. else: doAssert false
  235. if "foo" =~ peg"{''}.*":
  236. doAssert matches[0] == ""
  237. else: doAssert false
  238. if "foo" =~ peg"{'foo'}":
  239. doAssert matches[0] == "foo"
  240. else: doAssert false
  241. let empty_test = peg"^\d*"
  242. let str = "XYZ"
  243. doAssert(str.find(empty_test) == 0)
  244. doAssert(str.match(empty_test))
  245. proc handleMatches(m: int, n: int, c: openArray[string]): string =
  246. result = ""
  247. if m > 0:
  248. result.add ", "
  249. result.add case n:
  250. of 2: toLowerAscii(c[0]) & ": '" & c[1] & "'"
  251. of 1: toLowerAscii(c[0]) & ": ''"
  252. else: ""
  253. doAssert("Var1=key1;var2=Key2; VAR3".
  254. replace(peg"{\ident}('='{\ident})* ';'* \s*",
  255. handleMatches) == "var1: 'key1', var2: 'Key2', var3: ''")
  256. doAssert "test1".match(peg"""{@}$""")
  257. doAssert "test2".match(peg"""{(!$ .)*} $""")
  258. doAssert "abbb".match(peg"{a} {b} $2 $^1")
  259. doAssert "abBA".match(peg"{a} {b} i$2 i$^2")
  260. doAssert "abba".match(peg"{a} {b} $^1 {} $^1")
  261. block:
  262. let grammar = peg"""
  263. program <- {''} stmt* $
  264. stmt <- call / block
  265. call <- 'call()' EOL
  266. EOL <- \n / $
  267. block <- 'block:' \n indBody
  268. indBody <- {$^1 ' '+} stmt ($^1 stmt)* {}
  269. """
  270. let program = """
  271. call()
  272. block:
  273. block:
  274. call()
  275. call()
  276. call()
  277. call()
  278. """
  279. var c: Captures = default(Captures)
  280. doAssert program.len == program.rawMatch(grammar, 0, c)
  281. doAssert c.ml == 1
  282. block:
  283. # bug #21632
  284. let p = peg"""
  285. atext <- \w / \d
  286. """
  287. doAssert "a".match(p)
  288. doAssert "1".match(p)
  289. pegsTest()
  290. static:
  291. pegsTest()