tpegs.nim 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. discard """
  2. targets: "c cpp js"
  3. output: '''
  4. PEG AST traversal output
  5. ------------------------
  6. pkNonTerminal: Sum @(2, 3)
  7. pkSequence: (Product (('+' / '-') Product)*)
  8. pkNonTerminal: Product @(3, 7)
  9. pkSequence: (Value (('*' / '/') Value)*)
  10. pkNonTerminal: Value @(4, 5)
  11. pkOrderedChoice: (([0-9] [0-9]*) / ('(' Expr ')'))
  12. pkSequence: ([0-9] [0-9]*)
  13. pkCharChoice: [0-9]
  14. pkGreedyRepSet: [0-9]*
  15. pkSequence: ('(' Expr ')')
  16. pkChar: '('
  17. pkNonTerminal: Expr @(1, 4)
  18. pkNonTerminal: Sum @(2, 3)
  19. pkChar: ')'
  20. pkGreedyRep: (('*' / '/') Value)*
  21. pkSequence: (('*' / '/') Value)
  22. pkOrderedChoice: ('*' / '/')
  23. pkChar: '*'
  24. pkChar: '/'
  25. pkNonTerminal: Value @(4, 5)
  26. pkGreedyRep: (('+' / '-') Product)*
  27. pkSequence: (('+' / '-') Product)
  28. pkOrderedChoice: ('+' / '-')
  29. pkChar: '+'
  30. pkChar: '-'
  31. pkNonTerminal: Product @(3, 7)
  32. Event parser output
  33. -------------------
  34. @[5.0]
  35. +
  36. @[5.0, 3.0]
  37. @[8.0]
  38. /
  39. @[8.0, 2.0]
  40. @[4.0]
  41. -
  42. @[4.0, 7.0]
  43. -*
  44. @[4.0, 7.0, 22.0]
  45. @[4.0, 154.0]
  46. -
  47. @[-150.0]
  48. '''
  49. """
  50. import std/[strutils, streams, pegs]
  # Input expression evaluated by the event-parser demo further down.
  let txt = "(5+3)/2-7*22"

  # Text repeated once per nesting level in front of each AST dump line.
  # NOTE(review): whitespace in this file looks collapsed by extraction;
  # confirm the intended width of this literal.
  const indent = " "

  # Arithmetic grammar used by both demos. The AST dump prints each rule's
  # line/column, so the layout of this text is significant: every rule name
  # starts at the beginning of its own line.
  let pegAst = peg(
    "Expr <- Sum\n" &
    "Sum <- Product (('+' / '-')Product)*\n" &
    "Product <- Value (('*' / '/')Value)*\n" &
    "Value <- [0-9]+ / '(' Expr ')'\n")
  block:
    # Dumps the AST of `pegAst`: one line per node, children nested one level
    # deeper than their parent.
    let outp = newStringStream()
    var visited: seq[string]  # non-terminal names whose rule was already expanded

    proc prt(outp: Stream, kind: PegKind, s: string; level: int = 0) =
      ## Writes `<kind>: <s>` to `outp`, preceded by `indent` repeated
      ## `level` times.
      let label = "$1: $2" % [$kind, s]
      outp.writeLine indent.repeat(level) & label

    proc recLoop(p: Peg, level: int = 0) =
      ## Prints `p` and, recursively, its children. The rule behind a
      ## non-terminal is expanded only the first time its name is seen, which
      ## keeps recursive grammars from looping forever.
      case p.kind
      of pkEmpty..pkWhitespace:
        discard
      of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle,
         pkChar, pkGreedyRepChar,
         pkCharChoice, pkGreedyRepSet,
         pkBackRef..pkBackRefIgnoreStyle:
        # Leaf nodes: print and stop.
        outp.prt(p.kind, $p, level)
      of pkNonTerminal:
        let nt = p.nt
        outp.prt(p.kind,
          "$1 @($3, $4)" % [nt.name, $nt.rule.kind, $nt.line, $nt.col], level)
        if nt.name notin visited:
          visited.add nt.name
          recLoop(nt.rule, level + 1)
      else:
        # Composite nodes: print, then descend into every child.
        outp.prt(p.kind, $p, level)
        for child in items(p):
          recLoop(child, level + 1)

    recLoop(pegAst)
    echo "PEG AST traversal output"
    echo "------------------------"
    echo outp.data
  block:
    # Event-parser demo: runs `pegAst` over `txt` with enter/leave handlers
    # that evaluate the expression on two stacks, echoing each stack whenever
    # it changes.
    var
      pStack: seq[string] = @[]   # names of the non-terminals currently entered
      valStack: seq[float] = @[]  # parsed operands and intermediate results
      opStack = ""                # operator characters seen but not yet applied
    let
      # `p`, `s`, `start` and `length` are not declared in this block.
      # NOTE(review): they appear to be supplied to each handler by
      # `eventParser` -- confirm against the std/pegs documentation.
      parseArithExpr = pegAst.eventParser:
        pkNonTerminal:
          enter:
            pStack.add p.nt.name
          leave:
            # Drop the name pushed by the matching `enter`.
            pStack.setLen pStack.high
            if length > 0:
              let matchStr = s.substr(start, start+length-1)
              case p.nt.name
              of "Value":
                # A Value whose text parses as a float is pushed as an operand.
                # Text that does not parse (the parenthesised alternative of
                # the grammar) raises ValueError and is ignored here.
                try:
                  valStack.add matchStr.parseFloat
                  echo valStack
                except ValueError:
                  discard
              of "Sum", "Product":
                # parseFloat is only a probe: if the matched text is a single
                # float nothing happens. On ValueError the last operator on
                # `opStack` is applied to the two topmost values, the result
                # replaces the lower one, and both stacks shrink by one.
                try:
                  let val {.used.} = matchStr.parseFloat
                except ValueError:
                  if valStack.len > 1 and opStack.len > 0:
                    valStack[^2] = case opStack[^1]
                    of '+': valStack[^2] + valStack[^1]
                    of '-': valStack[^2] - valStack[^1]
                    of '*': valStack[^2] * valStack[^1]
                    else: valStack[^2] / valStack[^1]
                    valStack.setLen valStack.high
                    echo valStack
                    opStack.setLen opStack.high
                    echo opStack
        pkChar:
          leave:
            # Record a matched single character as an operator, unless the
            # innermost entered non-terminal is "Value" (whose only character
            # terminals in the grammar are the parentheses).
            if length == 1 and "Value" != pStack[^1]:
              let matchChar = s[start]
              opStack.add matchChar
              echo opStack
    echo "Event parser output"
    echo "-------------------"
    let pLen = parseArithExpr(txt)
    # The value returned by the parser must equal the length of the input.
    doAssert txt.len == pLen
  138. import std/importutils
  block:
    proc pegsTest() =
      ## Assertion suite for std/pegs: escaping, matching, splitting,
      ## programmatic PEG construction, captures, replace/replacef, findAll,
      ## anchors, back-references and an indentation-sensitive grammar.
      ## It is executed at run time and again at compile time (see the
      ## `static:` call after the proc).

      # The tests read private fields (`rule`, `ml`, `matches`) directly.
      privateAccess(NonTerminal)
      privateAccess(Captures)

      doAssert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27"
      doAssert match("(a b c)", peg"'(' @ ')'")
      doAssert match("W_HI_Le", peg"\y 'while'")
      doAssert(not match("W_HI_L", peg"\y 'while'"))
      doAssert(not match("W_HI_Le", peg"\y v'while'"))
      doAssert match("W_HI_Le", peg"y'while'")

      doAssert($ +digits == $peg"\d+")
      doAssert "0158787".match(peg"\d+")
      doAssert "ABC 0232".match(peg"\w+\s+\d+")
      doAssert "ABC".match(peg"\d+ / \w+")

      var accum: seq[string] = @[]
      for word in split("00232this02939is39an22example111", peg"\d+"):
        accum.add(word)
      doAssert(accum == @["this", "is", "an", "example"])

      doAssert matchLen("key", ident) == 3

      let pattern = sequence(ident, *whitespace, term('='), *whitespace, ident)
      # The input must be 11 characters long for the expected length to hold:
      # two blanks follow the '=' and exercise `*whitespace` with a run.
      doAssert matchLen("key1=  cal9", pattern) == 11

      var ws = newNonTerminal("ws", 1, 1)
      ws.rule = *whitespace

      var expr = newNonTerminal("expr", 1, 1)
      expr.rule = sequence(capture(ident), *sequence(
                    nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr)))

      var c: Captures
      let s = "a+b + c +d+e+f"
      doAssert rawMatch(s, expr.rule, 0, c) == len(s)
      # Concatenate every captured identifier, in capture order.
      var a = ""
      for i in 0 ..< c.ml:
        a.add(substr(s, c.matches[i][0], c.matches[i][1]))
      doAssert a == "abcdef"
      #echo expr.rule

      #const filename = "lib/devel/peg/grammar.txt"
      #var grammar = parsePeg(newFileStream(filename, fmRead), filename)
      #echo "a <- [abc]*?".match(grammar)
      doAssert find("_____abc_______", term("abc"), 2) == 5
      doAssert match("_______ana", peg"A <- 'ana' / . A")
      doAssert match("abcs%%%", peg"A <- ..A / .A / '%'")

      var matches: array[0..MaxSubpatterns-1, string]
      if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
        doAssert matches[0] == "abc"
      else:
        doAssert false

      let g2 = peg"""S <- A B / C D
                     A <- 'a'+
                     B <- 'b'+
                     C <- 'c'+
                     D <- 'd'+
                  """
      doAssert($g2 == "((A B) / (C D))")
      doAssert match("cccccdddddd", g2)
      doAssert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
             "var1<-keykey; var2<-key2key2")
      # Plain `replace` does not interpolate captures: "$1"/"$2" stay literal.
      doAssert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
             "$1<-$2$2; $1<-$2$2")
      doAssert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")

      if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
        doAssert matches[0] == "a"
      else:
        doAssert false

      if match("abcdefg", peg"c {d} ef {g}", matches, 2):
        doAssert matches[0] == "d"
        doAssert matches[1] == "g"
      else:
        doAssert false

      accum = @[]
      for x in findAll("abcdef", peg".", 3):
        accum.add(x)
      doAssert(accum == @["d", "e", "f"])

      for x in findAll("abcdef", peg"^{.}", 3):
        doAssert x == "d"

      if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
        doAssert matches[0] == "f"
        doAssert matches[1] == "a, b"
      else:
        doAssert false

      doAssert match("eine übersicht und außerdem", peg"(\letter \white*)+")
      # ß is not a lower cased letter?!
      doAssert match("eine übersicht und auerdem", peg"(\lower \white*)+")
      doAssert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+")
      doAssert(not match("456678", peg"(\letter)+"))

      doAssert("var1 = key; var2 = key2".replacef(
        peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") ==
             "var1<-keykey;var2<-key2key2")

      doAssert match("prefix/start", peg"^start$", 7)

      if "foo" =~ peg"{'a'}?.*":
        doAssert matches[0].len == 0
      else: doAssert false

      if "foo" =~ peg"{''}.*":
        doAssert matches[0] == ""
      else: doAssert false

      if "foo" =~ peg"{'foo'}":
        doAssert matches[0] == "foo"
      else: doAssert false

      let emptyTest = peg"^\d*"
      let str = "XYZ"

      doAssert(str.find(emptyTest) == 0)
      doAssert(str.match(emptyTest))

      proc handleMatches(m: int, n: int, c: openArray[string]): string =
        ## Replacement callback: formats the captures of one match as
        ## `name: 'value'`. A ", " separator is emitted first whenever `m > 0`.
        ## `n` selects the format: 2 -> name and value, 1 -> name with an
        ## empty value, anything else -> nothing.
        result = ""

        if m > 0:
          result.add ", "

        result.add case n:
          of 2: toLowerAscii(c[0]) & ": '" & c[1] & "'"
          of 1: toLowerAscii(c[0]) & ": ''"
          else: ""

      doAssert("Var1=key1;var2=Key2; VAR3".
        replace(peg"{\ident}('='{\ident})* ';'* \s*",
        handleMatches) == "var1: 'key1', var2: 'Key2', var3: ''")

      doAssert "test1".match(peg"""{@}$""")
      doAssert "test2".match(peg"""{(!$ .)*} $""")

      doAssert "abbb".match(peg"{a} {b} $2 $^1")
      doAssert "abBA".match(peg"{a} {b} i$2 i$^2")

      doAssert "abba".match(peg"{a} {b} $^1 {} $^1")

      block:
        # Indentation-sensitive grammar: `indBody` captures the enclosing
        # indent (`$^1`) followed by at least one more blank, and every
        # further statement of the body must start with that same capture.
        let grammar = peg"""
          program <- {''} stmt* $
          stmt <- call / block
          call <- 'call()' EOL
          EOL <- \n / $
          block <- 'block:' \n indBody
          indBody <- {$^1 ' '+} stmt ($^1 stmt)* {}
        """
        # The leading blanks are what this test is about, so each line is
        # spelled out explicitly instead of relying on the layout of a
        # multi-line literal.
        # NOTE(review): nesting reconstructed as block > block > two calls,
        # then one call per outer level -- confirm against upstream.
        let program =
          "call()\n" &
          "block:\n" &
          "  block:\n" &
          "    call()\n" &
          "    call()\n" &
          "  call()\n" &
          "call()\n"
        var c: Captures
        doAssert program.len == program.rawMatch(grammar, 0, c)
        doAssert c.ml == 1

    pegsTest()
    static:
      pegsTest()