parsesql.nim 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2009 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The `parsesql` module implements a high performance SQL file
  10. ## parser. It parses PostgreSQL syntax and the SQL ANSI standard.
  11. ##
  12. ## Unstable API.
  13. import std/[strutils, lexbase]
  14. import std/private/decode_helpers
  15. when defined(nimPreviewSlimSystem):
  16. import std/assertions
  17. # ------------------- scanner -------------------------------------------------
  18. type
  19. TokKind = enum ## enumeration of all SQL tokens
  20. tkInvalid, ## invalid token
  21. tkEof, ## end of file reached
  22. tkIdentifier, ## abc
  23. tkQuotedIdentifier, ## "abc"
  24. tkStringConstant, ## 'abc'
  25. tkEscapeConstant, ## e'abc'
  26. tkDollarQuotedConstant, ## $tag$abc$tag$
  27. tkBitStringConstant, ## B'00011'
  28. tkHexStringConstant, ## x'00011'
  29. tkInteger,
  30. tkNumeric,
  31. tkOperator, ## + - * / < > = ~ ! @ # % ^ & | ` ?
  32. tkSemicolon, ## ';'
  33. tkColon, ## ':'
  34. tkComma, ## ','
  35. tkParLe, ## '('
  36. tkParRi, ## ')'
  37. tkBracketLe, ## '['
  38. tkBracketRi, ## ']'
  39. tkDot ## '.'
  40. Token = object # a token
  41. kind: TokKind # the type of the token
  42. literal: string # the parsed (string) literal
  43. SqlLexer* = object of BaseLexer ## the parser object.
  44. filename: string
  45. const
  46. tokKindToStr: array[TokKind, string] = [
  47. "invalid", "[EOF]", "identifier", "quoted identifier", "string constant",
  48. "escape string constant", "dollar quoted constant", "bit string constant",
  49. "hex string constant", "integer constant", "numeric constant", "operator",
  50. ";", ":", ",", "(", ")", "[", "]", "."
  51. ]
  52. reservedKeywords = @[
  53. # statements
  54. "select", "from", "where", "group", "limit", "offset", "having",
  55. # functions
  56. "count",
  57. ]
  58. proc close(L: var SqlLexer) =
  59. lexbase.close(L)
  60. proc getColumn(L: SqlLexer): int =
  61. ## get the current column the parser has arrived at.
  62. result = getColNumber(L, L.bufpos)
  63. proc getLine(L: SqlLexer): int =
  64. result = L.lineNumber
  65. proc handleOctChar(c: var SqlLexer, xi: var int) =
  66. if c.buf[c.bufpos] in {'0'..'7'}:
  67. xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0'))
  68. inc(c.bufpos)
  69. proc getEscapedChar(c: var SqlLexer, tok: var Token) =
  70. inc(c.bufpos)
  71. case c.buf[c.bufpos]
  72. of 'n', 'N':
  73. add(tok.literal, '\L')
  74. inc(c.bufpos)
  75. of 'r', 'R', 'c', 'C':
  76. add(tok.literal, '\c')
  77. inc(c.bufpos)
  78. of 'l', 'L':
  79. add(tok.literal, '\L')
  80. inc(c.bufpos)
  81. of 'f', 'F':
  82. add(tok.literal, '\f')
  83. inc(c.bufpos)
  84. of 'e', 'E':
  85. add(tok.literal, '\e')
  86. inc(c.bufpos)
  87. of 'a', 'A':
  88. add(tok.literal, '\a')
  89. inc(c.bufpos)
  90. of 'b', 'B':
  91. add(tok.literal, '\b')
  92. inc(c.bufpos)
  93. of 'v', 'V':
  94. add(tok.literal, '\v')
  95. inc(c.bufpos)
  96. of 't', 'T':
  97. add(tok.literal, '\t')
  98. inc(c.bufpos)
  99. of '\'', '\"':
  100. add(tok.literal, c.buf[c.bufpos])
  101. inc(c.bufpos)
  102. of '\\':
  103. add(tok.literal, '\\')
  104. inc(c.bufpos)
  105. of 'x', 'X':
  106. inc(c.bufpos)
  107. var xi = 0
  108. if handleHexChar(c.buf[c.bufpos], xi):
  109. inc(c.bufpos)
  110. if handleHexChar(c.buf[c.bufpos], xi):
  111. inc(c.bufpos)
  112. add(tok.literal, chr(xi))
  113. of '0'..'7':
  114. var xi = 0
  115. handleOctChar(c, xi)
  116. handleOctChar(c, xi)
  117. handleOctChar(c, xi)
  118. if (xi <= 255): add(tok.literal, chr(xi))
  119. else: tok.kind = tkInvalid
  120. else: tok.kind = tkInvalid
  121. proc handleCRLF(c: var SqlLexer, pos: int): int =
  122. case c.buf[pos]
  123. of '\c': result = lexbase.handleCR(c, pos)
  124. of '\L': result = lexbase.handleLF(c, pos)
  125. else: result = pos
  126. proc skip(c: var SqlLexer) =
  127. var pos = c.bufpos
  128. var nested = 0
  129. while true:
  130. case c.buf[pos]
  131. of ' ', '\t':
  132. inc(pos)
  133. of '-':
  134. if c.buf[pos+1] == '-':
  135. while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
  136. else:
  137. break
  138. of '/':
  139. if c.buf[pos+1] == '*':
  140. inc(pos, 2)
  141. while true:
  142. case c.buf[pos]
  143. of '\0': break
  144. of '\c', '\L':
  145. pos = handleCRLF(c, pos)
  146. of '*':
  147. if c.buf[pos+1] == '/':
  148. inc(pos, 2)
  149. if nested <= 0: break
  150. dec(nested)
  151. else:
  152. inc(pos)
  153. of '/':
  154. if c.buf[pos+1] == '*':
  155. inc(pos, 2)
  156. inc(nested)
  157. else:
  158. inc(pos)
  159. else: inc(pos)
  160. else: break
  161. of '\c', '\L':
  162. pos = handleCRLF(c, pos)
  163. else:
  164. break # EndOfFile also leaves the loop
  165. c.bufpos = pos
  166. proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) =
  167. var pos = c.bufpos + 1
  168. tok.kind = kind
  169. block parseLoop:
  170. while true:
  171. while true:
  172. var ch = c.buf[pos]
  173. if ch == '\'':
  174. if c.buf[pos+1] == '\'':
  175. inc(pos, 2)
  176. add(tok.literal, '\'')
  177. else:
  178. inc(pos)
  179. break
  180. elif ch in {'\c', '\L', lexbase.EndOfFile}:
  181. tok.kind = tkInvalid
  182. break parseLoop
  183. elif (ch == '\\') and kind == tkEscapeConstant:
  184. c.bufpos = pos
  185. getEscapedChar(c, tok)
  186. pos = c.bufpos
  187. else:
  188. add(tok.literal, ch)
  189. inc(pos)
  190. c.bufpos = pos
  191. var line = c.lineNumber
  192. skip(c)
  193. if c.lineNumber > line:
  194. # a new line whitespace has been parsed, so we check if the string
  195. # continues after the whitespace:
  196. pos = c.bufpos
  197. if c.buf[pos] == '\'': inc(pos)
  198. else: break parseLoop
  199. else: break parseLoop
  200. c.bufpos = pos
  201. proc getDollarString(c: var SqlLexer, tok: var Token) =
  202. var pos = c.bufpos + 1
  203. tok.kind = tkDollarQuotedConstant
  204. var tag = "$"
  205. while c.buf[pos] in IdentChars:
  206. add(tag, c.buf[pos])
  207. inc(pos)
  208. if c.buf[pos] == '$': inc(pos)
  209. else:
  210. tok.kind = tkInvalid
  211. return
  212. while true:
  213. case c.buf[pos]
  214. of '\c', '\L':
  215. pos = handleCRLF(c, pos)
  216. add(tok.literal, "\L")
  217. of '\0':
  218. tok.kind = tkInvalid
  219. break
  220. of '$':
  221. inc(pos)
  222. var tag2 = "$"
  223. while c.buf[pos] in IdentChars:
  224. add(tag2, c.buf[pos])
  225. inc(pos)
  226. if c.buf[pos] == '$': inc(pos)
  227. if tag2 == tag: break
  228. add(tok.literal, tag2)
  229. add(tok.literal, '$')
  230. else:
  231. add(tok.literal, c.buf[pos])
  232. inc(pos)
  233. c.bufpos = pos
  234. proc getSymbol(c: var SqlLexer, tok: var Token) =
  235. var pos = c.bufpos
  236. while true:
  237. add(tok.literal, c.buf[pos])
  238. inc(pos)
  239. if c.buf[pos] notin {'a'..'z', 'A'..'Z', '0'..'9', '_', '$',
  240. '\128'..'\255'}:
  241. break
  242. c.bufpos = pos
  243. tok.kind = tkIdentifier
  244. proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote = '\"') =
  245. var pos = c.bufpos + 1
  246. tok.kind = tkQuotedIdentifier
  247. while true:
  248. var ch = c.buf[pos]
  249. if ch == quote:
  250. if c.buf[pos+1] == quote:
  251. inc(pos, 2)
  252. add(tok.literal, quote)
  253. else:
  254. inc(pos)
  255. break
  256. elif ch in {'\c', '\L', lexbase.EndOfFile}:
  257. tok.kind = tkInvalid
  258. break
  259. else:
  260. add(tok.literal, ch)
  261. inc(pos)
  262. c.bufpos = pos
  263. proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
  264. var pos = c.bufpos + 1
  265. block parseLoop:
  266. while true:
  267. while true:
  268. var ch = c.buf[pos]
  269. if ch in validChars:
  270. add(tok.literal, ch)
  271. inc(pos)
  272. elif ch == '\'':
  273. inc(pos)
  274. break
  275. else:
  276. tok.kind = tkInvalid
  277. break parseLoop
  278. c.bufpos = pos
  279. var line = c.lineNumber
  280. skip(c)
  281. if c.lineNumber > line:
  282. # a new line whitespace has been parsed, so we check if the string
  283. # continues after the whitespace:
  284. pos = c.bufpos
  285. if c.buf[pos] == '\'': inc(pos)
  286. else: break parseLoop
  287. else: break parseLoop
  288. c.bufpos = pos
  289. proc getNumeric(c: var SqlLexer, tok: var Token) =
  290. tok.kind = tkInteger
  291. var pos = c.bufpos
  292. while c.buf[pos] in Digits:
  293. add(tok.literal, c.buf[pos])
  294. inc(pos)
  295. if c.buf[pos] == '.':
  296. tok.kind = tkNumeric
  297. add(tok.literal, c.buf[pos])
  298. inc(pos)
  299. while c.buf[pos] in Digits:
  300. add(tok.literal, c.buf[pos])
  301. inc(pos)
  302. if c.buf[pos] in {'E', 'e'}:
  303. tok.kind = tkNumeric
  304. add(tok.literal, c.buf[pos])
  305. inc(pos)
  306. if c.buf[pos] == '+':
  307. inc(pos)
  308. elif c.buf[pos] == '-':
  309. add(tok.literal, c.buf[pos])
  310. inc(pos)
  311. if c.buf[pos] in Digits:
  312. while c.buf[pos] in Digits:
  313. add(tok.literal, c.buf[pos])
  314. inc(pos)
  315. else:
  316. tok.kind = tkInvalid
  317. c.bufpos = pos
  318. proc getOperator(c: var SqlLexer, tok: var Token) =
  319. const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%',
  320. '^', '&', '|', '`', '?'}
  321. tok.kind = tkOperator
  322. var pos = c.bufpos
  323. var trailingPlusMinus = false
  324. while true:
  325. case c.buf[pos]
  326. of '-':
  327. if c.buf[pos] == '-': break
  328. if not trailingPlusMinus and c.buf[pos+1] notin operators and
  329. tok.literal.len > 0: break
  330. of '/':
  331. if c.buf[pos] == '*': break
  332. of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?':
  333. trailingPlusMinus = true
  334. of '+':
  335. if not trailingPlusMinus and c.buf[pos+1] notin operators and
  336. tok.literal.len > 0: break
  337. of '*', '<', '>', '=': discard
  338. else: break
  339. add(tok.literal, c.buf[pos])
  340. inc(pos)
  341. c.bufpos = pos
  342. proc getTok(c: var SqlLexer, tok: var Token) =
  343. tok.kind = tkInvalid
  344. setLen(tok.literal, 0)
  345. skip(c)
  346. case c.buf[c.bufpos]
  347. of ';':
  348. tok.kind = tkSemicolon
  349. inc(c.bufpos)
  350. add(tok.literal, ';')
  351. of ',':
  352. tok.kind = tkComma
  353. inc(c.bufpos)
  354. add(tok.literal, ',')
  355. of ':':
  356. tok.kind = tkColon
  357. inc(c.bufpos)
  358. add(tok.literal, ':')
  359. of 'e', 'E':
  360. if c.buf[c.bufpos + 1] == '\'':
  361. inc(c.bufpos)
  362. getString(c, tok, tkEscapeConstant)
  363. else:
  364. getSymbol(c, tok)
  365. of 'b', 'B':
  366. if c.buf[c.bufpos + 1] == '\'':
  367. tok.kind = tkBitStringConstant
  368. getBitHexString(c, tok, {'0'..'1'})
  369. else:
  370. getSymbol(c, tok)
  371. of 'x', 'X':
  372. if c.buf[c.bufpos + 1] == '\'':
  373. tok.kind = tkHexStringConstant
  374. getBitHexString(c, tok, {'a'..'f', 'A'..'F', '0'..'9'})
  375. else:
  376. getSymbol(c, tok)
  377. of '$': getDollarString(c, tok)
  378. of '[':
  379. tok.kind = tkBracketLe
  380. inc(c.bufpos)
  381. add(tok.literal, '[')
  382. of ']':
  383. tok.kind = tkBracketRi
  384. inc(c.bufpos)
  385. add(tok.literal, ']')
  386. of '(':
  387. tok.kind = tkParLe
  388. inc(c.bufpos)
  389. add(tok.literal, '(')
  390. of ')':
  391. tok.kind = tkParRi
  392. inc(c.bufpos)
  393. add(tok.literal, ')')
  394. of '.':
  395. if c.buf[c.bufpos + 1] in Digits:
  396. getNumeric(c, tok)
  397. else:
  398. tok.kind = tkDot
  399. inc(c.bufpos)
  400. add(tok.literal, '.')
  401. of '0'..'9': getNumeric(c, tok)
  402. of '\'': getString(c, tok, tkStringConstant)
  403. of '"': getQuotedIdentifier(c, tok, '"')
  404. of '`': getQuotedIdentifier(c, tok, '`')
  405. of lexbase.EndOfFile:
  406. tok.kind = tkEof
  407. tok.literal = "[EOF]"
  408. of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_',
  409. '\128'..'\255':
  410. getSymbol(c, tok)
  411. of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%',
  412. '^', '&', '|', '?':
  413. getOperator(c, tok)
  414. else:
  415. add(tok.literal, c.buf[c.bufpos])
  416. inc(c.bufpos)
  417. proc errorStr(L: SqlLexer, msg: string): string =
  418. result = "$1($2, $3) Error: $4" % [L.filename, $getLine(L), $getColumn(L), msg]
  419. # ----------------------------- parser ----------------------------------------
  420. # Operator/Element Associativity Description
  421. # . left table/column name separator
  422. # :: left PostgreSQL-style typecast
  423. # [ ] left array element selection
  424. # - right unary minus
  425. # ^ left exponentiation
  426. # * / % left multiplication, division, modulo
  427. # + - left addition, subtraction
  428. # IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL
  429. # ISNULL test for null
  430. # NOTNULL test for not null
  431. # (any other) left all other native and user-defined oprs
  432. # IN set membership
  433. # BETWEEN range containment
  434. # OVERLAPS time interval overlap
  435. # LIKE ILIKE SIMILAR string pattern matching
  436. # < > less than, greater than
  437. # = right equality, assignment
  438. # NOT right logical negation
  439. # AND left logical conjunction
  440. # OR left logical disjunction
  441. type
  442. SqlNodeKind* = enum ## kind of SQL abstract syntax tree
  443. nkNone,
  444. nkIdent,
  445. nkQuotedIdent,
  446. nkStringLit,
  447. nkBitStringLit,
  448. nkHexStringLit,
  449. nkIntegerLit,
  450. nkNumericLit,
  451. nkPrimaryKey,
  452. nkForeignKey,
  453. nkNotNull,
  454. nkNull,
  455. nkStmtList,
  456. nkDot,
  457. nkDotDot,
  458. nkPrefix,
  459. nkInfix,
  460. nkCall,
  461. nkPrGroup,
  462. nkColumnReference,
  463. nkReferences,
  464. nkDefault,
  465. nkCheck,
  466. nkConstraint,
  467. nkUnique,
  468. nkIdentity,
  469. nkColumnDef, ## name, datatype, constraints
  470. nkInsert,
  471. nkUpdate,
  472. nkDelete,
  473. nkSelect,
  474. nkSelectDistinct,
  475. nkSelectColumns,
  476. nkSelectPair,
  477. nkAsgn,
  478. nkFrom,
  479. nkFromItemPair,
  480. nkJoin,
  481. nkNaturalJoin,
  482. nkUsing,
  483. nkGroup,
  484. nkLimit,
  485. nkOffset,
  486. nkHaving,
  487. nkOrder,
  488. nkDesc,
  489. nkUnion,
  490. nkIntersect,
  491. nkExcept,
  492. nkColumnList,
  493. nkValueList,
  494. nkWhere,
  495. nkCreateTable,
  496. nkCreateTableIfNotExists,
  497. nkCreateType,
  498. nkCreateTypeIfNotExists,
  499. nkCreateIndex,
  500. nkCreateIndexIfNotExists,
  501. nkEnumDef
  502. const
  503. LiteralNodes = {
  504. nkIdent, nkQuotedIdent, nkStringLit, nkBitStringLit, nkHexStringLit,
  505. nkIntegerLit, nkNumericLit
  506. }
  507. type
  508. SqlParseError* = object of ValueError ## Invalid SQL encountered
  509. SqlNode* = ref SqlNodeObj ## an SQL abstract syntax tree node
  510. SqlNodeObj* = object ## an SQL abstract syntax tree node
  511. case kind*: SqlNodeKind ## kind of syntax tree
  512. of LiteralNodes:
  513. strVal*: string ## AST leaf: the identifier, numeric literal
  514. ## string literal, etc.
  515. else:
  516. sons*: seq[SqlNode] ## the node's children
  517. SqlParser* = object of SqlLexer ## SQL parser object
  518. tok: Token
  519. proc newNode*(k: SqlNodeKind): SqlNode =
  520. when defined(js): # bug #14117
  521. case k
  522. of LiteralNodes:
  523. result = SqlNode(kind: k, strVal: "")
  524. else:
  525. result = SqlNode(kind: k, sons: @[])
  526. else:
  527. result = SqlNode(kind: k)
  528. proc newNode*(k: SqlNodeKind, s: string): SqlNode =
  529. result = SqlNode(kind: k)
  530. result.strVal = s
  531. proc newNode*(k: SqlNodeKind, sons: seq[SqlNode]): SqlNode =
  532. result = SqlNode(kind: k)
  533. result.sons = sons
  534. proc len*(n: SqlNode): int =
  535. if n.kind in LiteralNodes:
  536. result = 0
  537. else:
  538. result = n.sons.len
  539. proc `[]`*(n: SqlNode; i: int): SqlNode = n.sons[i]
  540. proc `[]`*(n: SqlNode; i: BackwardsIndex): SqlNode = n.sons[n.len - int(i)]
  541. proc add*(father, n: SqlNode) =
  542. add(father.sons, n)
  543. proc getTok(p: var SqlParser) =
  544. getTok(p, p.tok)
  545. proc sqlError(p: SqlParser, msg: string) =
  546. var e: ref SqlParseError
  547. new(e)
  548. e.msg = errorStr(p, msg)
  549. raise e
  550. proc isKeyw(p: SqlParser, keyw: string): bool =
  551. result = p.tok.kind == tkIdentifier and
  552. cmpIgnoreCase(p.tok.literal, keyw) == 0
  553. proc isOpr(p: SqlParser, opr: string): bool =
  554. result = p.tok.kind == tkOperator and
  555. cmpIgnoreCase(p.tok.literal, opr) == 0
  556. proc optKeyw(p: var SqlParser, keyw: string) =
  557. if p.tok.kind == tkIdentifier and cmpIgnoreCase(p.tok.literal, keyw) == 0:
  558. getTok(p)
  559. proc expectIdent(p: SqlParser) =
  560. if p.tok.kind != tkIdentifier and p.tok.kind != tkQuotedIdentifier:
  561. sqlError(p, "identifier expected")
  562. proc expect(p: SqlParser, kind: TokKind) =
  563. if p.tok.kind != kind:
  564. sqlError(p, tokKindToStr[kind] & " expected")
  565. proc eat(p: var SqlParser, kind: TokKind) =
  566. if p.tok.kind == kind:
  567. getTok(p)
  568. else:
  569. sqlError(p, tokKindToStr[kind] & " expected")
  570. proc eat(p: var SqlParser, keyw: string) =
  571. if isKeyw(p, keyw):
  572. getTok(p)
  573. else:
  574. sqlError(p, keyw.toUpperAscii() & " expected")
  575. proc opt(p: var SqlParser, kind: TokKind) =
  576. if p.tok.kind == kind: getTok(p)
  577. proc parseDataType(p: var SqlParser): SqlNode =
  578. if isKeyw(p, "enum"):
  579. result = newNode(nkEnumDef)
  580. getTok(p)
  581. if p.tok.kind == tkParLe:
  582. getTok(p)
  583. result.add(newNode(nkStringLit, p.tok.literal))
  584. getTok(p)
  585. while p.tok.kind == tkComma:
  586. getTok(p)
  587. result.add(newNode(nkStringLit, p.tok.literal))
  588. getTok(p)
  589. eat(p, tkParRi)
  590. else:
  591. expectIdent(p)
  592. result = newNode(nkIdent, p.tok.literal)
  593. getTok(p)
  594. # ignore (12, 13) part:
  595. if p.tok.kind == tkParLe:
  596. getTok(p)
  597. expect(p, tkInteger)
  598. getTok(p)
  599. while p.tok.kind == tkComma:
  600. getTok(p)
  601. expect(p, tkInteger)
  602. getTok(p)
  603. eat(p, tkParRi)
  604. proc getPrecedence(p: SqlParser): int =
  605. if isOpr(p, "*") or isOpr(p, "/") or isOpr(p, "%"):
  606. result = 6
  607. elif isOpr(p, "+") or isOpr(p, "-"):
  608. result = 5
  609. elif isOpr(p, "=") or isOpr(p, "<") or isOpr(p, ">") or isOpr(p, ">=") or
  610. isOpr(p, "<=") or isOpr(p, "<>") or isOpr(p, "!=") or isKeyw(p, "is") or
  611. isKeyw(p, "like") or isKeyw(p, "in"):
  612. result = 4
  613. elif isKeyw(p, "and"):
  614. result = 3
  615. elif isKeyw(p, "or"):
  616. result = 2
  617. elif isKeyw(p, "between"):
  618. result = 1
  619. elif p.tok.kind == tkOperator:
  620. # user-defined operator:
  621. result = 0
  622. else:
  623. result = - 1
  624. proc parseExpr(p: var SqlParser): SqlNode {.gcsafe.}
  625. proc parseSelect(p: var SqlParser): SqlNode {.gcsafe.}
  626. proc identOrLiteral(p: var SqlParser): SqlNode =
  627. case p.tok.kind
  628. of tkQuotedIdentifier:
  629. result = newNode(nkQuotedIdent, p.tok.literal)
  630. getTok(p)
  631. of tkIdentifier:
  632. result = newNode(nkIdent, p.tok.literal)
  633. getTok(p)
  634. of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant:
  635. result = newNode(nkStringLit, p.tok.literal)
  636. getTok(p)
  637. of tkBitStringConstant:
  638. result = newNode(nkBitStringLit, p.tok.literal)
  639. getTok(p)
  640. of tkHexStringConstant:
  641. result = newNode(nkHexStringLit, p.tok.literal)
  642. getTok(p)
  643. of tkInteger:
  644. result = newNode(nkIntegerLit, p.tok.literal)
  645. getTok(p)
  646. of tkNumeric:
  647. result = newNode(nkNumericLit, p.tok.literal)
  648. getTok(p)
  649. of tkParLe:
  650. getTok(p)
  651. result = newNode(nkPrGroup)
  652. while true:
  653. result.add(parseExpr(p))
  654. if p.tok.kind != tkComma: break
  655. getTok(p)
  656. eat(p, tkParRi)
  657. else:
  658. if p.tok.literal == "*":
  659. result = newNode(nkIdent, p.tok.literal)
  660. getTok(p)
  661. else:
  662. sqlError(p, "expression expected")
  663. getTok(p) # we must consume a token here to prevent endless loops!
  664. proc primary(p: var SqlParser): SqlNode =
  665. if (p.tok.kind == tkOperator and (p.tok.literal == "+" or p.tok.literal ==
  666. "-")) or isKeyw(p, "not"):
  667. result = newNode(nkPrefix)
  668. result.add(newNode(nkIdent, p.tok.literal))
  669. getTok(p)
  670. result.add(primary(p))
  671. return
  672. result = identOrLiteral(p)
  673. while true:
  674. case p.tok.kind
  675. of tkParLe:
  676. var a = result
  677. result = newNode(nkCall)
  678. result.add(a)
  679. getTok(p)
  680. while p.tok.kind != tkParRi:
  681. result.add(parseExpr(p))
  682. if p.tok.kind == tkComma: getTok(p)
  683. else: break
  684. eat(p, tkParRi)
  685. of tkDot:
  686. getTok(p)
  687. var a = result
  688. if p.tok.kind == tkDot:
  689. getTok(p)
  690. result = newNode(nkDotDot)
  691. else:
  692. result = newNode(nkDot)
  693. result.add(a)
  694. if isOpr(p, "*"):
  695. result.add(newNode(nkIdent, "*"))
  696. elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}:
  697. result.add(newNode(nkIdent, p.tok.literal))
  698. else:
  699. sqlError(p, "identifier expected")
  700. getTok(p)
  701. else: break
  702. proc lowestExprAux(p: var SqlParser, v: var SqlNode, limit: int): int =
  703. var
  704. v2, node, opNode: SqlNode
  705. v = primary(p) # expand while operators have priorities higher than 'limit'
  706. var opPred = getPrecedence(p)
  707. result = opPred
  708. while opPred > limit:
  709. node = newNode(nkInfix)
  710. opNode = newNode(nkIdent, p.tok.literal.toLowerAscii())
  711. getTok(p)
  712. result = lowestExprAux(p, v2, opPred)
  713. node.add(opNode)
  714. node.add(v)
  715. node.add(v2)
  716. v = node
  717. opPred = getPrecedence(p)
  718. proc parseExpr(p: var SqlParser): SqlNode =
  719. discard lowestExprAux(p, result, - 1)
  720. proc parseTableName(p: var SqlParser): SqlNode =
  721. expectIdent(p)
  722. result = primary(p)
  723. proc parseColumnReference(p: var SqlParser): SqlNode =
  724. result = parseTableName(p)
  725. if p.tok.kind == tkParLe:
  726. getTok(p)
  727. var a = result
  728. result = newNode(nkColumnReference)
  729. result.add(a)
  730. result.add(parseTableName(p))
  731. while p.tok.kind == tkComma:
  732. getTok(p)
  733. result.add(parseTableName(p))
  734. eat(p, tkParRi)
  735. proc parseCheck(p: var SqlParser): SqlNode =
  736. getTok(p)
  737. result = newNode(nkCheck)
  738. result.add(parseExpr(p))
  739. proc parseConstraint(p: var SqlParser): SqlNode =
  740. getTok(p)
  741. result = newNode(nkConstraint)
  742. expectIdent(p)
  743. result.add(newNode(nkIdent, p.tok.literal))
  744. getTok(p)
  745. optKeyw(p, "check")
  746. result.add(parseExpr(p))
  747. proc parseParIdentList(p: var SqlParser, father: SqlNode) =
  748. eat(p, tkParLe)
  749. while true:
  750. expectIdent(p)
  751. father.add(newNode(nkIdent, p.tok.literal))
  752. getTok(p)
  753. if p.tok.kind != tkComma: break
  754. getTok(p)
  755. eat(p, tkParRi)
  756. proc parseColumnConstraints(p: var SqlParser, result: SqlNode) =
  757. while true:
  758. if isKeyw(p, "default"):
  759. getTok(p)
  760. var n = newNode(nkDefault)
  761. n.add(parseExpr(p))
  762. result.add(n)
  763. elif isKeyw(p, "references"):
  764. getTok(p)
  765. var n = newNode(nkReferences)
  766. n.add(parseColumnReference(p))
  767. result.add(n)
  768. elif isKeyw(p, "not"):
  769. getTok(p)
  770. eat(p, "null")
  771. result.add(newNode(nkNotNull))
  772. elif isKeyw(p, "null"):
  773. getTok(p)
  774. result.add(newNode(nkNull))
  775. elif isKeyw(p, "identity"):
  776. getTok(p)
  777. result.add(newNode(nkIdentity))
  778. elif isKeyw(p, "primary"):
  779. getTok(p)
  780. eat(p, "key")
  781. result.add(newNode(nkPrimaryKey))
  782. elif isKeyw(p, "check"):
  783. result.add(parseCheck(p))
  784. elif isKeyw(p, "constraint"):
  785. result.add(parseConstraint(p))
  786. elif isKeyw(p, "unique"):
  787. getTok(p)
  788. result.add(newNode(nkUnique))
  789. else:
  790. break
  791. proc parseColumnDef(p: var SqlParser): SqlNode =
  792. expectIdent(p)
  793. result = newNode(nkColumnDef)
  794. result.add(newNode(nkIdent, p.tok.literal))
  795. getTok(p)
  796. result.add(parseDataType(p))
  797. parseColumnConstraints(p, result)
  798. proc parseIfNotExists(p: var SqlParser, k: SqlNodeKind): SqlNode =
  799. getTok(p)
  800. if isKeyw(p, "if"):
  801. getTok(p)
  802. eat(p, "not")
  803. eat(p, "exists")
  804. result = newNode(succ(k))
  805. else:
  806. result = newNode(k)
  807. proc parseTableConstraint(p: var SqlParser): SqlNode =
  808. if isKeyw(p, "primary"):
  809. getTok(p)
  810. eat(p, "key")
  811. result = newNode(nkPrimaryKey)
  812. parseParIdentList(p, result)
  813. elif isKeyw(p, "foreign"):
  814. getTok(p)
  815. eat(p, "key")
  816. result = newNode(nkForeignKey)
  817. parseParIdentList(p, result)
  818. eat(p, "references")
  819. var m = newNode(nkReferences)
  820. m.add(parseColumnReference(p))
  821. result.add(m)
  822. elif isKeyw(p, "unique"):
  823. getTok(p)
  824. eat(p, "key")
  825. result = newNode(nkUnique)
  826. parseParIdentList(p, result)
  827. elif isKeyw(p, "check"):
  828. result = parseCheck(p)
  829. elif isKeyw(p, "constraint"):
  830. result = parseConstraint(p)
  831. else:
  832. sqlError(p, "column definition expected")
  833. proc parseUnique(p: var SqlParser): SqlNode =
  834. result = parseExpr(p)
  835. if result.kind == nkCall: result.kind = nkUnique
  836. proc parseTableDef(p: var SqlParser): SqlNode =
  837. result = parseIfNotExists(p, nkCreateTable)
  838. expectIdent(p)
  839. result.add(newNode(nkIdent, p.tok.literal))
  840. getTok(p)
  841. if p.tok.kind == tkParLe:
  842. getTok(p)
  843. while p.tok.kind != tkParRi:
  844. if isKeyw(p, "constraint"):
  845. result.add parseConstraint(p)
  846. elif isKeyw(p, "primary") or isKeyw(p, "foreign"):
  847. result.add parseTableConstraint(p)
  848. elif isKeyw(p, "unique"):
  849. result.add parseUnique(p)
  850. elif p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier:
  851. result.add(parseColumnDef(p))
  852. else:
  853. result.add(parseTableConstraint(p))
  854. if p.tok.kind != tkComma: break
  855. getTok(p)
  856. eat(p, tkParRi)
  857. # skip additional crap after 'create table (...) crap;'
  858. while p.tok.kind notin {tkSemicolon, tkEof}:
  859. getTok(p)
  860. proc parseTypeDef(p: var SqlParser): SqlNode =
  861. result = parseIfNotExists(p, nkCreateType)
  862. expectIdent(p)
  863. result.add(newNode(nkIdent, p.tok.literal))
  864. getTok(p)
  865. eat(p, "as")
  866. result.add(parseDataType(p))
  proc parseWhere(p: var SqlParser): SqlNode =
    ## Parses a `WHERE` clause. The current token (the `where` keyword, as
    ## checked by the callers) is skipped and the following expression becomes
    ## the single child of the returned `nkWhere` node.
    p.getTok()
    result = newNode(nkWhere)
    let condition = parseExpr(p)
    result.add(condition)
  proc parseJoinType(p: var SqlParser): SqlNode =
    ## Parses `[ INNER ] JOIN | ( LEFT | RIGHT | FULL ) [ OUTER ] JOIN`.
    ##
    ## Returns an `nkIdent` node whose string is the lower-case join kind
    ## (`"inner"`, `"left"`, `"right"`, `"full"`) or `""` for a bare `JOIN`.
    ## Reports "join type expected" through `sqlError` for anything else.
    if isKeyw(p, "inner"):
      getTok(p)
      eat(p, "join")
      result = newNode(nkIdent, "inner")
    elif isKeyw(p, "join"):
      getTok(p)
      result = newNode(nkIdent, "")
    elif isKeyw(p, "left") or isKeyw(p, "full") or isKeyw(p, "right"):
      # Capture the keyword before advancing past it.
      result = newNode(nkIdent, p.tok.literal.toLowerAscii())
      getTok(p)
      optKeyw(p, "outer")
      eat(p, "join")
    else:
      sqlError(p, "join type expected")
  proc parseFromItem(p: var SqlParser): SqlNode =
    ## Parses one item of a `FROM` clause together with any joins chained
    ## onto it.
    ##
    ## A simple item yields an `nkFromItemPair` holding the table expression
    ## (or a parenthesized sub-select) and, when `AS` follows, the alias.
    ## Each following `CROSS JOIN`, `NATURAL ... JOIN` or
    ## `[INNER|LEFT|RIGHT|FULL] JOIN` wraps what was parsed so far into an
    ## `nkJoin` / `nkNaturalJoin` node laid out as
    ## `[join type, left item, right item, (ON expression | nkUsing)]`.
    result = newNode(nkFromItemPair)
    var aliasAllowed = true
    if p.tok.kind != tkParLe:
      result.add(parseExpr(p))
    else:
      getTok(p)
      if isKeyw(p, "select"):
        result.add(parseSelect(p))
      else:
        # Parenthesized from-item: use the inner item directly; an alias is
        # not accepted for it.
        result = parseFromItem(p)
        aliasAllowed = false
      eat(p, tkParRi)
    if aliasAllowed and isKeyw(p, "as"):
      getTok(p)
      result.add(parseExpr(p))
    while true:
      if isKeyw(p, "cross"):
        let crossJoin = newNode(nkJoin)
        crossJoin.add(newNode(nkIdent, "cross"))
        crossJoin.add(result)
        getTok(p)
        eat(p, "join")
        crossJoin.add(parseFromItem(p))
        result = crossJoin
      elif isKeyw(p, "natural"):
        let naturalJoin = newNode(nkNaturalJoin)
        getTok(p)
        naturalJoin.add(parseJoinType(p))
        naturalJoin.add(result)
        naturalJoin.add(parseFromItem(p))
        result = naturalJoin
      elif isKeyw(p, "inner") or isKeyw(p, "join") or isKeyw(p, "left") or
          isKeyw(p, "full") or isKeyw(p, "right"):
        let qualifiedJoin = newNode(nkJoin)
        qualifiedJoin.add(parseJoinType(p))
        qualifiedJoin.add(result)
        qualifiedJoin.add(parseFromItem(p))
        # These joins require a join condition.
        if isKeyw(p, "on"):
          getTok(p)
          qualifiedJoin.add(parseExpr(p))
        elif isKeyw(p, "using"):
          getTok(p)
          var usingCols = newNode(nkUsing)
          parseParIdentList(p, usingCols)
          qualifiedJoin.add(usingCols)
        else:
          sqlError(p, "ON or USING expected")
        result = qualifiedJoin
      else:
        break
  proc parseIndexDef(p: var SqlParser): SqlNode =
    ## Parses an index definition of the shape
    ## `(PRIMARY KEY | <name>) ON <table> ( <column> {, <column>} )`.
    ##
    ## The root node comes from `parseIfNotExists(p, nkCreateIndex)`. Children
    ## appended here, in order: the index name (`nkIdent`) or an `nkPrimaryKey`
    ## node, the table name, then one `nkIdent` per indexed column.
    result = parseIfNotExists(p, nkCreateIndex)
    if not isKeyw(p, "primary"):
      expectIdent(p)
      result.add(newNode(nkIdent, p.tok.literal))
      getTok(p)
    else:
      getTok(p)
      eat(p, "key")
      result.add(newNode(nkPrimaryKey))
    eat(p, "on")
    expectIdent(p)
    let tableName = newNode(nkIdent, p.tok.literal)
    result.add(tableName)
    getTok(p)
    eat(p, tkParLe)
    # At least one column is required; further ones are comma separated.
    while true:
      expectIdent(p)
      result.add(newNode(nkIdent, p.tok.literal))
      getTok(p)
      if p.tok.kind != tkComma:
        break
      getTok(p)
    eat(p, tkParRi)
  proc parseInsert(p: var SqlParser): SqlNode =
    ## Parses `INSERT INTO <table> [ (columns) ]
    ## ( DEFAULT VALUES | VALUES (expr {, expr}) )`.
    ##
    ## The returned `nkInsert` node always has three children: the table name,
    ## an `nkColumnList` (or `nkNone` when no column list was given) and either
    ## an `nkDefault` node or an `nkValueList`.
    getTok(p) # skip `insert`
    eat(p, "into")
    expectIdent(p)
    result = newNode(nkInsert)
    let target = newNode(nkIdent, p.tok.literal)
    result.add(target)
    getTok(p)
    if p.tok.kind != tkParLe:
      result.add(newNode(nkNone))
    else:
      var columns = newNode(nkColumnList)
      parseParIdentList(p, columns)
      result.add(columns)
    if not isKeyw(p, "default"):
      eat(p, "values")
      eat(p, tkParLe)
      let values = newNode(nkValueList)
      while true:
        values.add(parseExpr(p))
        if p.tok.kind != tkComma:
          break
        getTok(p)
      result.add(values)
      eat(p, tkParRi)
    else:
      getTok(p)
      eat(p, "values")
      result.add(newNode(nkDefault))
  proc parseUpdate(p: var SqlParser): SqlNode =
    ## Parses `UPDATE <target> SET <col> = <expr> {, <col> = <expr>}
    ## [ WHERE <expr> ]`.
    ##
    ## The returned `nkUpdate` node holds the target, one `nkAsgn` per
    ## assignment and, as last child, the `nkWhere` clause or `nkNone`.
    getTok(p) # skip `update`
    result = newNode(nkUpdate)
    result.add(primary(p))
    eat(p, "set")
    while true:
      let assignment = newNode(nkAsgn)
      expectIdent(p)
      assignment.add(newNode(nkIdent, p.tok.literal))
      getTok(p)
      if not isOpr(p, "="):
        sqlError(p, "= expected")
      else:
        getTok(p)
      assignment.add(parseExpr(p))
      result.add(assignment)
      if p.tok.kind != tkComma:
        break
      getTok(p)
    let whereClause =
      if isKeyw(p, "where"): parseWhere(p)
      else: newNode(nkNone)
    result.add(whereClause)
  proc parseDelete(p: var SqlParser): SqlNode =
    ## Parses `DELETE [ * ] FROM <target> [ WHERE <expr> ]`.
    ##
    ## The returned `nkDelete` node has two children: the target and the
    ## `nkWhere` clause (or `nkNone` when there is no `WHERE`).
    getTok(p) # skip `delete`
    if isOpr(p, "*"):
      # An optional `*` before FROM is accepted and ignored.
      getTok(p)
    result = newNode(nkDelete)
    eat(p, "from")
    result.add(primary(p))
    let whereClause =
      if isKeyw(p, "where"): parseWhere(p)
      else: newNode(nkNone)
    result.add(whereClause)
  proc parseSelect(p: var SqlParser): SqlNode =
    ## Parses a `SELECT` statement; the current token must be the `select`
    ## keyword (it is skipped unconditionally).
    ##
    ## Returns an `nkSelectDistinct` node for `SELECT DISTINCT ...` and an
    ## `nkSelect` node otherwise (`ALL` is accepted and ignored). Children are
    ## appended in the order the clauses are recognized:
    ## `nkSelectColumns`, then optionally `nkFrom`, `nkWhere`, `nkGroup`,
    ## `nkOrder`, `nkHaving`, one of `nkUnion` / `nkIntersect` / `nkExcept`,
    ## `nkLimit` and `nkOffset`.
    getTok(p) # skip `select`
    # Pick the root node kind exactly once. Previously the `nkSelect` node was
    # created unconditionally after this check, which silently discarded the
    # DISTINCT quantifier.
    if isKeyw(p, "distinct"):
      getTok(p)
      result = newNode(nkSelectDistinct)
    else:
      if isKeyw(p, "all"):
        getTok(p)
      result = newNode(nkSelect)

    # Select list: `*` or `<expr> [AS <alias>]`, comma separated.
    let columns = newNode(nkSelectColumns)
    while true:
      if isOpr(p, "*"):
        columns.add(newNode(nkIdent, "*"))
        getTok(p)
      else:
        let pair = newNode(nkSelectPair)
        pair.add(parseExpr(p))
        columns.add(pair)
        if isKeyw(p, "as"):
          getTok(p)
          pair.add(parseExpr(p))
      if p.tok.kind != tkComma: break
      getTok(p)
    result.add(columns)

    if isKeyw(p, "from"):
      let fromClause = newNode(nkFrom)
      while true:
        # Skips `from` on the first iteration and the comma afterwards.
        getTok(p)
        fromClause.add(parseFromItem(p))
        if p.tok.kind != tkComma: break
      result.add(fromClause)

    if isKeyw(p, "where"):
      result.add(parseWhere(p))

    if isKeyw(p, "group"):
      getTok(p)
      eat(p, "by")
      let groupBy = newNode(nkGroup)
      while true:
        groupBy.add(parseExpr(p))
        if p.tok.kind != tkComma: break
        getTok(p)
      result.add(groupBy)

    if isKeyw(p, "order"):
      getTok(p)
      eat(p, "by")
      let orderBy = newNode(nkOrder)
      while true:
        var key = parseExpr(p)
        if isKeyw(p, "asc"):
          getTok(p) # ascending is the default, nothing to record
        elif isKeyw(p, "desc"):
          getTok(p)
          let descending = newNode(nkDesc)
          descending.add(key)
          key = descending
        orderBy.add(key)
        if p.tok.kind != tkComma: break
        getTok(p)
      result.add(orderBy)

    if isKeyw(p, "having"):
      let having = newNode(nkHaving)
      while true:
        # Skips `having` on the first iteration and the comma afterwards.
        getTok(p)
        having.add(parseExpr(p))
        if p.tok.kind != tkComma: break
      result.add(having)

    if isKeyw(p, "union"):
      result.add(newNode(nkUnion))
      getTok(p)
    elif isKeyw(p, "intersect"):
      result.add(newNode(nkIntersect))
      getTok(p)
    elif isKeyw(p, "except"):
      result.add(newNode(nkExcept))
      getTok(p)

    if isKeyw(p, "limit"):
      getTok(p)
      let limit = newNode(nkLimit)
      limit.add(parseExpr(p))
      result.add(limit)

    if isKeyw(p, "offset"):
      getTok(p)
      let offset = newNode(nkOffset)
      offset.add(parseExpr(p))
      result.add(offset)
  proc parseStmt(p: var SqlParser; parent: SqlNode) =
    ## Parses a single SQL statement and appends its AST to `parent`.
    ##
    ## Recognized statements: `CREATE [modifiers] (TABLE | TYPE | INDEX)`,
    ## `INSERT`, `UPDATE`, `DELETE` and `SELECT`. A `BEGIN` keyword is consumed
    ## without adding a node. Anything else is reported through `sqlError`.
    if isKeyw(p, "create"):
      getTok(p)
      # Optional modifiers are accepted in this fixed order and discarded.
      for modifier in ["cached", "memory", "temp", "global", "local",
                       "temporary", "unique", "hash"]:
        optKeyw(p, modifier)
      if isKeyw(p, "table"):
        parent.add parseTableDef(p)
      elif isKeyw(p, "type"):
        parent.add parseTypeDef(p)
      elif isKeyw(p, "index"):
        parent.add parseIndexDef(p)
      else:
        # List every object kind that is actually accepted after CREATE.
        sqlError(p, "TABLE, TYPE or INDEX expected")
    elif isKeyw(p, "insert"):
      parent.add parseInsert(p)
    elif isKeyw(p, "update"):
      parent.add parseUpdate(p)
    elif isKeyw(p, "delete"):
      parent.add parseDelete(p)
    elif isKeyw(p, "select"):
      parent.add parseSelect(p)
    elif isKeyw(p, "begin"):
      getTok(p)
    else:
      # The message names every statement keyword handled above.
      sqlError(p, "SELECT, CREATE, INSERT, UPDATE, DELETE or BEGIN expected")
  proc parse(p: var SqlParser): SqlNode =
    ## Parses the whole input stream of `p` and returns the SQL AST as an
    ## `nkStmtList` with one child per statement.
    ## Syntax errors raise an `SqlParseError` exception.
    result = newNode(nkStmtList)
    while p.tok.kind != tkEof:
      parseStmt(p, result)
      # Statements are separated by ';'; the last one may omit it.
      if p.tok.kind != tkEof:
        eat(p, tkSemicolon)
  proc close(p: var SqlParser) =
    ## Closes the parser `p`. The associated input stream is closed as well.
    SqlLexer(p).close()
  type
    SqlWriter = object
      ## Accumulates rendered SQL text while an AST is written out.
      indent: int     ## indentation level (not referenced by the writer
                      ## procs next to this type; TODO confirm usage)
      upperCase: bool ## when true, `addKeyw` emits keywords in upper case
      buffer: string  ## the SQL text produced so far
  proc add(s: var SqlWriter, thing: char) =
    ## Appends the single character `thing` verbatim; unlike the string
    ## overload, no separating space is inserted.
    add(s.buffer, thing)
  proc prepareAdd(s: var SqlWriter) {.inline.} =
    ## Makes sure the next token is separated from the previous one: appends
    ## a space unless the buffer is empty or already ends in a space, a
    ## newline, `(` or `.`.
    if s.buffer.len == 0:
      return
    if s.buffer[^1] in {' ', '\L', '(', '.'}:
      return
    s.buffer.add(" ")
  proc add(s: var SqlWriter, thing: string) =
    ## Appends `thing`, preceded by a separating space when `prepareAdd`
    ## decides one is needed.
    prepareAdd(s)
    add(s.buffer, thing)
  proc addKeyw(s: var SqlWriter, thing: string) =
    ## Appends the keyword `thing`, converted to upper case when the writer's
    ## `upperCase` flag is set and left untouched otherwise.
    if s.upperCase:
      s.add(thing.toUpperAscii())
    else:
      s.add(thing)
  proc addIden(s: var SqlWriter, thing: string) =
    ## Appends the identifier `thing`. If its lower-case form is one of the
    ## `reservedKeywords` it is wrapped in double quotes first.
    if thing.toLowerAscii() in reservedKeywords:
      s.add('"' & thing & '"')
    else:
      s.add(thing)
  # Forward declaration: the list helpers below recurse into `ra`, which is
  # defined further down.
  proc ra(n: SqlNode, s: var SqlWriter) {.gcsafe.}

  proc rs(n: SqlNode, s: var SqlWriter, prefix = "(", suffix = ")", sep = ", ") =
    ## Renders all children of `n` between `prefix` and `suffix`, separated by
    ## `sep`. Nothing at all is written when `n` has no children.
    if n.len == 0:
      return
    s.add(prefix)
    for idx, child in n.sons:
      if idx > 0:
        s.add(sep)
      ra(child, s)
    s.add(suffix)
  proc addMulti(s: var SqlWriter, n: SqlNode, sep = ',') =
    ## Renders every child of `n`, writing the character `sep` between
    ## consecutive children. Writes nothing when `n` has no children.
    if n.len == 0:
      return
    for idx, child in n.sons:
      if idx > 0:
        s.add(sep)
      ra(child, s)
  proc addMulti(s: var SqlWriter, n: SqlNode, sep = ',', prefix, suffix: char) =
    ## Like the three-argument `addMulti`, but additionally encloses the
    ## rendered children in the characters `prefix` and `suffix`.
    ## Writes nothing (not even the delimiters) when `n` has no children.
    if n.len == 0:
      return
    s.add(prefix)
    for idx, child in n.sons:
      if idx > 0:
        s.add(sep)
      ra(child, s)
    s.add(suffix)
  proc quoted(s: string): string =
    ## Returns `s` enclosed in double quotes, with every embedded double
    ## quote doubled.
    result = newStringOfCap(s.len + 2)
    result.add('"')
    result.add(s.replace("\"", "\"\""))
    result.add('"')
  func escape(result: var string; s: string) =
    ## Appends `s` to `result` as a single-quoted string literal.
    ## Control characters (`'\0'..'\31'`) are written as `\x` followed by two
    ## hex digits, a single quote is doubled, everything else is copied as is.
    result.add('\'')
    for ch in s:
      if ch in {'\0'..'\31'}:
        result.add("\\x")
        result.add(toHex(ord(ch), 2))
      elif ch == '\'':
        result.add("''")
      else:
        result.add(ch)
    result.add('\'')
  proc ra(n: SqlNode, s: var SqlWriter) =
    ## Renders the AST node `n`, recursively, as SQL text appended to `s`.
    ##
    ## `nil` and `nkNone` nodes produce no output. Keywords go through
    ## `addKeyw` so they honour the writer's `upperCase` flag. String
    ## arguments to `s.add` get an automatic separating space, character
    ## arguments are appended verbatim; the mix below is deliberate and
    ## determines the exact spacing of the output.
    if n == nil: return
    case n.kind
    of nkNone: discard
    of nkIdent:
      # Identifiers made only of printable, non-space ASCII are written bare.
      if allCharsInSet(n.strVal, {'\33'..'\127'}):
        s.add(n.strVal)
      else:
        s.add(quoted(n.strVal))
    of nkQuotedIdent:
      s.add(quoted(n.strVal))
    of nkStringLit:
      s.prepareAdd
      s.buffer.escape(n.strVal)
    of nkBitStringLit:
      s.add("b'" & n.strVal & "'")
    of nkHexStringLit:
      s.add("x'" & n.strVal & "'")
    of nkIntegerLit, nkNumericLit:
      s.add(n.strVal)
    of nkPrimaryKey:
      s.addKeyw("primary key")
      rs(n, s)
    of nkForeignKey:
      s.addKeyw("foreign key")
      rs(n, s)
    of nkNotNull:
      s.addKeyw("not null")
    of nkNull:
      s.addKeyw("null")
    of nkDot:
      ra(n.sons[0], s)
      s.add('.')
      ra(n.sons[1], s)
    of nkDotDot:
      ra(n.sons[0], s)
      s.add(". .")
      ra(n.sons[1], s)
    of nkPrefix:
      ra(n.sons[0], s)
      s.add(' ')
      ra(n.sons[1], s)
    of nkInfix:
      # Child 0 is the operator; children 1 and 2 are the operands.
      ra(n.sons[1], s)
      s.add(' ')
      ra(n.sons[0], s)
      s.add(' ')
      ra(n.sons[2], s)
    of nkCall, nkColumnReference:
      # Child 0 is the callee / table, the rest are the arguments.
      ra(n.sons[0], s)
      s.add('(')
      for i in 1 ..< n.len:
        if i > 1: s.add(',')
        ra(n.sons[i], s)
      s.add(')')
    of nkPrGroup:
      s.add('(')
      s.addMulti(n)
      s.add(')')
    of nkReferences:
      s.addKeyw("references")
      ra(n.sons[0], s)
    of nkDefault:
      s.addKeyw("default")
      ra(n.sons[0], s)
    of nkCheck:
      s.addKeyw("check")
      ra(n.sons[0], s)
    of nkConstraint:
      s.addKeyw("constraint")
      ra(n.sons[0], s)
      s.addKeyw("check")
      ra(n.sons[1], s)
    of nkUnique:
      s.addKeyw("unique")
      rs(n, s)
    of nkIdentity:
      s.addKeyw("identity")
    of nkColumnDef:
      rs(n, s, "", "", " ")
    of nkStmtList:
      for i in 0 ..< n.len:
        ra(n.sons[i], s)
        s.add(';')
    of nkInsert:
      assert n.len == 3
      s.addKeyw("insert into")
      ra(n.sons[0], s)
      s.add(' ')
      ra(n.sons[1], s)
      # A child-less `nkDefault` here stands for `DEFAULT VALUES`.
      if n.sons[2].kind == nkDefault:
        s.addKeyw("default values")
      else:
        ra(n.sons[2], s)
    of nkUpdate:
      s.addKeyw("update")
      ra(n.sons[0], s)
      s.addKeyw("set")
      # Children 1 .. len-2 are the assignments; the last child is the WHERE
      # clause (or `nkNone`).
      let last = n.len - 1
      for i in 1 .. last - 1:
        if i > 1: s.add(", ")
        let asgn = n.sons[i]
        assert asgn.kind == nkAsgn
        ra(asgn, s)
      ra(n.sons[last], s)
    of nkDelete:
      s.addKeyw("delete from")
      ra(n.sons[0], s)
      ra(n.sons[1], s)
    of nkSelect, nkSelectDistinct:
      s.addKeyw("select")
      if n.kind == nkSelectDistinct:
        s.addKeyw("distinct")
      for i in 0 ..< n.len:
        ra(n.sons[i], s)
    of nkSelectColumns:
      for i, column in n.sons:
        if i > 0: s.add(',')
        ra(column, s)
    of nkSelectPair:
      ra(n.sons[0], s)
      if n.sons.len == 2:
        s.addKeyw("as")
        ra(n.sons[1], s)
    of nkFromItemPair:
      let item = n.sons[0]
      if item.kind in {nkSelect, nkSelectDistinct}:
        # Sub-queries are the only from-items that need parentheses.
        s.add('(')
        ra(item, s)
        s.add(')')
      else:
        # Plain, quoted, qualified (`schema.table`) or otherwise computed
        # table expressions are rendered as they are. The former code
        # asserted that anything but a bare identifier was an `nkSelect`,
        # which failed for such valid from-items.
        ra(item, s)
      if n.sons.len == 2:
        s.addKeyw("as")
        ra(n.sons[1], s)
    of nkAsgn:
      ra(n.sons[0], s)
      s.add(" = ")
      ra(n.sons[1], s)
    of nkFrom:
      s.addKeyw("from")
      s.addMulti(n)
    of nkJoin, nkNaturalJoin:
      # Layout: [join type, left item, right item, optional ON expr / USING].
      var joinType = n.sons[0].strVal
      if joinType == "":
        joinType = "join"
      else:
        joinType &= " " & "join"
      if n.kind == nkNaturalJoin:
        joinType = "natural " & joinType
      ra(n.sons[1], s)
      s.addKeyw(joinType)
      # If the right part of the join is not a leaf, parenthesize it.
      if n.sons[2].kind != nkFromItemPair:
        s.add('(')
        ra(n.sons[2], s)
        s.add(')')
      else:
        ra(n.sons[2], s)
      if n.sons.len > 3:
        # An `nkUsing` node writes its own keyword; anything else is the
        # ON condition.
        if n.sons[3].kind != nkUsing:
          s.addKeyw("on")
        ra(n.sons[3], s)
    of nkUsing:
      s.addKeyw("using")
      rs(n, s)
    of nkGroup:
      s.addKeyw("group by")
      s.addMulti(n)
    of nkLimit:
      s.addKeyw("limit")
      s.addMulti(n)
    of nkOffset:
      s.addKeyw("offset")
      s.addMulti(n)
    of nkHaving:
      s.addKeyw("having")
      s.addMulti(n)
    of nkOrder:
      s.addKeyw("order by")
      s.addMulti(n)
    of nkDesc:
      ra(n.sons[0], s)
      s.addKeyw("desc")
    of nkUnion:
      s.addKeyw("union")
    of nkIntersect:
      s.addKeyw("intersect")
    of nkExcept:
      s.addKeyw("except")
    of nkColumnList:
      rs(n, s)
    of nkValueList:
      s.addKeyw("values")
      rs(n, s)
    of nkWhere:
      s.addKeyw("where")
      ra(n.sons[0], s)
    of nkCreateTable, nkCreateTableIfNotExists:
      s.addKeyw("create table")
      if n.kind == nkCreateTableIfNotExists:
        s.addKeyw("if not exists")
      ra(n.sons[0], s)
      s.add('(')
      for i in 1 ..< n.len:
        if i > 1: s.add(',')
        ra(n.sons[i], s)
      s.add(");")
    of nkCreateType, nkCreateTypeIfNotExists:
      s.addKeyw("create type")
      if n.kind == nkCreateTypeIfNotExists:
        s.addKeyw("if not exists")
      ra(n.sons[0], s)
      s.addKeyw("as")
      ra(n.sons[1], s)
    of nkCreateIndex, nkCreateIndexIfNotExists:
      s.addKeyw("create index")
      if n.kind == nkCreateIndexIfNotExists:
        s.addKeyw("if not exists")
      # Child 0 is the index name, child 1 the table, the rest the columns.
      ra(n.sons[0], s)
      s.addKeyw("on")
      ra(n.sons[1], s)
      s.add('(')
      for i in 2 ..< n.len:
        if i > 2: s.add(", ")
        ra(n.sons[i], s)
      s.add(");")
    of nkEnumDef:
      s.addKeyw("enum")
      rs(n, s)
  proc renderSql*(n: SqlNode, upperCase = false): string =
    ## Converts an SQL abstract syntax tree to its string representation.
    ## With `upperCase = true` the SQL keywords are written in upper case.
    var writer = SqlWriter(buffer: "", upperCase: upperCase)
    ra(n, writer)
    result = writer.buffer
  proc `$`*(n: SqlNode): string =
    ## An alias for `renderSql` with its default settings.
    result = n.renderSql()
  proc treeReprAux(s: SqlNode, level: int, result: var string) =
    ## Appends a dump of the subtree rooted at `s` to `result`: a newline,
    ## one space per nesting `level`, then the node kind. Literal nodes are
    ## followed by their string value; all other nodes by their children,
    ## one level deeper.
    result.add('\n')
    for _ in 1 .. level:
      result.add(" ")
    result.add($s.kind)
    if s.kind notin LiteralNodes:
      for child in s.sons:
        treeReprAux(child, level + 1, result)
    else:
      result.add(' ')
      result.add(s.strVal)
  proc treeRepr*(s: SqlNode): string =
    ## Returns a multi-line textual dump of the AST rooted at `s`, one node
    ## per line, indented by nesting depth.
    var dump = newStringOfCap(128)
    treeReprAux(s, 0, dump)
    result = dump
  1456. import std/streams
  proc open(L: var SqlLexer, input: Stream, filename: string) =
    ## Opens the lexer `L` on the stream `input` through `lexbase.open` and
    ## stores `filename` in the lexer.
    lexbase.open(L, input = input)
    L.filename = filename
  proc open(p: var SqlParser, input: Stream, filename: string) =
    ## Opens the parser `p` and assigns the input stream `input` to it.
    ## `filename` is only used for error messages.
    SqlLexer(p).open(input, filename)
    # Start from a neutral token, then load the first real one.
    p.tok.kind = tkInvalid
    p.tok.literal = ""
    p.getTok()
  proc parseSql*(input: Stream, filename: string): SqlNode =
    ## Parses the SQL read from `input` into an AST and returns that AST.
    ## `filename` is only used for error messages.
    ## Syntax errors raise an `SqlParseError` exception.
    var parser: SqlParser
    open(parser, input, filename)
    # The parser (and with it the stream) is closed on success and on error.
    defer: close(parser)
    result = parse(parser)
  proc parseSql*(input: string, filename = ""): SqlNode =
    ## Parses the SQL text `input` into an AST and returns that AST.
    ## `filename` is only used for error messages.
    ## Syntax errors raise an `SqlParseError` exception.
    # Forward the caller's `filename`; it used to be replaced by "" here, so
    # error messages never carried the file name.
    parseSql(newStringStream(input), filename)