12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2009 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## The ``parsesql`` module implements a high performance SQL file
- ## parser. It parses PostgreSQL syntax and the SQL ANSI standard.
- import
- hashes, strutils, lexbase, streams
- # ------------------- scanner -------------------------------------------------
type
  TokKind = enum            ## enumeration of all SQL tokens
    tkInvalid,              ## invalid token
    tkEof,                  ## end of file reached
    tkIdentifier,           ## abc
    tkQuotedIdentifier,     ## "abc"
    tkStringConstant,       ## 'abc'
    tkEscapeConstant,       ## e'abc'
    tkDollarQuotedConstant, ## $tag$abc$tag$
    tkBitStringConstant,    ## B'00011'
    tkHexStringConstant,    ## x'00011'
    tkInteger,              ## digits only, e.g. 123
    tkNumeric,              ## number with a fraction and/or exponent, e.g. 1.5e3
    tkOperator,             ## + - * / < > = ~ ! @ # % ^ & | ` ?
    tkSemicolon,            ## ';'
    tkColon,                ## ':'
    tkComma,                ## ','
    tkParLe,                ## '('
    tkParRi,                ## ')'
    tkBracketLe,            ## '['
    tkBracketRi,            ## ']'
    tkDot                   ## '.'

  Token = object            # a token
    kind: TokKind           # the type of the token
    literal: string         # the parsed (string) literal

  SqlLexer* = object of BaseLexer ## the lexer object; extends ``BaseLexer``
                                  ## with the name of the scanned file.
    filename: string              # only used when formatting error messages

# old T-prefixed names are kept as deprecated aliases
{.deprecated: [TToken: Token, TSqlLexer: SqlLexer].}
const
  # Human-readable name of every token kind, indexed by ``TokKind``; used to
  # build "... expected" error messages.
  tokKindToStr: array[TokKind, string] = [
    "invalid", "[EOF]", "identifier", "quoted identifier", "string constant",
    "escape string constant", "dollar quoted constant", "bit string constant",
    "hex string constant", "integer constant", "numeric constant", "operator",
    ";", ":", ",", "(", ")", "[", "]", "."
  ]
proc open(L: var SqlLexer, input: Stream, filename: string) =
  ## Initialises the lexer `L` to read from `input`. `filename` is stored
  ## so that error messages can refer to it.
  # the qualified call selects the base lexer's `open`, not this overload
  lexbase.open(L, input)
  L.filename = filename
proc close(L: var SqlLexer) =
  ## Closes the lexer by delegating to the base lexer's `close`.
  lexbase.close(L)
proc getColumn(L: SqlLexer): int =
  ## Returns the column number of the current buffer position.
  getColNumber(L, L.bufpos)
proc getLine(L: SqlLexer): int =
  ## Returns the line number the lexer is currently at.
  L.lineNumber
proc handleHexChar(c: var SqlLexer, xi: var int) =
  ## If the current character is a hexadecimal digit, shifts `xi` left by
  ## four bits, ors the digit's value in and advances the buffer position.
  ## Any other character leaves both `xi` and the position untouched.
  let ch = c.buf[c.bufpos]
  var digit: int
  case ch
  of '0'..'9': digit = ord(ch) - ord('0')
  of 'a'..'f': digit = ord(ch) - ord('a') + 10
  of 'A'..'F': digit = ord(ch) - ord('A') + 10
  else: return
  xi = (xi shl 4) or digit
  inc(c.bufpos)
proc handleOctChar(c: var SqlLexer, xi: var int) =
  ## If the current character is an octal digit, shifts `xi` left by three
  ## bits, ors the digit's value in and advances the buffer position.
  let ch = c.buf[c.bufpos]
  if ch notin {'0'..'7'}: return
  xi = (xi shl 3) or (ord(ch) - ord('0'))
  inc(c.bufpos)
proc getEscapedChar(c: var SqlLexer, tok: var Token) =
  ## Decodes one backslash escape sequence. On entry the buffer position is
  ## at the backslash; the decoded character is appended to `tok.literal`.
  ## An unknown escape, or an octal escape above 255, sets `tok.kind` to
  ## `tkInvalid`.
  inc(c.bufpos) # step over the backslash
  let ch = c.buf[c.bufpos]
  case ch
  of 'x', 'X':
    # up to two hexadecimal digits
    inc(c.bufpos)
    var code = 0
    handleHexChar(c, code)
    handleHexChar(c, code)
    add(tok.literal, chr(code))
  of '0'..'7':
    # up to three octal digits
    var code = 0
    handleOctChar(c, code)
    handleOctChar(c, code)
    handleOctChar(c, code)
    if code <= 255: add(tok.literal, chr(code))
    else: tok.kind = tkInvalid
  else:
    # single-character escapes
    var decoded: char
    case ch
    of 'n', 'N', 'l', 'L': decoded = '\L'
    of 'r', 'R', 'c', 'C': decoded = '\c'
    of 'f', 'F': decoded = '\f'
    of 'e', 'E': decoded = '\e'
    of 'a', 'A': decoded = '\a'
    of 'b', 'B': decoded = '\b'
    of 'v', 'V': decoded = '\v'
    of 't', 'T': decoded = '\t'
    of '\'', '\"', '\\': decoded = ch
    else:
      tok.kind = tkInvalid
      return
    add(tok.literal, decoded)
    inc(c.bufpos)
proc handleCRLF(c: var SqlLexer, pos: int): int =
  ## Lets the base lexer process a carriage return or line feed at `pos`
  ## and returns the position it reports. For any other character `pos`
  ## is returned unchanged.
  let ch = c.buf[pos]
  if ch == '\c':
    result = lexbase.handleCR(c, pos)
  elif ch == '\L':
    result = lexbase.handleLF(c, pos)
  else:
    result = pos
proc skip(c: var SqlLexer) =
  ## Advances the buffer position past blanks, tabs, line breaks,
  ## `--` line comments and (possibly nested) `/* ... */` block comments.
  var
    i = c.bufpos
    text = c.buf
    depth = 0 # nesting level of block comments
  while true:
    case text[i]
    of ' ', '\t':
      inc(i)
    of '\c', '\L':
      i = handleCRLF(c, i)
      text = c.buf # the buffer may have been refilled
    of '-':
      if text[i+1] != '-': break
      # line comment: runs up to (not including) the line end
      while text[i] notin {'\c', '\L', lexbase.EndOfFile}: inc(i)
    of '/':
      if text[i+1] != '*': break
      inc(i, 2)
      while true:
        case text[i]
        of '\0':
          break # unterminated comment
        of '\c', '\L':
          i = handleCRLF(c, i)
          text = c.buf
        of '*':
          if text[i+1] == '/':
            inc(i, 2)
            if depth <= 0: break
            dec(depth)
          else:
            inc(i)
        of '/':
          if text[i+1] == '*':
            inc(i, 2)
            inc(depth)
          else:
            inc(i)
        else:
          inc(i)
    else:
      break # anything else, including EndOfFile, ends the skipping
  c.bufpos = i
proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) =
  ## Scans a single-quoted string constant of the given `kind`. On entry
  ## the buffer position is at the opening quote. A doubled quote yields
  ## one quote character; backslash escapes are decoded only for
  ## `tkEscapeConstant`. Segments separated by whitespace that contains a
  ## line break are concatenated. A line break or end of file inside a
  ## segment makes the token `tkInvalid`.
  var
    i = c.bufpos + 1
    text = c.buf
  tok.kind = kind
  block scanning:
    while true:
      # scan one quoted segment up to its closing quote
      while true:
        let ch = text[i]
        if ch == '\'':
          if text[i+1] != '\'':
            inc(i)
            break
          add(tok.literal, '\'')
          inc(i, 2)
        elif ch in {'\c', '\L', lexbase.EndOfFile}:
          tok.kind = tkInvalid
          break scanning
        elif kind == tkEscapeConstant and ch == '\\':
          c.bufpos = i
          getEscapedChar(c, tok)
          i = c.bufpos
        else:
          add(tok.literal, ch)
          inc(i)
      c.bufpos = i
      let startLine = c.lineNumber
      skip(c)
      # the string only continues if a line break was skipped ...
      if c.lineNumber <= startLine: break scanning
      text = c.buf # may have been reallocated
      i = c.bufpos
      # ... and another quoted segment follows
      if text[i] != '\'': break scanning
      inc(i)
  c.bufpos = i
proc getDollarString(c: var SqlLexer, tok: var Token) =
  ## Scans a dollar-quoted constant (``$tag$ ... $tag$``). On entry the
  ## buffer position is at the first `$`. The text between the delimiters
  ## is stored in `tok.literal`; line breaks are stored as `"\L"`.
  ## The token becomes `tkInvalid` if the opening tag is not terminated by
  ## `$` or if the end of the input is reached before the closing tag.
  var pos = c.bufpos + 1
  var buf = c.buf
  tok.kind = tkDollarQuotedConstant
  var tag = "$"
  while buf[pos] in IdentChars:
    add(tag, buf[pos])
    inc(pos)
  if buf[pos] == '$': inc(pos)
  else:
    tok.kind = tkInvalid
    # still consume what was scanned so that callers which keep asking for
    # tokens always make progress
    c.bufpos = pos
    return
  while true:
    case buf[pos]
    of '\c', '\L':
      pos = handleCRLF(c, pos)
      buf = c.buf
      add(tok.literal, "\L")
    of '\0':
      tok.kind = tkInvalid
      break
    of '$':
      inc(pos)
      var tag2 = "$"
      while buf[pos] in IdentChars:
        add(tag2, buf[pos])
        inc(pos)
      # only a complete `$tag$` closes the string
      if buf[pos] == '$' and tag2 == tag:
        inc(pos)
        break
      # Not the closing delimiter: keep exactly the characters that were
      # read. A following '$' is left in the buffer because it may start
      # the real closing delimiter.
      add(tok.literal, tag2)
    else:
      add(tok.literal, buf[pos])
      inc(pos)
  c.bufpos = pos
proc getSymbol(c: var SqlLexer, tok: var Token) =
  ## Scans an identifier starting at the current position and marks the
  ## token as `tkIdentifier`. The first character is taken unconditionally;
  ## the following ones must be letters, digits, `_`, `$` or bytes >= 128.
  const symbolChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '$', '\128'..'\255'}
  let text = c.buf
  var i = c.bufpos
  add(tok.literal, text[i])
  inc(i)
  while text[i] in symbolChars:
    add(tok.literal, text[i])
    inc(i)
  c.bufpos = i
  tok.kind = tkIdentifier
proc getQuotedIdentifier(c: var SqlLexer, tok: var Token) =
  ## Scans a double-quoted identifier. On entry the buffer position is at
  ## the opening quote. A doubled `"` yields one `"`. A line break or end
  ## of file before the closing quote makes the token `tkInvalid`.
  let text = c.buf
  var i = c.bufpos + 1
  tok.kind = tkQuotedIdentifier
  while true:
    let ch = text[i]
    case ch
    of '\"':
      if text[i+1] != '\"':
        inc(i) # closing quote
        break
      add(tok.literal, '\"')
      inc(i, 2)
    of '\c', '\L', lexbase.EndOfFile:
      tok.kind = tkInvalid
      break
    else:
      add(tok.literal, ch)
      inc(i)
  c.bufpos = i
proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
  ## Scans the quoted part of a bit or hex string constant. Scanning starts
  ## one character after the current buffer position. Only characters from
  ## `validChars` are accepted; any other character before the closing
  ## quote makes the token `tkInvalid`. Segments separated by whitespace
  ## that contains a line break are concatenated. `tok.kind` is expected
  ## to have been set by the caller.
  var
    i = c.bufpos + 1
    text = c.buf
  block scanning:
    while true:
      # scan one quoted segment
      while true:
        let ch = text[i]
        if ch in validChars:
          add(tok.literal, ch)
          inc(i)
        elif ch == '\'':
          inc(i)
          break
        else:
          tok.kind = tkInvalid
          break scanning
      c.bufpos = i
      let startLine = c.lineNumber
      skip(c)
      # the constant only continues if a line break was skipped ...
      if c.lineNumber <= startLine: break scanning
      text = c.buf # may have been reallocated
      i = c.bufpos
      # ... and another quoted segment follows
      if text[i] != '\'': break scanning
      inc(i)
  c.bufpos = i
proc getNumeric(c: var SqlLexer, tok: var Token) =
  ## Scans a number: digits, an optional `.` followed by digits and an
  ## optional exponent. The token is `tkInteger` when only digits were
  ## seen and `tkNumeric` otherwise. An exponent without digits makes the
  ## token `tkInvalid`. A `+` sign in the exponent is consumed but not
  ## stored in the literal.
  let text = c.buf
  var i = c.bufpos

  template takeChar() =
    add(tok.literal, text[i])
    inc(i)

  template takeDigits() =
    while text[i] in Digits: takeChar()

  tok.kind = tkInteger
  takeDigits()
  if text[i] == '.':
    tok.kind = tkNumeric
    takeChar()
    takeDigits()
  if text[i] in {'E', 'e'}:
    tok.kind = tkNumeric
    takeChar()
    case text[i]
    of '+': inc(i)
    of '-': takeChar()
    else: discard
    if text[i] notin Digits:
      tok.kind = tkInvalid
    else:
      takeDigits()
  c.bufpos = i
proc getOperator(c: var SqlLexer, tok: var Token) =
  ## Scans an operator made of one or more operator characters and marks
  ## the token as `tkOperator`.
  ##
  ## The operator ends before a `--` or `/*` sequence, because those start
  ## a comment. A `+` or `-` that would end a multi-character operator is
  ## not made part of it unless the operator already contains one of
  ## ``~ ! @ # % ^ & | ` ?``.
  const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%',
                     '^', '&', '|', '`', '?'}
  tok.kind = tkOperator
  var pos = c.bufpos
  var buf = c.buf
  var trailingPlusMinus = false
  while true:
    case buf[pos]
    of '-':
      # Look at the NEXT character: "--" starts a comment. Comparing
      # buf[pos] with '-' here would always be true, so '-' could never be
      # scanned and the lexer would not advance.
      if buf[pos+1] == '-': break
      if not trailingPlusMinus and buf[pos+1] notin operators and
           tok.literal.len > 0: break
    of '/':
      # "/*" starts a comment; again the next character decides
      if buf[pos+1] == '*': break
    of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?':
      trailingPlusMinus = true
    of '+':
      if not trailingPlusMinus and buf[pos+1] notin operators and
           tok.literal.len > 0: break
    of '*', '<', '>', '=': discard
    else: break
    add(tok.literal, buf[pos])
    inc(pos)
  c.bufpos = pos
proc getTok(c: var SqlLexer, tok: var Token) =
  ## Reads the next token from the input into `tok`.
  ##
  ## Whitespace and comments are skipped first. `tok.literal` is reset and
  ## filled with the token's text; `tok.kind` is `tkInvalid` for a
  ## character that starts no known token (that character is consumed) and
  ## `tkEof` at the end of the input.
  template singleChar(k: TokKind, ch: char) =
    # a token consisting of exactly one punctuation character
    tok.kind = k
    inc(c.bufpos)
    add(tok.literal, ch)

  tok.kind = tkInvalid
  setLen(tok.literal, 0)
  skip(c)
  case c.buf[c.bufpos]
  of ';': singleChar(tkSemicolon, ';')
  of ',': singleChar(tkComma, ',')
  of ':': singleChar(tkColon, ':')
  of '[': singleChar(tkBracketLe, '[')
  of ']': singleChar(tkBracketRi, ']')
  of '(': singleChar(tkParLe, '(')
  of ')': singleChar(tkParRi, ')')
  of 'e', 'E':
    if c.buf[c.bufpos + 1] == '\'':
      inc(c.bufpos) # step over the prefix; getString starts at the quote
      getString(c, tok, tkEscapeConstant)
    else:
      getSymbol(c, tok)
  of 'b', 'B':
    if c.buf[c.bufpos + 1] == '\'':
      tok.kind = tkBitStringConstant
      # Step over the prefix letter so the scanner starts at the opening
      # quote, exactly as for e'...'. Without this the opening quote is
      # taken for the closing one and the literal is always empty.
      inc(c.bufpos)
      getBitHexString(c, tok, {'0'..'1'})
    else:
      getSymbol(c, tok)
  of 'x', 'X':
    if c.buf[c.bufpos + 1] == '\'':
      tok.kind = tkHexStringConstant
      inc(c.bufpos) # see the bit string case
      getBitHexString(c, tok, {'a'..'f','A'..'F','0'..'9'})
    else:
      getSymbol(c, tok)
  of '$': getDollarString(c, tok)
  of '.':
    if c.buf[c.bufpos + 1] in Digits:
      getNumeric(c, tok) # a number like .5
    else:
      singleChar(tkDot, '.')
  of '0'..'9': getNumeric(c, tok)
  of '\'': getString(c, tok, tkStringConstant)
  of '"': getQuotedIdentifier(c, tok)
  of lexbase.EndOfFile:
    tok.kind = tkEof
    tok.literal = "[EOF]"
  of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_',
     '\128'..'\255':
    getSymbol(c, tok)
  of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%',
     '^', '&', '|', '`', '?':
    getOperator(c, tok)
  else:
    # unknown character: consume it, the token stays tkInvalid
    add(tok.literal, c.buf[c.bufpos])
    inc(c.bufpos)
proc errorStr(L: SqlLexer, msg: string): string =
  ## Formats `msg` as an error message prefixed with the file name and the
  ## current line and column of the lexer.
  let
    line = $getLine(L)
    column = $getColumn(L)
  result = "$1($2, $3) Error: $4" % [L.filename, line, column, msg]
- # ----------------------------- parser ----------------------------------------
- # Operator/Element Associativity Description
- # . left table/column name separator
- # :: left PostgreSQL-style typecast
- # [ ] left array element selection
- # - right unary minus
- # ^ left exponentiation
- # * / % left multiplication, division, modulo
- # + - left addition, subtraction
- # IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL
- # ISNULL test for null
- # NOTNULL test for not null
- # (any other) left all other native and user-defined oprs
- # IN set membership
- # BETWEEN range containment
- # OVERLAPS time interval overlap
- # LIKE ILIKE SIMILAR string pattern matching
- # < > less than, greater than
- # = right equality, assignment
- # NOT right logical negation
- # AND left logical conjunction
- # OR left logical disjunction
type
  SqlNodeKind* = enum ## kind of SQL abstract syntax tree
    nkNone,
    nkIdent,          ## leaf: identifier (also used for operator names and ``*``)
    nkStringLit,      ## leaf: string literal
    nkBitStringLit,   ## leaf: bit string literal
    nkHexStringLit,   ## leaf: hex string literal
    nkIntegerLit,     ## leaf: integer literal
    nkNumericLit,     ## leaf: numeric literal
    nkPrimaryKey,
    nkForeignKey,
    nkNotNull,
    nkNull,
    nkStmtList,       ## list of statements
    nkDot,            ## ``a.b``
    nkDotDot,         ## ``a..b``
    nkPrefix,         ## operator name, operand
    nkInfix,          ## operator name, left operand, right operand
    nkCall,           ## callee, arguments
    nkColumnReference,
    nkReferences,
    nkDefault,
    nkCheck,
    nkConstraint,
    nkUnique,
    nkIdentity,
    nkColumnDef,      ## name, datatype, constraints
    nkInsert,
    nkUpdate,
    nkDelete,
    nkSelect,
    nkSelectDistinct,
    nkSelectColumns,
    nkAsgn,           ## column name, value (in UPDATE ... SET)
    nkFrom,
    nkGroup,
    nkHaving,
    nkOrder,
    nkDesc,
    nkUnion,
    nkIntersect,
    nkExcept,
    nkColumnList,
    nkValueList,
    nkWhere,
    # NOTE: each ...IfNotExists kind must directly follow its plain
    # counterpart; the parser derives it with `succ`.
    nkCreateTable,
    nkCreateTableIfNotExists,
    nkCreateType,
    nkCreateTypeIfNotExists,
    nkCreateIndex,
    nkCreateIndexIfNotExists,
    nkEnumDef         ## string literals of an ``enum(...)`` data type
type
  SqlParseError* = object of ValueError ## Invalid SQL encountered
  SqlNode* = ref SqlNodeObj        ## an SQL abstract syntax tree node
  SqlNodeObj* = object              ## an SQL abstract syntax tree node
    case kind*: SqlNodeKind      ## kind of syntax tree
    of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit,
                nkIntegerLit, nkNumericLit:
        strVal*: string             ## AST leaf: the identifier, numeric literal
                                    ## string literal, etc.
    else:
      sons*: seq[SqlNode]          ## the node's children

  SqlParser* = object of SqlLexer ## SQL parser object
    tok: Token                    # the current (look-ahead) token

# old E/P/T-prefixed names are kept as deprecated aliases
{.deprecated: [EInvalidSql: SqlParseError, PSqlNode: SqlNode,
    TSqlNode: SqlNodeObj, TSqlParser: SqlParser, TSqlNodeKind: SqlNodeKind].}
proc newNode(k: SqlNodeKind): SqlNode =
  ## Allocates a fresh AST node of kind `k`.
  new result
  result.kind = k
proc newNode(k: SqlNodeKind, s: string): SqlNode =
  ## Allocates a fresh leaf node of kind `k` whose `strVal` is `s`.
  new result
  result.kind = k
  result.strVal = s
proc len*(n: SqlNode): int =
  ## Returns the number of children of `n`; leaf nodes have none.
  case n.kind
  of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit,
     nkIntegerLit, nkNumericLit:
    result = 0
  else:
    result = n.sons.len
proc `[]`*(n: SqlNode; i: int): SqlNode =
  ## Returns the `i`-th child of `n`.
  result = n.sons[i]
proc add*(father, n: SqlNode) =
  ## Appends `n` to the children of `father`, creating the child list
  ## first if it does not exist yet.
  if father.sons.isNil:
    father.sons = @[]
  father.sons.add(n)
proc getTok(p: var SqlParser) =
  ## Reads the next token into the parser's current token `p.tok`.
  p.getTok(p.tok)
proc sqlError(p: SqlParser, msg: string) =
  ## Raises a `SqlParseError` whose message is `msg` prefixed with the
  ## current file name, line and column.
  raise newException(SqlParseError, errorStr(p, msg))
proc isKeyw(p: SqlParser, keyw: string): bool =
  ## Returns true if the current token is an identifier equal to `keyw`,
  ## ignoring case.
  if p.tok.kind != tkIdentifier: return false
  result = cmpIgnoreCase(p.tok.literal, keyw) == 0
proc isOpr(p: SqlParser, opr: string): bool =
  ## Returns true if the current token is an operator equal to `opr`,
  ## ignoring case.
  if p.tok.kind != tkOperator: return false
  result = cmpIgnoreCase(p.tok.literal, opr) == 0
proc optKeyw(p: var SqlParser, keyw: string) =
  ## Consumes the current token if it is the keyword `keyw`; otherwise
  ## does nothing.
  if isKeyw(p, keyw):
    getTok(p)
proc expectIdent(p: SqlParser) =
  ## Raises a parse error unless the current token is a plain or quoted
  ## identifier. The token is not consumed.
  if p.tok.kind notin {tkIdentifier, tkQuotedIdentifier}:
    sqlError(p, "identifier expected")
proc expect(p: SqlParser, kind: TokKind) =
  ## Raises a parse error unless the current token is of the given `kind`.
  ## The token is not consumed.
  if p.tok.kind == kind: return
  sqlError(p, tokKindToStr[kind] & " expected")
proc eat(p: var SqlParser, kind: TokKind) =
  ## Consumes the current token if it is of the given `kind`; raises a
  ## parse error otherwise.
  if p.tok.kind != kind:
    sqlError(p, tokKindToStr[kind] & " expected")
  else:
    getTok(p)
proc eat(p: var SqlParser, keyw: string) =
  ## Consumes the current token if it is the keyword `keyw`; raises a
  ## parse error naming the upper-cased keyword otherwise.
  if not isKeyw(p, keyw):
    sqlError(p, keyw.toUpper() & " expected")
  else:
    getTok(p)
proc opt(p: var SqlParser, kind: TokKind) =
  ## Consumes the current token if it is of the given `kind`; otherwise
  ## does nothing.
  if p.tok.kind != kind: return
  getTok(p)
proc parseDataType(p: var SqlParser): SqlNode =
  ## Parses a data type. ``enum(...)`` yields an `nkEnumDef` node holding
  ## one `nkStringLit` per listed value. Any other type yields an
  ## `nkIdent` with the type name; a following parenthesised list of
  ## integers such as ``(12, 13)`` is checked but not stored.
  if isKeyw(p, "enum"):
    result = newNode(nkEnumDef)
    getTok(p)
    if p.tok.kind == tkParLe:
      while true:
        getTok(p) # consumes '(' on the first round, ',' afterwards
        result.add(newNode(nkStringLit, p.tok.literal))
        getTok(p)
        if p.tok.kind != tkComma: break
      eat(p, tkParRi)
    return
  expectIdent(p)
  result = newNode(nkIdent, p.tok.literal)
  getTok(p)
  # the size/precision part is validated and then dropped
  if p.tok.kind == tkParLe:
    while true:
      getTok(p) # consumes '(' on the first round, ',' afterwards
      expect(p, tkInteger)
      getTok(p)
      if p.tok.kind != tkComma: break
    eat(p, tkParRi)
proc getPrecedence(p: SqlParser): int =
  ## Returns the binding strength of the current token when used as a
  ## binary operator: 6 for ``* / %``, 5 for ``+ -``, 3 for comparisons,
  ## ``is`` and ``like``, 2 for ``and``, 1 for ``or``, 0 for any other
  ## operator token and -1 if the token is not an operator at all.
  if p.tok.kind == tkOperator:
    # operator literals contain no letters, so an exact match is the same
    # as a case-insensitive one
    case p.tok.literal
    of "*", "/", "%": result = 6
    of "+", "-": result = 5
    of "=", "<", ">", ">=", "<=", "<>", "!=": result = 3
    else: result = 0 # user-defined operator
  elif isKeyw(p, "is") or isKeyw(p, "like"):
    result = 3
  elif isKeyw(p, "and"):
    result = 2
  elif isKeyw(p, "or"):
    result = 1
  else:
    result = -1
- proc parseExpr(p: var SqlParser): SqlNode
proc identOrLiteral(p: var SqlParser): SqlNode =
  ## Parses an identifier, a literal or a parenthesised expression and
  ## returns the corresponding node. Any other token raises a parse error.
  var leafKind: SqlNodeKind
  case p.tok.kind
  of tkIdentifier, tkQuotedIdentifier:
    leafKind = nkIdent
  of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant:
    leafKind = nkStringLit
  of tkBitStringConstant:
    leafKind = nkBitStringLit
  of tkHexStringConstant:
    leafKind = nkHexStringLit
  of tkInteger:
    leafKind = nkIntegerLit
  of tkNumeric:
    leafKind = nkNumericLit
  of tkParLe:
    getTok(p)
    result = parseExpr(p)
    eat(p, tkParRi)
    return
  else:
    sqlError(p, "expression expected")
    getTok(p) # we must consume a token here to prevent endless loops!
    return
  # all leaf kinds share the same construction
  result = newNode(leafKind, p.tok.literal)
  getTok(p)
proc primary(p: var SqlParser): SqlNode =
  ## Parses a primary expression: a prefix operator (any operator token or
  ## ``not``) applied to a primary, or an identifier/literal followed by
  ## any number of call argument lists and ``.``/``..`` selectors.
  if p.tok.kind == tkOperator or isKeyw(p, "not"):
    result = newNode(nkPrefix)
    result.add(newNode(nkIdent, p.tok.literal))
    getTok(p)
    result.add(primary(p))
    return
  result = identOrLiteral(p)
  while true:
    case p.tok.kind
    of tkParLe:
      # call: the expression parsed so far becomes the callee
      let call = newNode(nkCall)
      call.add(result)
      result = call
      getTok(p)
      while p.tok.kind != tkParRi:
        result.add(parseExpr(p))
        if p.tok.kind != tkComma: break
        getTok(p)
      eat(p, tkParRi)
    of tkDot:
      getTok(p)
      let left = result
      if p.tok.kind != tkDot:
        result = newNode(nkDot)
      else:
        getTok(p)
        result = newNode(nkDotDot)
      result.add(left)
      if isOpr(p, "*"):
        result.add(newNode(nkIdent, "*"))
      elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}:
        result.add(newNode(nkIdent, p.tok.literal))
      else:
        sqlError(p, "identifier expected")
      getTok(p)
    else:
      break
proc lowestExprAux(p: var SqlParser, v: var SqlNode, limit: int): int =
  ## Parses a primary expression and folds following binary operators
  ## into `v` for as long as their precedence is higher than `limit`.
  ## Each operator yields an `nkInfix` node holding the operator name and
  ## both operands.
  v = primary(p)
  var prec = getPrecedence(p)
  result = prec
  while prec > limit:
    let infix = newNode(nkInfix)
    let opName = newNode(nkIdent, p.tok.literal)
    getTok(p)
    var rhs: SqlNode
    result = lowestExprAux(p, rhs, prec)
    infix.add(opName)
    infix.add(v)
    infix.add(rhs)
    v = infix
    prec = getPrecedence(p)
proc parseExpr(p: var SqlParser): SqlNode =
  ## Parses a complete expression, accepting operators of any precedence.
  const lowestLimit = -1
  discard lowestExprAux(p, result, lowestLimit)
proc parseTableName(p: var SqlParser): SqlNode =
  ## Parses a (possibly qualified) name; the current token has to be an
  ## identifier.
  p.expectIdent()
  result = p.primary()
proc parseColumnReference(p: var SqlParser): SqlNode =
  ## Parses a name optionally followed by a parenthesised, comma separated
  ## list of names. With such a list the result is an `nkColumnReference`
  ## node holding the first name followed by the listed names.
  result = parseTableName(p)
  if p.tok.kind != tkParLe: return
  let target = result
  result = newNode(nkColumnReference)
  result.add(target)
  while true:
    getTok(p) # consumes '(' on the first round, ',' afterwards
    result.add(parseTableName(p))
    if p.tok.kind != tkComma: break
  eat(p, tkParRi)
proc parseCheck(p: var SqlParser): SqlNode =
  ## Skips the current token and parses the expression after it into an
  ## `nkCheck` node.
  getTok(p)
  let condition = parseExpr(p)
  result = newNode(nkCheck)
  result.add(condition)
proc parseConstraint(p: var SqlParser): SqlNode =
  ## Skips the current token, then parses a constraint name, an optional
  ## ``check`` keyword and an expression into an `nkConstraint` node.
  getTok(p)
  expectIdent(p)
  let name = newNode(nkIdent, p.tok.literal)
  getTok(p)
  optKeyw(p, "check")
  result = newNode(nkConstraint)
  result.add(name)
  result.add(parseExpr(p))
proc parseParIdentList(p: var SqlParser, father: SqlNode) =
  ## Parses a parenthesised, comma separated list of at least one
  ## identifier and appends each of them to `father` as `nkIdent`.
  eat(p, tkParLe)
  expectIdent(p)
  father.add(newNode(nkIdent, p.tok.literal))
  getTok(p)
  while p.tok.kind == tkComma:
    getTok(p)
    expectIdent(p)
    father.add(newNode(nkIdent, p.tok.literal))
    getTok(p)
  eat(p, tkParRi)
proc parseColumnConstraints(p: var SqlParser, result: SqlNode) =
  ## Parses zero or more column constraints and appends one node per
  ## constraint to `result`. Parsing stops at the first token that does
  ## not start a known constraint.
  while true:
    var constraint: SqlNode
    if isKeyw(p, "default"):
      getTok(p)
      constraint = newNode(nkDefault)
      constraint.add(parseExpr(p))
    elif isKeyw(p, "references"):
      getTok(p)
      constraint = newNode(nkReferences)
      constraint.add(parseColumnReference(p))
    elif isKeyw(p, "not"):
      getTok(p)
      eat(p, "null")
      constraint = newNode(nkNotNull)
    elif isKeyw(p, "null"):
      getTok(p)
      constraint = newNode(nkNull)
    elif isKeyw(p, "identity"):
      getTok(p)
      constraint = newNode(nkIdentity)
    elif isKeyw(p, "primary"):
      getTok(p)
      eat(p, "key")
      constraint = newNode(nkPrimaryKey)
    elif isKeyw(p, "check"):
      constraint = parseCheck(p)
    elif isKeyw(p, "constraint"):
      constraint = parseConstraint(p)
    elif isKeyw(p, "unique"):
      getTok(p)
      constraint = newNode(nkUnique)
    else:
      break
    result.add(constraint)
proc parseColumnDef(p: var SqlParser): SqlNode =
  ## Parses a column definition into an `nkColumnDef` node holding the
  ## column name, its data type and its constraints.
  expectIdent(p)
  let name = newNode(nkIdent, p.tok.literal)
  getTok(p)
  result = newNode(nkColumnDef)
  result.add(name)
  result.add(parseDataType(p))
  parseColumnConstraints(p, result)
proc parseIfNotExists(p: var SqlParser, k: SqlNodeKind): SqlNode =
  ## Skips the current token and an optional ``if not exists`` clause.
  ## Returns a new node of kind `k`, or of the kind following `k` in
  ## `SqlNodeKind` when the clause was present.
  getTok(p)
  var kind = k
  if isKeyw(p, "if"):
    getTok(p)
    eat(p, "not")
    eat(p, "exists")
    kind = succ(k)
  result = newNode(kind)
proc parseTableConstraint(p: var SqlParser): SqlNode =
  ## Parses a table level constraint: ``primary key (...)``,
  ## ``foreign key (...) references ...``, ``unique key (...)``,
  ## ``check ...`` or ``constraint ...``. Anything else raises a parse
  ## error.
  if isKeyw(p, "check"):
    return parseCheck(p)
  if isKeyw(p, "constraint"):
    return parseConstraint(p)
  if isKeyw(p, "primary"):
    getTok(p)
    eat(p, "key")
    result = newNode(nkPrimaryKey)
    parseParIdentList(p, result)
  elif isKeyw(p, "foreign"):
    getTok(p)
    eat(p, "key")
    result = newNode(nkForeignKey)
    parseParIdentList(p, result)
    eat(p, "references")
    let target = newNode(nkReferences)
    target.add(parseColumnReference(p))
    result.add(target)
  elif isKeyw(p, "unique"):
    getTok(p)
    eat(p, "key")
    result = newNode(nkUnique)
    parseParIdentList(p, result)
  else:
    sqlError(p, "column definition expected")
proc parseUnique(p: var SqlParser): SqlNode =
  ## Parses an expression; if it turned out to be a call (as in
  ## ``unique(a, b)``) the node is re-tagged as `nkUnique`.
  result = parseExpr(p)
  if result.kind != nkCall: return
  result.kind = nkUnique
proc parseTableDef(p: var SqlParser): SqlNode =
  ## Parses the remainder of a ``create table`` statement, starting at the
  ## ``table`` keyword. The result holds the table name followed by one
  ## node per column definition or table constraint.
  result = parseIfNotExists(p, nkCreateTable)
  expectIdent(p)
  result.add(newNode(nkIdent, p.tok.literal))
  getTok(p)
  if p.tok.kind != tkParLe: return
  getTok(p)
  while p.tok.kind != tkParRi:
    var item: SqlNode
    if isKeyw(p, "constraint"):
      item = parseConstraint(p)
    elif isKeyw(p, "primary") or isKeyw(p, "foreign"):
      item = parseTableConstraint(p)
    elif isKeyw(p, "unique"):
      item = parseUnique(p)
    elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}:
      item = parseColumnDef(p)
    else:
      item = parseTableConstraint(p)
    result.add(item)
    if p.tok.kind != tkComma: break
    getTok(p)
  eat(p, tkParRi)
  # skip additional crap after 'create table (...) crap;'
  while p.tok.kind != tkSemicolon and p.tok.kind != tkEof:
    getTok(p)
proc parseTypeDef(p: var SqlParser): SqlNode =
  ## Parses the remainder of a ``create type`` statement, starting at the
  ## ``type`` keyword: the type name, ``as`` and a data type.
  result = parseIfNotExists(p, nkCreateType)
  expectIdent(p)
  let name = newNode(nkIdent, p.tok.literal)
  result.add(name)
  getTok(p)
  eat(p, "as")
  result.add(parseDataType(p))
proc parseWhere(p: var SqlParser): SqlNode =
  ## Skips the current token and parses the expression after it into an
  ## `nkWhere` node.
  getTok(p)
  let condition = parseExpr(p)
  result = newNode(nkWhere)
  result.add(condition)
proc parseIndexDef(p: var SqlParser): SqlNode =
  ## Parses the remainder of a ``create index`` statement, starting at the
  ## ``index`` keyword. The result holds the index name (or an
  ## `nkPrimaryKey` node for ``primary key``), the table name and the
  ## indexed column names.
  result = parseIfNotExists(p, nkCreateIndex)
  if not isKeyw(p, "primary"):
    expectIdent(p)
    result.add(newNode(nkIdent, p.tok.literal))
    getTok(p)
  else:
    getTok(p)
    eat(p, "key")
    result.add(newNode(nkPrimaryKey))
  eat(p, "on")
  expectIdent(p)
  result.add(newNode(nkIdent, p.tok.literal))
  getTok(p)
  # "( ident {, ident} )": the column names go directly into the result
  parseParIdentList(p, result)
proc parseInsert(p: var SqlParser): SqlNode =
  ## Parses an ``insert into`` statement, starting at the ``insert``
  ## keyword. The resulting `nkInsert` node always has three sons:
  ##
  ## 0. the table name,
  ## 1. an `nkColumnList` with the column names, or nil if none were given,
  ## 2. an `nkValueList` with the inserted expressions, or an `nkDefault`
  ##    node for ``default values``.
  getTok(p)
  eat(p, "into")
  expectIdent(p)
  result = newNode(nkInsert)
  result.add(newNode(nkIdent, p.tok.literal))
  getTok(p)
  if p.tok.kind == tkParLe:
    var n = newNode(nkColumnList)
    parseParIdentList(p, n)
    # The column list has to be stored. Dropping it loses the column names
    # and shifts the value list into slot 1, so the node ends up with two
    # sons instead of three.
    result.add(n)
  else:
    result.add(nil)
  if isKeyw(p, "default"):
    getTok(p)
    eat(p, "values")
    result.add(newNode(nkDefault))
  else:
    eat(p, "values")
    eat(p, tkParLe)
    var n = newNode(nkValueList)
    while true:
      n.add(parseExpr(p))
      if p.tok.kind != tkComma: break
      getTok(p)
    result.add(n)
    eat(p, tkParRi)
proc parseUpdate(p: var SqlParser): SqlNode =
  ## Parses an ``update`` statement, starting at the ``update`` keyword.
  ## The `nkUpdate` node holds the table, one `nkAsgn` per assignment and
  ## finally the ``where`` clause, or nil if there is none.
  getTok(p)
  result = newNode(nkUpdate)
  result.add(primary(p))
  eat(p, "set")
  while true:
    expectIdent(p)
    let assignment = newNode(nkAsgn)
    assignment.add(newNode(nkIdent, p.tok.literal))
    getTok(p)
    if not isOpr(p, "="):
      sqlError(p, "= expected")
    else:
      getTok(p)
    assignment.add(parseExpr(p))
    result.add(assignment)
    if p.tok.kind != tkComma: break
    getTok(p)
  var whereClause: SqlNode = nil
  if isKeyw(p, "where"):
    whereClause = parseWhere(p)
  result.add(whereClause)
proc parseDelete(p: var SqlParser): SqlNode =
  ## Parses a ``delete from`` statement, starting at the ``delete``
  ## keyword. The `nkDelete` node holds the table and the ``where``
  ## clause, or nil if there is none.
  getTok(p)
  result = newNode(nkDelete)
  eat(p, "from")
  result.add(primary(p))
  var whereClause: SqlNode = nil
  if isKeyw(p, "where"):
    whereClause = parseWhere(p)
  result.add(whereClause)
proc parseSelect(p: var SqlParser): SqlNode =
  ## Parses a ``select`` statement, starting at the ``select`` keyword.
  ##
  ## The result is an `nkSelectDistinct` node for ``select distinct`` and
  ## an `nkSelect` node otherwise (``all`` is accepted and ignored). Its
  ## first son is the `nkSelectColumns` list. After it, in this order and
  ## only if present in the input, follow: `nkFrom`, `nkWhere`, `nkGroup`,
  ## `nkHaving`, one of `nkUnion`/`nkIntersect`/`nkExcept`, and `nkOrder`.
  getTok(p)
  # Every path must create the node exactly once: DISTINCT must not be
  # overwritten, and a plain SELECT (without ALL) needs a node too.
  if isKeyw(p, "distinct"):
    getTok(p)
    result = newNode(nkSelectDistinct)
  else:
    if isKeyw(p, "all"):
      getTok(p)
    result = newNode(nkSelect)
  var a = newNode(nkSelectColumns)
  while true:
    if isOpr(p, "*"):
      a.add(newNode(nkIdent, "*"))
      getTok(p)
    else:
      a.add(parseExpr(p))
    if p.tok.kind != tkComma: break
    getTok(p)
  result.add(a)
  if isKeyw(p, "from"):
    var f = newNode(nkFrom)
    while true:
      getTok(p) # consumes 'from' on the first round, ',' afterwards
      f.add(parseExpr(p))
      if p.tok.kind != tkComma: break
    result.add(f)
  if isKeyw(p, "where"):
    result.add(parseWhere(p))
  if isKeyw(p, "group"):
    getTok(p)
    eat(p, "by")
    var g = newNode(nkGroup)
    while true:
      g.add(parseExpr(p))
      if p.tok.kind != tkComma: break
      getTok(p)
    result.add(g)
  if isKeyw(p, "having"):
    var h = newNode(nkHaving)
    while true:
      getTok(p) # consumes 'having' on the first round, ',' afterwards
      h.add(parseExpr(p))
      if p.tok.kind != tkComma: break
    result.add(h)
  if isKeyw(p, "union"):
    result.add(newNode(nkUnion))
    getTok(p)
  elif isKeyw(p, "intersect"):
    result.add(newNode(nkIntersect))
    getTok(p)
  elif isKeyw(p, "except"):
    result.add(newNode(nkExcept))
    getTok(p)
  if isKeyw(p, "order"):
    getTok(p)
    eat(p, "by")
    var n = newNode(nkOrder)
    while true:
      var e = parseExpr(p)
      if isKeyw(p, "asc"): getTok(p) # is default
      elif isKeyw(p, "desc"):
        getTok(p)
        var x = newNode(nkDesc)
        x.add(e)
        e = x
      n.add(e)
      if p.tok.kind != tkComma: break
      getTok(p)
    result.add(n)
proc parseStmt(p: var SqlParser; parent: SqlNode) =
  ## Parses one statement and appends its node to `parent`. Supported are
  ## ``create table/type/index``, ``insert``, ``update``, ``delete`` and
  ## ``select``. A ``begin`` keyword is consumed without producing a node.
  ## Anything else raises a parse error.
  if isKeyw(p, "create"):
    getTok(p)
    # modifiers that are accepted but ignored, in this order
    for modifier in ["cached", "memory", "temp", "global", "local",
                     "temporary", "unique", "hash"]:
      optKeyw(p, modifier)
    if isKeyw(p, "table"):
      parent.add(parseTableDef(p))
    elif isKeyw(p, "type"):
      parent.add(parseTypeDef(p))
    elif isKeyw(p, "index"):
      parent.add(parseIndexDef(p))
    else:
      sqlError(p, "TABLE expected")
  elif isKeyw(p, "insert"):
    parent.add(parseInsert(p))
  elif isKeyw(p, "update"):
    parent.add(parseUpdate(p))
  elif isKeyw(p, "delete"):
    parent.add(parseDelete(p))
  elif isKeyw(p, "select"):
    parent.add(parseSelect(p))
  elif isKeyw(p, "begin"):
    getTok(p)
  else:
    sqlError(p, "CREATE expected")
proc open(p: var SqlParser, input: Stream, filename: string) =
  ## Opens the parser `p` on the stream `input` and reads the first token.
  ## `filename` is only used for error messages.
  open(SqlLexer(p), input, filename)
  p.tok = Token(kind: tkInvalid, literal: "")
  getTok(p)
proc parse(p: var SqlParser): SqlNode =
  ## Parses all statements of `p`'s input stream; each statement has to be
  ## terminated by a semicolon. Returns an `nkStmtList`, or the statement
  ## node itself if the list holds exactly one node.
  ## Syntax errors raise a `SqlParseError` exception.
  let stmts = newNode(nkStmtList)
  while p.tok.kind != tkEof:
    parseStmt(p, stmts)
    eat(p, tkSemicolon)
  if stmts.len == 1:
    result = stmts.sons[0]
  else:
    result = stmts
proc close(p: var SqlParser) =
  ## Closes the parser `p` by closing its underlying lexer.
  SqlLexer(p).close()
proc parseSQL*(input: Stream, filename: string): SqlNode =
  ## Parses the SQL read from `input` and returns its AST.
  ## `filename` is only used for error messages.
  ## Syntax errors raise a `SqlParseError` exception; the parser is closed
  ## in either case.
  var parser: SqlParser
  open(parser, input, filename)
  try:
    result = parse(parser)
  finally:
    close(parser)
- proc ra(n: SqlNode, s: var string, indent: int)
proc rs(n: SqlNode, s: var string, indent: int,
        prefix = "(", suffix = ")",
        sep = ", ") =
  ## Renders the children of `n` into `s`, separated by `sep` and enclosed
  ## in `prefix` and `suffix`. Nothing is written if `n` has no children.
  if n.len <= 0: return
  s.add(prefix)
  for i, child in n.sons:
    if i > 0: s.add(sep)
    ra(child, s, indent)
  s.add(suffix)
proc ra(n: SqlNode, s: var string, indent: int) =
  ## Renders the SQL AST node `n` as SQL text and appends it to `s`.
  ##
  ## * A `nil` node appends nothing.
  ## * `indent` is only forwarded to recursive calls; no branch in this proc
  ##   uses it to shape the output.
  ## * Statement nodes (insert, update, delete, select, create table/type/
  ##   index) append their own terminating ``;``.
  ## * Column constraints (primary key, not null, default, ...) are emitted
  ##   with a leading blank so they can be appended after a column type.
  ## * `nkSelectColumns` is never rendered on its own (the select branch
  ##   renders its children directly); reaching it trips an assertion.
  if n == nil: return
  case n.kind
  of nkNone: discard
  of nkIdent:
    # Identifiers consisting only of characters in '\33'..'\127' are written
    # verbatim; anything else is double-quoted with embedded quotes doubled.
    if allCharsInSet(n.strVal, {'\33'..'\127'}):
      s.add(n.strVal)
    else:
      s.add("\"" & replace(n.strVal, "\"", "\"\"") & "\"")
  of nkStringLit:
    s.add(escape(n.strVal, "e'", "'"))
  of nkBitStringLit:
    s.add("b'" & n.strVal & "'")
  of nkHexStringLit:
    s.add("x'" & n.strVal & "'")
  of nkIntegerLit, nkNumericLit:
    s.add(n.strVal)
  of nkPrimaryKey:
    s.add(" primary key")
    rs(n, s, indent)
  of nkForeignKey:
    s.add(" foreign key")
    rs(n, s, indent)
  of nkNotNull:
    s.add(" not null")
  of nkNull:
    s.add(" null")
  of nkDot:
    ra(n.sons[0], s, indent)
    s.add(".")
    ra(n.sons[1], s, indent)
  of nkDotDot:
    ra(n.sons[0], s, indent)
    s.add(". .")
    ra(n.sons[1], s, indent)
  of nkPrefix:
    # (operator operand)
    s.add('(')
    ra(n.sons[0], s, indent)
    s.add(' ')
    ra(n.sons[1], s, indent)
    s.add(')')
  of nkInfix:
    # Child 0 is the operator; it is written between children 1 and 2.
    s.add('(')
    ra(n.sons[1], s, indent)
    s.add(' ')
    ra(n.sons[0], s, indent)
    s.add(' ')
    ra(n.sons[2], s, indent)
    s.add(')')
  of nkCall, nkColumnReference:
    # Child 0 is the callee; the remaining children are the arguments.
    ra(n.sons[0], s, indent)
    s.add('(')
    for i in 1..n.len-1:
      if i > 1: s.add(", ")
      ra(n.sons[i], s, indent)
    s.add(')')
  of nkReferences:
    s.add(" references ")
    ra(n.sons[0], s, indent)
  of nkDefault:
    s.add(" default ")
    ra(n.sons[0], s, indent)
  of nkCheck:
    s.add(" check ")
    ra(n.sons[0], s, indent)
  of nkConstraint:
    s.add(" constraint ")
    ra(n.sons[0], s, indent)
    s.add(" check ")
    ra(n.sons[1], s, indent)
  of nkUnique:
    s.add(" unique")
    rs(n, s, indent)
  of nkIdentity:
    s.add(" identity")
  of nkColumnDef:
    s.add("\n ")
    rs(n, s, indent, "", "", " ")
  of nkStmtList:
    # One statement per line.
    for i in 0..n.len-1:
      ra(n.sons[i], s, indent)
      s.add("\n")
  of nkInsert:
    assert n.len == 3
    s.add("insert into ")
    ra(n.sons[0], s, indent)
    ra(n.sons[1], s, indent)
    let payload = n.sons[2]
    if payload.kind == nkDefault:
      # The leading blank keeps the keyword from fusing with the table name
      # when no column list was rendered.
      s.add(" default values")
    elif payload.kind == nkValueList:
      # The nkValueList branch writes the "values " keyword itself, so only
      # the line break is added here to avoid emitting the keyword twice.
      s.add("\n")
      ra(payload, s, indent)
    else:
      s.add("\nvalues ")
      ra(payload, s, indent)
    s.add(';')
  of nkUpdate:
    s.add("update ")
    ra(n.sons[0], s, indent)
    s.add(" set ")
    # Children 1 .. L-2 are the assignments; the last child is rendered
    # after them (a `nil` last child renders as nothing).
    let L = n.len
    for i in 1 .. L-2:
      if i > 1: s.add(", ")
      let it = n.sons[i]
      assert it.kind == nkAsgn
      ra(it, s, indent)
    ra(n.sons[L-1], s, indent)
    s.add(';')
  of nkDelete:
    s.add("delete from ")
    ra(n.sons[0], s, indent)
    ra(n.sons[1], s, indent)
    s.add(';')
  of nkSelect, nkSelectDistinct:
    s.add("select ")
    if n.kind == nkSelectDistinct:
      s.add("distinct ")
    # Child 0 holds the selected columns; every further child is a clause
    # that renders its own leading keyword.
    rs(n.sons[0], s, indent, "", "", ", ")
    for i in 1 .. n.len-1: ra(n.sons[i], s, indent)
    s.add(';')
  of nkSelectColumns:
    assert(false)
  of nkAsgn:
    ra(n.sons[0], s, indent)
    s.add(" = ")
    ra(n.sons[1], s, indent)
  of nkFrom:
    s.add("\nfrom ")
    rs(n, s, indent, "", "", ", ")
  of nkGroup:
    # The trailing blank separates the keyword from the first grouping item.
    s.add("\ngroup by ")
    rs(n, s, indent, "", "", ", ")
  of nkHaving:
    # The trailing blank separates the keyword from the condition.
    s.add("\nhaving ")
    rs(n, s, indent, "", "", ", ")
  of nkOrder:
    s.add("\norder by ")
    rs(n, s, indent, "", "", ", ")
  of nkDesc:
    ra(n.sons[0], s, indent)
    s.add(" desc")
  of nkUnion:
    s.add(" union")
  of nkIntersect:
    s.add(" intersect")
  of nkExcept:
    s.add(" except")
  of nkColumnList:
    rs(n, s, indent)
  of nkValueList:
    s.add("values ")
    rs(n, s, indent)
  of nkWhere:
    s.add("\nwhere ")
    ra(n.sons[0], s, indent)
  of nkCreateTable, nkCreateTableIfNotExists:
    s.add("create table ")
    if n.kind == nkCreateTableIfNotExists:
      s.add("if not exists ")
    # Child 0 is the table name; the remaining children form the
    # parenthesised, comma-separated body.
    ra(n.sons[0], s, indent)
    s.add('(')
    for i in 1..n.len-1:
      if i > 1: s.add(", ")
      ra(n.sons[i], s, indent)
    s.add(");")
  of nkCreateType, nkCreateTypeIfNotExists:
    s.add("create type ")
    if n.kind == nkCreateTypeIfNotExists:
      s.add("if not exists ")
    ra(n.sons[0], s, indent)
    s.add(" as ")
    ra(n.sons[1], s, indent)
    s.add(';')
  of nkCreateIndex, nkCreateIndexIfNotExists:
    s.add("create index ")
    if n.kind == nkCreateIndexIfNotExists:
      s.add("if not exists ")
    # Child 0 is the index name, child 1 the table; the remaining children
    # are the indexed items.
    ra(n.sons[0], s, indent)
    s.add(" on ")
    ra(n.sons[1], s, indent)
    s.add('(')
    for i in 2..n.len-1:
      if i > 2: s.add(", ")
      ra(n.sons[i], s, indent)
    s.add(");")
  of nkEnumDef:
    s.add("enum ")
    rs(n, s, indent)
- # What I want:
- #
- #select(columns = [T1.all, T2.name],
- # fromm = [T1, T2],
- # where = T1.name ==. T2.name,
- # orderby = [name]):
- #
- #for row in dbQuery(db, """select x, y, z
- # from a, b
- # where a.name = b.name"""):
- #
- #select x, y, z:
- # fromm: Table1, Table2
- # where: x.name == y.name
- #db.select(fromm = [t1, t2], where = t1.name == t2.name):
- #for x, y, z in db.select(fromm = a, b where = a.name == b.name):
- # writeLine x, y, z
proc renderSQL*(n: SqlNode): string =
  ## Renders the SQL abstract syntax tree `n` back into SQL source text.
  var rendered = ""
  ra(n, rendered, 0)   # rendering starts at indentation level 0
  result = rendered
proc `$`*(n: SqlNode): string =
  ## Stringification operator for SQL ASTs; simply forwards to `renderSQL`.
  result = n.renderSQL()
when not defined(testing) and isMainModule:
  # Demo, only compiled when this file is the main module and `testing` is
  # not defined: parse a handful of statements and echo the re-rendered SQL.
  const demoSql = """
CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic');
CREATE TABLE holidays (
num_weeks int,
happiness happiness
);
CREATE INDEX table1_attr1 ON table1(attr1);
SELECT * FROM myTab WHERE col1 = 'happy';
"""
  let demoAst = parseSQL(newStringStream(demoSql), "stdin")
  echo(renderSQL(demoAst))
- # CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic');
- # CREATE TABLE holidays (
- # num_weeks int,
- # happiness happiness
- # );
- # CREATE INDEX table1_attr1 ON table1(attr1)
|