123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2015 Andreas Rumpf, Dominik Picheta
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- import
- hashes, strutils, lexbase, streams, unicode, macros
- type
- SexpEventKind* = enum ## enumeration of all events that may occur when parsing
- sexpError, ## an error occurred during parsing
- sexpEof, ## end of file reached
- sexpString, ## a string literal
- sexpSymbol, ## a symbol
- sexpInt, ## an integer literal
- sexpFloat, ## a float literal
- sexpNil, ## the value ``nil``
- sexpDot, ## the dot to separate car/cdr
- sexpListStart, ## start of a list: the ``(`` token
- sexpListEnd, ## end of a list: the ``)`` token
- TTokKind = enum # must be synchronized with SexpEventKind!
- tkError,
- tkEof,
- tkString,
- tkSymbol,
- tkInt,
- tkFloat,
- tkNil,
- tkDot,
- tkParensLe,
- tkParensRi
- tkSpace
- SexpError* = enum ## enumeration that lists all errors that can occur
- errNone, ## no error
- errInvalidToken, ## invalid token
- errParensRiExpected, ## ``)`` expected
- errQuoteExpected, ## ``"`` expected
- errEofExpected, ## EOF expected
- SexpParser* = object of BaseLexer ## the parser object.
- a: string
- tok: TTokKind
- kind: SexpEventKind
- err: SexpError
- const
- errorMessages: array[SexpError, string] = [
- "no error",
- "invalid token",
- "')' expected",
- "'\"' or \"'\" expected",
- "EOF expected",
- ]
- tokToStr: array[TTokKind, string] = [
- "invalid token",
- "EOF",
- "string literal",
- "symbol",
- "int literal",
- "float literal",
- "nil",
- ".",
- "(", ")", "space"
- ]
- proc close*(my: var SexpParser) {.inline.} =
- ## closes the parser `my` and its associated input stream.
- lexbase.close(my)
- proc str*(my: SexpParser): string {.inline.} =
- ## returns the character data for the events: ``sexpInt``, ``sexpFloat``,
- ## ``sexpString``
- assert(my.kind in {sexpInt, sexpFloat, sexpString})
- result = my.a
- proc getInt*(my: SexpParser): BiggestInt {.inline.} =
- ## returns the number for the event: ``sexpInt``
- assert(my.kind == sexpInt)
- result = parseBiggestInt(my.a)
- proc getFloat*(my: SexpParser): float {.inline.} =
- ## returns the number for the event: ``sexpFloat``
- assert(my.kind == sexpFloat)
- result = parseFloat(my.a)
- proc kind*(my: SexpParser): SexpEventKind {.inline.} =
- ## returns the current event type for the SEXP parser
- result = my.kind
- proc getColumn*(my: SexpParser): int {.inline.} =
- ## get the current column the parser has arrived at.
- result = getColNumber(my, my.bufpos)
- proc getLine*(my: SexpParser): int {.inline.} =
- ## get the current line the parser has arrived at.
- result = my.lineNumber
- proc errorMsg*(my: SexpParser): string =
- ## returns a helpful error message for the event ``sexpError``
- assert(my.kind == sexpError)
- result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]]
- proc errorMsgExpected*(my: SexpParser, e: string): string =
- ## returns an error message "`e` expected" in the same format as the
- ## other error messages
- result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"]
- proc handleHexChar(c: char, x: var int): bool =
- result = true # Success
- case c
- of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
- of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
- of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
- else: result = false # error
- proc parseString(my: var SexpParser): TTokKind =
- result = tkString
- var pos = my.bufpos + 1
- var buf = my.buf
- while true:
- case buf[pos]
- of '\0':
- my.err = errQuoteExpected
- result = tkError
- break
- of '"':
- inc(pos)
- break
- of '\\':
- case buf[pos+1]
- of '\\', '"', '\'', '/':
- add(my.a, buf[pos+1])
- inc(pos, 2)
- of 'b':
- add(my.a, '\b')
- inc(pos, 2)
- of 'f':
- add(my.a, '\f')
- inc(pos, 2)
- of 'n':
- add(my.a, '\L')
- inc(pos, 2)
- of 'r':
- add(my.a, '\C')
- inc(pos, 2)
- of 't':
- add(my.a, '\t')
- inc(pos, 2)
- of 'u':
- inc(pos, 2)
- var r: int
- if handleHexChar(buf[pos], r): inc(pos)
- if handleHexChar(buf[pos], r): inc(pos)
- if handleHexChar(buf[pos], r): inc(pos)
- if handleHexChar(buf[pos], r): inc(pos)
- add(my.a, toUTF8(Rune(r)))
- else:
- # don't bother with the error
- add(my.a, buf[pos])
- inc(pos)
- of '\c':
- pos = lexbase.handleCR(my, pos)
- buf = my.buf
- add(my.a, '\c')
- of '\L':
- pos = lexbase.handleLF(my, pos)
- buf = my.buf
- add(my.a, '\L')
- else:
- add(my.a, buf[pos])
- inc(pos)
- my.bufpos = pos # store back
- proc parseNumber(my: var SexpParser) =
- var pos = my.bufpos
- var buf = my.buf
- if buf[pos] == '-':
- add(my.a, '-')
- inc(pos)
- if buf[pos] == '.':
- add(my.a, "0.")
- inc(pos)
- else:
- while buf[pos] in Digits:
- add(my.a, buf[pos])
- inc(pos)
- if buf[pos] == '.':
- add(my.a, '.')
- inc(pos)
- # digits after the dot:
- while buf[pos] in Digits:
- add(my.a, buf[pos])
- inc(pos)
- if buf[pos] in {'E', 'e'}:
- add(my.a, buf[pos])
- inc(pos)
- if buf[pos] in {'+', '-'}:
- add(my.a, buf[pos])
- inc(pos)
- while buf[pos] in Digits:
- add(my.a, buf[pos])
- inc(pos)
- my.bufpos = pos
- proc parseSymbol(my: var SexpParser) =
- var pos = my.bufpos
- var buf = my.buf
- if buf[pos] in IdentStartChars:
- while buf[pos] in IdentChars:
- add(my.a, buf[pos])
- inc(pos)
- my.bufpos = pos
- proc getTok(my: var SexpParser): TTokKind =
- setLen(my.a, 0)
- case my.buf[my.bufpos]
- of '-', '0'..'9': # numbers that start with a . are not parsed
- # correctly.
- parseNumber(my)
- if {'.', 'e', 'E'} in my.a:
- result = tkFloat
- else:
- result = tkInt
- of '"': #" # gotta fix nim-mode
- result = parseString(my)
- of '(':
- inc(my.bufpos)
- result = tkParensLe
- of ')':
- inc(my.bufpos)
- result = tkParensRi
- of '\0':
- result = tkEof
- of 'a'..'z', 'A'..'Z', '_':
- parseSymbol(my)
- if my.a == "nil":
- result = tkNil
- else:
- result = tkSymbol
- of ' ':
- result = tkSpace
- inc(my.bufpos)
- of '.':
- result = tkDot
- inc(my.bufpos)
- else:
- inc(my.bufpos)
- result = tkError
- my.tok = result
- # ------------- higher level interface ---------------------------------------
- type
- SexpNodeKind* = enum ## possible SEXP node types
- SNil,
- SInt,
- SFloat,
- SString,
- SSymbol,
- SList,
- SCons
- SexpNode* = ref SexpNodeObj ## SEXP node
- SexpNodeObj* {.acyclic.} = object
- case kind*: SexpNodeKind
- of SString:
- str*: string
- of SSymbol:
- symbol*: string
- of SInt:
- num*: BiggestInt
- of SFloat:
- fnum*: float
- of SList:
- elems*: seq[SexpNode]
- of SCons:
- car: SexpNode
- cdr: SexpNode
- of SNil:
- discard
- Cons = tuple[car: SexpNode, cdr: SexpNode]
- SexpParsingError* = object of ValueError ## is raised for a SEXP error
- proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} =
- ## raises an `ESexpParsingError` exception.
- raise newException(SexpParsingError, errorMsgExpected(p, msg))
- proc newSString*(s: string): SexpNode {.procvar.}=
- ## Creates a new `SString SexpNode`.
- new(result)
- result.kind = SString
- result.str = s
- proc newSStringMove(s: string): SexpNode =
- new(result)
- result.kind = SString
- shallowCopy(result.str, s)
- proc newSInt*(n: BiggestInt): SexpNode {.procvar.} =
- ## Creates a new `SInt SexpNode`.
- new(result)
- result.kind = SInt
- result.num = n
- proc newSFloat*(n: float): SexpNode {.procvar.} =
- ## Creates a new `SFloat SexpNode`.
- new(result)
- result.kind = SFloat
- result.fnum = n
- proc newSNil*(): SexpNode {.procvar.} =
- ## Creates a new `SNil SexpNode`.
- new(result)
- proc newSCons*(car, cdr: SexpNode): SexpNode {.procvar.} =
- ## Creates a new `SCons SexpNode`
- new(result)
- result.kind = SCons
- result.car = car
- result.cdr = cdr
- proc newSList*(): SexpNode {.procvar.} =
- ## Creates a new `SList SexpNode`
- new(result)
- result.kind = SList
- result.elems = @[]
- proc newSSymbol*(s: string): SexpNode {.procvar.} =
- new(result)
- result.kind = SSymbol
- result.symbol = s
- proc newSSymbolMove(s: string): SexpNode =
- new(result)
- result.kind = SSymbol
- shallowCopy(result.symbol, s)
- proc getStr*(n: SexpNode, default: string = ""): string =
- ## Retrieves the string value of a `SString SexpNode`.
- ##
- ## Returns ``default`` if ``n`` is not a ``SString``.
- if n.kind != SString: return default
- else: return n.str
- proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt =
- ## Retrieves the int value of a `SInt SexpNode`.
- ##
- ## Returns ``default`` if ``n`` is not a ``SInt``.
- if n.kind != SInt: return default
- else: return n.num
- proc getFNum*(n: SexpNode, default: float = 0.0): float =
- ## Retrieves the float value of a `SFloat SexpNode`.
- ##
- ## Returns ``default`` if ``n`` is not a ``SFloat``.
- if n.kind != SFloat: return default
- else: return n.fnum
- proc getSymbol*(n: SexpNode, default: string = ""): string =
- ## Retrieves the int value of a `SList SexpNode`.
- ##
- ## Returns ``default`` if ``n`` is not a ``SList``.
- if n.kind != SSymbol: return default
- else: return n.symbol
- proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] =
- ## Retrieves the int value of a `SList SexpNode`.
- ##
- ## Returns ``default`` if ``n`` is not a ``SList``.
- if n.kind == SNil: return @[]
- elif n.kind != SList: return default
- else: return n.elems
- proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons =
- ## Retrieves the cons value of a `SList SexpNode`.
- ##
- ## Returns ``default`` if ``n`` is not a ``SList``.
- if n.kind == SCons: return (n.car, n.cdr)
- elif n.kind == SList: return (n.elems[0], n.elems[1])
- else: return defaults
- proc sexp*(s: string): SexpNode =
- ## Generic constructor for SEXP data. Creates a new `SString SexpNode`.
- new(result)
- result.kind = SString
- result.str = s
- proc sexp*(n: BiggestInt): SexpNode =
- ## Generic constructor for SEXP data. Creates a new `SInt SexpNode`.
- new(result)
- result.kind = SInt
- result.num = n
- proc sexp*(n: float): SexpNode =
- ## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`.
- new(result)
- result.kind = SFloat
- result.fnum = n
- proc sexp*(b: bool): SexpNode =
- ## Generic constructor for SEXP data. Creates a new `SSymbol
- ## SexpNode` with value t or `SNil SexpNode`.
- new(result)
- if b:
- result.kind = SSymbol
- result.symbol = "t"
- else:
- result.kind = SNil
- proc sexp*(elements: openArray[SexpNode]): SexpNode =
- ## Generic constructor for SEXP data. Creates a new `SList SexpNode`
- new(result)
- result.kind = SList
- newSeq(result.elems, elements.len)
- for i, p in pairs(elements): result.elems[i] = p
- proc sexp*(s: SexpNode): SexpNode =
- result = s
- proc toSexp(x: NimNode): NimNode {.compiletime.} =
- case x.kind
- of nnkBracket:
- result = newNimNode(nnkBracket)
- for i in 0 .. <x.len:
- result.add(toSexp(x[i]))
- else:
- result = x
- result = prefix(result, "sexp")
- macro convertSexp*(x: untyped): untyped =
- ## Convert an expression to a SexpNode directly, without having to specify
- ## `%` for every element.
- result = toSexp(x)
- proc `==`* (a,b: SexpNode): bool =
- ## Check two nodes for equality
- if a.isNil:
- if b.isNil: return true
- return false
- elif b.isNil or a.kind != b.kind:
- return false
- else:
- return case a.kind
- of SString:
- a.str == b.str
- of SInt:
- a.num == b.num
- of SFloat:
- a.fnum == b.fnum
- of SNil:
- true
- of SList:
- a.elems == b.elems
- of SSymbol:
- a.symbol == b.symbol
- of SCons:
- a.car == b.car and a.cdr == b.cdr
- proc hash* (n:SexpNode): Hash =
- ## Compute the hash for a SEXP node
- case n.kind
- of SList:
- result = hash(n.elems)
- of SInt:
- result = hash(n.num)
- of SFloat:
- result = hash(n.fnum)
- of SString:
- result = hash(n.str)
- of SNil:
- result = hash(0)
- of SSymbol:
- result = hash(n.symbol)
- of SCons:
- result = hash(n.car) !& hash(n.cdr)
- proc len*(n: SexpNode): int =
- ## If `n` is a `SList`, it returns the number of elements.
- ## If `n` is a `JObject`, it returns the number of pairs.
- ## Else it returns 0.
- case n.kind
- of SList: result = n.elems.len
- else: discard
- proc `[]`*(node: SexpNode, index: int): SexpNode =
- ## Gets the node at `index` in a List. Result is undefined if `index`
- ## is out of bounds
- assert(not isNil(node))
- assert(node.kind == SList)
- return node.elems[index]
- proc add*(father, child: SexpNode) =
- ## Adds `child` to a SList node `father`.
- assert father.kind == SList
- father.elems.add(child)
- # ------------- pretty printing ----------------------------------------------
- proc indent(s: var string, i: int) =
- s.add(spaces(i))
- proc newIndent(curr, indent: int, ml: bool): int =
- if ml: return curr + indent
- else: return indent
- proc nl(s: var string, ml: bool) =
- if ml: s.add("\n")
- proc escapeJson*(s: string): string =
- ## Converts a string `s` to its JSON representation.
- result = newStringOfCap(s.len + s.len shr 3)
- result.add("\"")
- for x in runes(s):
- var r = int(x)
- if r >= 32 and r <= 127:
- var c = chr(r)
- case c
- of '"': result.add("\\\"") #" # gotta fix nim-mode
- of '\\': result.add("\\\\")
- else: result.add(c)
- else:
- result.add("\\u")
- result.add(toHex(r, 4))
- result.add("\"")
- proc copy*(p: SexpNode): SexpNode =
- ## Performs a deep copy of `a`.
- case p.kind
- of SString:
- result = newSString(p.str)
- of SInt:
- result = newSInt(p.num)
- of SFloat:
- result = newSFloat(p.fnum)
- of SNil:
- result = newSNil()
- of SSymbol:
- result = newSSymbol(p.symbol)
- of SList:
- result = newSList()
- for i in items(p.elems):
- result.elems.add(copy(i))
- of SCons:
- result = newSCons(copy(p.car), copy(p.cdr))
- proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true,
- lstArr = false, currIndent = 0) =
- case node.kind
- of SString:
- if lstArr: result.indent(currIndent)
- result.add(escapeJson(node.str))
- of SInt:
- if lstArr: result.indent(currIndent)
- result.add(node.num)
- of SFloat:
- if lstArr: result.indent(currIndent)
- result.add(node.fnum)
- of SNil:
- if lstArr: result.indent(currIndent)
- result.add("nil")
- of SSymbol:
- if lstArr: result.indent(currIndent)
- result.add(node.symbol)
- of SList:
- if lstArr: result.indent(currIndent)
- if len(node.elems) != 0:
- result.add("(")
- result.nl(ml)
- for i in 0..len(node.elems)-1:
- if i > 0:
- result.add(" ")
- result.nl(ml) # New Line
- toPretty(result, node.elems[i], indent, ml,
- true, newIndent(currIndent, indent, ml))
- result.nl(ml)
- result.indent(currIndent)
- result.add(")")
- else: result.add("nil")
- of SCons:
- if lstArr: result.indent(currIndent)
- result.add("(")
- toPretty(result, node.car, indent, ml,
- true, newIndent(currIndent, indent, ml))
- result.add(" . ")
- toPretty(result, node.cdr, indent, ml,
- true, newIndent(currIndent, indent, ml))
- result.add(")")
- proc pretty*(node: SexpNode, indent = 2): string =
- ## Converts `node` to its Sexp Representation, with indentation and
- ## on multiple lines.
- result = ""
- toPretty(result, node, indent)
- proc `$`*(node: SexpNode): string =
- ## Converts `node` to its SEXP Representation on one line.
- result = ""
- toPretty(result, node, 0, false)
- iterator items*(node: SexpNode): SexpNode =
- ## Iterator for the items of `node`. `node` has to be a SList.
- assert node.kind == SList
- for i in items(node.elems):
- yield i
- iterator mitems*(node: var SexpNode): var SexpNode =
- ## Iterator for the items of `node`. `node` has to be a SList. Items can be
- ## modified.
- assert node.kind == SList
- for i in mitems(node.elems):
- yield i
- proc eat(p: var SexpParser, tok: TTokKind) =
- if p.tok == tok: discard getTok(p)
- else: raiseParseErr(p, tokToStr[tok])
- proc parseSexp(p: var SexpParser): SexpNode =
- ## Parses SEXP from a SEXP Parser `p`.
- case p.tok
- of tkString:
- # we capture 'p.a' here, so we need to give it a fresh buffer afterwards:
- result = newSStringMove(p.a)
- p.a = ""
- discard getTok(p)
- of tkInt:
- result = newSInt(parseBiggestInt(p.a))
- discard getTok(p)
- of tkFloat:
- result = newSFloat(parseFloat(p.a))
- discard getTok(p)
- of tkNil:
- result = newSNil()
- discard getTok(p)
- of tkSymbol:
- result = newSSymbolMove(p.a)
- p.a = ""
- discard getTok(p)
- of tkParensLe:
- result = newSList()
- discard getTok(p)
- while p.tok notin {tkParensRi, tkDot}:
- result.add(parseSexp(p))
- if p.tok != tkSpace: break
- discard getTok(p)
- if p.tok == tkDot:
- eat(p, tkDot)
- eat(p, tkSpace)
- result.add(parseSexp(p))
- result = newSCons(result[0], result[1])
- eat(p, tkParensRi)
- of tkSpace, tkDot, tkError, tkParensRi, tkEof:
- raiseParseErr(p, "(")
- proc open*(my: var SexpParser, input: Stream) =
- ## initializes the parser with an input stream.
- lexbase.open(my, input)
- my.kind = sexpError
- my.a = ""
- proc parseSexp*(s: Stream): SexpNode =
- ## Parses from a buffer `s` into a `SexpNode`.
- var p: SexpParser
- p.open(s)
- discard getTok(p) # read first token
- result = p.parseSexp()
- p.close()
- proc parseSexp*(buffer: string): SexpNode =
- ## Parses Sexp from `buffer`.
- result = parseSexp(newStringStream(buffer))
- when isMainModule:
- let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""")
- assert(testSexp[0].getNum == 1)
- assert(testSexp[1][0].getNum == 98)
- assert(testSexp[2].getElems == @[])
- assert(testSexp[4].getSymbol == "foobar")
- assert(testSexp[5].getStr == "foo")
- let alist = parseSexp("""((1 . 2) (2 . "foo"))""")
- assert(alist[0].getCons.car.getNum == 1)
- assert(alist[0].getCons.cdr.getNum == 2)
- assert(alist[1].getCons.cdr.getStr == "foo")
- # Generator:
- var j = convertSexp([true, false, "foobar", [1, 2, "baz"]])
- assert($j == """(t nil "foobar" (1 2 "baz"))""")
|