123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536 |
#
#
#            Nim's Runtime Library
#        (c) Copyright 2018 Nim contributors
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a JSON parser. It is used
## and exported by the ``json`` standard library
## module, but can also be used in its own right.
- import
- strutils, lexbase, streams, unicode
type
  JsonEventKind* = enum ## enumeration of all events that may occur when parsing
    jsonError,          ## an error occurred during parsing
    jsonEof,            ## end of file reached
    jsonString,         ## a string literal
    jsonInt,            ## an integer literal
    jsonFloat,          ## a float literal
    jsonTrue,           ## the value ``true``
    jsonFalse,          ## the value ``false``
    jsonNull,           ## the value ``null``
    jsonObjectStart,    ## start of an object: the ``{`` token
    jsonObjectEnd,      ## end of an object: the ``}`` token
    jsonArrayStart,     ## start of an array: the ``[`` token
    jsonArrayEnd        ## end of an array: the ``]`` token

  TokKind* = enum # must be synchronized with JsonEventKind!
    tkError,
    tkEof,
    tkString,
    tkInt,
    tkFloat,
    tkTrue,
    tkFalse,
    tkNull,
    tkCurlyLe,
    tkCurlyRi,
    tkBracketLe,
    tkBracketRi,
    tkColon,
    tkComma

  JsonError* = enum        ## enumeration that lists all errors that can occur
    errNone,               ## no error
    errInvalidToken,       ## invalid token
    errStringExpected,     ## string expected
    errColonExpected,      ## ``:`` expected
    errCommaExpected,      ## ``,`` expected
    errBracketRiExpected,  ## ``]`` expected
    errCurlyRiExpected,    ## ``}`` expected
    errQuoteExpected,      ## ``"`` or ``'`` expected
    errEOC_Expected,       ## ``*/`` expected
    errEofExpected,        ## EOF expected
    errExprExpected        ## expr expected

  ParserState = enum
    # entries of the state stack that drives the event state machine
    stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma,
    stateExpectObjectComma, stateExpectColon, stateExpectValue

  JsonParser* = object of BaseLexer ## the parser object.
    a*: string               ## text of the token that was read last
    tok*: TokKind            ## kind of the token that was read last
    kind: JsonEventKind      # current parser event
    err: JsonError           # error code belonging to a ``jsonError`` event
    state: seq[ParserState]  # state stack; the last entry is the active state
    filename: string         # only used when formatting error messages
    rawStringLiterals: bool  # keep quotes and escapes of string literals

  JsonKindError* = object of ValueError ## raised by the ``to`` macro if the
                                        ## JSON kind is incorrect.
  JsonParsingError* = object of ValueError ## is raised for a JSON error

static:
  # Scalar tokens are turned into events with ``JsonEventKind(ord(tk))``, so
  # both enumerations have to agree on these ordinals. Fail the build instead
  # of silently producing wrong events if somebody reorders one of them.
  doAssert ord(tkString) == ord(jsonString)
  doAssert ord(tkInt) == ord(jsonInt)
  doAssert ord(tkFloat) == ord(jsonFloat)
  doAssert ord(tkTrue) == ord(jsonTrue)
  doAssert ord(tkFalse) == ord(jsonFalse)
  doAssert ord(tkNull) == ord(jsonNull)
const
  errorMessages*: array[JsonError, string] = [
    # human readable text for every ``JsonError`` value
    errNone: "no error",
    errInvalidToken: "invalid token",
    errStringExpected: "string expected",
    errColonExpected: "':' expected",
    errCommaExpected: "',' expected",
    errBracketRiExpected: "']' expected",
    errCurlyRiExpected: "'}' expected",
    errQuoteExpected: "'\"' or \"'\" expected",
    errEOC_Expected: "'*/' expected",
    errEofExpected: "EOF expected",
    errExprExpected: "expression expected"
  ]
  tokToStr: array[TokKind, string] = [
    # display name for every ``TokKind`` value
    tkError: "invalid token",
    tkEof: "EOF",
    tkString: "string literal",
    tkInt: "int literal",
    tkFloat: "float literal",
    tkTrue: "true",
    tkFalse: "false",
    tkNull: "null",
    tkCurlyLe: "{",
    tkCurlyRi: "}",
    tkBracketLe: "[",
    tkBracketRi: "]",
    tkColon: ":",
    tkComma: ","
  ]
proc open*(my: var JsonParser, input: Stream, filename: string;
           rawStringLiterals = false) =
  ## initializes the parser with an input stream. `filename` is only used
  ## for nice error messages. If `rawStringLiterals` is true, string literals
  ## are kept with their surrounding quotes and escape sequences in them are
  ## left untouched too.
  lexbase.open(my, input)
  # reset the parser's own fields
  my.a = ""
  my.kind = jsonError
  my.state = @[stateStart]
  my.filename = filename
  my.rawStringLiterals = rawStringLiterals
proc close*(my: var JsonParser) {.inline.} =
  ## closes the parser `my` and its associated input stream by delegating
  ## to ``lexbase.close``.
  lexbase.close(my)
proc str*(my: JsonParser): string {.inline.} =
  ## returns the character data for the events: ``jsonInt``, ``jsonFloat``,
  ## ``jsonString``. Asserts that the current event is one of these.
  assert my.kind in {jsonInt, jsonFloat, jsonString}
  result = my.a
proc getInt*(my: JsonParser): BiggestInt {.inline.} =
  ## returns the number for the event: ``jsonInt``. The token text is
  ## converted with ``parseBiggestInt``.
  assert my.kind == jsonInt
  result = my.a.parseBiggestInt
proc getFloat*(my: JsonParser): float {.inline.} =
  ## returns the number for the event: ``jsonFloat``. The token text is
  ## converted with ``parseFloat``.
  assert my.kind == jsonFloat
  result = my.a.parseFloat
proc kind*(my: JsonParser): JsonEventKind {.inline.} =
  ## returns the current event type for the JSON parser
  result = my.kind
proc getColumn*(my: JsonParser): int {.inline.} =
  ## get the current column the parser has arrived at.
  return my.getColNumber(my.bufpos)
proc getLine*(my: JsonParser): int {.inline.} =
  ## get the current line the parser has arrived at.
  return my.lineNumber
proc getFilename*(my: JsonParser): string {.inline.} =
  ## get the filename of the file that the parser processes.
  return my.filename
proc errorMsg*(my: JsonParser): string =
  ## returns a helpful error message for the event ``jsonError``, formatted
  ## as ``filename(line, column) Error: message``.
  assert my.kind == jsonError
  let
    line = $getLine(my)
    column = $getColumn(my)
    description = errorMessages[my.err]
  result = "$1($2, $3) Error: $4".format(my.filename, line, column,
                                         description)
proc errorMsgExpected*(my: JsonParser, e: string): string =
  ## returns an error message "`e` expected" in the same format as the
  ## other error messages
  let
    line = $getLine(my)
    column = $getColumn(my)
    description = e & " expected"
  result = "$1($2, $3) Error: $4".format(my.filename, line, column,
                                         description)
proc handleHexChar(c: char, x: var int): bool =
  # Appends the hexadecimal digit `c` to `x` (as its lowest 4 bits).
  # Returns false and leaves `x` untouched if `c` is not a hex digit.
  let digit =
    case c
    of '0'..'9': ord(c) - ord('0')
    of 'a'..'f': ord(c) - ord('a') + 10
    of 'A'..'F': ord(c) - ord('A') + 10
    else: return false # error
  x = (x shl 4) or digit
  result = true # success
proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
  ## Reads the four hexadecimal digits of a ``\uXXXX`` escape starting at
  ## ``buf[pos]`` and returns their value. `pos` is advanced past every digit
  ## that was consumed. Returns -1 as soon as a non-hex character is found;
  ## `pos` then points at that character.
  var codeUnit = 0
  var remaining = 4 # a UTF-16 escape always has 4 hex digits
  while remaining > 0:
    if not handleHexChar(buf[pos], codeUnit):
      return -1
    inc(pos)
    dec(remaining)
  result = codeUnit
proc parseString(my: var JsonParser): TokKind =
  # Lexes the string literal whose opening quote is at `my.bufpos` and appends
  # its content to `my.a`.
  #
  # Returns `tkString` on success. Returns `tkError` (with `my.err` set) if
  # the input ends before the closing quote or a ``\u`` escape is malformed.
  #
  # With `my.rawStringLiterals` the literal is copied verbatim: the
  # surrounding quotes and all escape sequences are kept exactly as written.
  # Otherwise escape sequences are decoded; ``\uXXXX`` escapes (including
  # surrogate pairs) are emitted as UTF-8.
  result = tkString
  var pos = my.bufpos + 1 # skip the opening quote
  var buf = my.buf
  if my.rawStringLiterals:
    add(my.a, '"')
  while true:
    case buf[pos]
    of '\0':
      my.err = errQuoteExpected
      result = tkError
      break
    of '"':
      if my.rawStringLiterals:
        add(my.a, '"')
      inc(pos)
      break
    of '\\':
      if my.rawStringLiterals:
        add(my.a, '\\')
      let esc = buf[pos+1]
      case esc
      of '\\', '"', '\'', '/':
        add(my.a, esc)
        inc(pos, 2)
      of 'b', 'f', 'n', 'r', 't':
        if my.rawStringLiterals:
          # raw mode keeps the escape letter; the backslash was added above
          add(my.a, esc)
        else:
          case esc
          of 'b': add(my.a, '\b')
          of 'f': add(my.a, '\f')
          of 'n': add(my.a, '\L')
          of 'r': add(my.a, '\C')
          else: add(my.a, '\t')
        inc(pos, 2)
      of 'u':
        if my.rawStringLiterals:
          add(my.a, 'u')
        inc(pos, 2)
        let hexStart = pos # first hex digit, needed for the raw copy below
        var r = parseEscapedUTF16(buf, pos)
        if r < 0:
          my.err = errInvalidToken
          result = tkError # do not report a truncated string as valid
          break
        # Deal with surrogates
        if (r and 0xfc00) == 0xd800:
          # a high surrogate must be followed by a ``\u`` low surrogate
          if buf[pos] != '\\' or buf[pos+1] != 'u':
            my.err = errInvalidToken
            result = tkError
            break
          inc(pos, 2)
          let s = parseEscapedUTF16(buf, pos)
          if (s and 0xfc00) == 0xdc00 and s > 0:
            r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
          else:
            my.err = errInvalidToken
            result = tkError
            break
        if my.rawStringLiterals:
          # Copy everything that was consumed verbatim. For a surrogate pair
          # this includes the ``\u`` and the digits of the second half.
          for i in hexStart .. pos-1:
            add(my.a, buf[i])
        else:
          add(my.a, toUTF8(Rune(r)))
      else:
        # Unknown escape: don't bother with the error, keep the backslash and
        # let the main loop handle the following character. In raw mode the
        # backslash has already been added above.
        if not my.rawStringLiterals:
          add(my.a, buf[pos])
        inc(pos)
    of '\c':
      pos = lexbase.handleCR(my, pos)
      buf = my.buf # re-read `my.buf` after the lexbase call
      add(my.a, '\c')
    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf # re-read `my.buf` after the lexbase call
      add(my.a, '\L')
    else:
      add(my.a, buf[pos])
      inc(pos)
  my.bufpos = pos # store back
proc skip(my: var JsonParser) =
  # Advances `my.bufpos` past blanks, line breaks, ``//`` line comments and
  # ``/* */`` block comments. An unterminated block comment sets `my.err` to
  # `errEOC_Expected`.
  var cur = my.bufpos
  var data = my.buf
  while true:
    case data[cur]
    of ' ', '\t':
      inc(cur)
    of '\c':
      cur = lexbase.handleCR(my, cur)
      data = my.buf
    of '\L':
      cur = lexbase.handleLF(my, cur)
      data = my.buf
    of '/':
      case data[cur+1]
      of '/':
        # line comment: runs up to and including the next line break
        inc(cur, 2)
        while true:
          case data[cur]
          of '\0':
            break
          of '\c':
            cur = lexbase.handleCR(my, cur)
            data = my.buf
            break
          of '\L':
            cur = lexbase.handleLF(my, cur)
            data = my.buf
            break
          else:
            inc(cur)
      of '*':
        # block comment: runs up to the closing ``*/``
        inc(cur, 2)
        while true:
          case data[cur]
          of '\0':
            my.err = errEOC_Expected
            break
          of '\c':
            cur = lexbase.handleCR(my, cur)
            data = my.buf
          of '\L':
            cur = lexbase.handleLF(my, cur)
            data = my.buf
          of '*':
            inc(cur)
            if data[cur] == '/':
              inc(cur)
              break
          else:
            inc(cur)
      else:
        # a lone '/' is not a comment; leave it for the tokenizer
        break
    else:
      break
  my.bufpos = cur
proc parseNumber(my: var JsonParser) =
  # Appends the number literal starting at `my.bufpos` to `my.a` and moves
  # `my.bufpos` behind it. Accepted shape: optional '-', integer digits,
  # optional '.' plus digits, optional exponent. A literal that begins with
  # '.' (after the optional sign) gets a leading "0" inserted.
  var pos = my.bufpos
  var buf = my.buf

  template consume() =
    # copy the current character to the token text and step over it
    add(my.a, buf[pos])
    inc(pos)

  template consumeDigits() =
    while buf[pos] in Digits:
      consume()

  if buf[pos] == '-':
    consume()
  if buf[pos] == '.':
    add(my.a, "0.")
    inc(pos)
  else:
    consumeDigits()
    if buf[pos] == '.':
      consume()
  # digits after the dot:
  consumeDigits()
  if buf[pos] in {'E', 'e'}:
    consume()
    if buf[pos] in {'+', '-'}:
      consume()
    consumeDigits()
  my.bufpos = pos
proc parseName(my: var JsonParser) =
  # Appends the identifier starting at `my.bufpos` to `my.a` and moves
  # `my.bufpos` behind it. Does nothing if the current character cannot
  # start an identifier.
  let data = my.buf
  var cur = my.bufpos
  if data[cur] in IdentStartChars:
    while data[cur] in IdentChars:
      add(my.a, data[cur])
      inc(cur)
  my.bufpos = cur
proc getTok*(my: var JsonParser): TokKind =
  ## Reads the next token. Whitespace and comments in front of it are
  ## skipped, `my.a` is reset and then filled with the token text (numbers,
  ## strings and names only). The token kind is returned and also stored in
  ## `my.tok`.
  setLen(my.a, 0)
  skip(my) # skip whitespace, comments
  let c = my.buf[my.bufpos]
  case c
  of '-', '.', '0'..'9':
    parseNumber(my)
    # a fraction or an exponent makes it a float literal
    result = if my.a.contains({'.', 'e', 'E'}): tkFloat else: tkInt
  of '"':
    result = parseString(my)
  of '[', ']', '{', '}', ',', ':':
    # single character tokens
    inc(my.bufpos)
    result =
      case c
      of '[': tkBracketLe
      of ']': tkBracketRi
      of '{': tkCurlyLe
      of '}': tkCurlyRi
      of ',': tkComma
      else: tkColon
  of '\0':
    result = tkEof
  of 'a'..'z', 'A'..'Z', '_':
    parseName(my)
    if my.a == "null": result = tkNull
    elif my.a == "true": result = tkTrue
    elif my.a == "false": result = tkFalse
    else: result = tkError
  else:
    inc(my.bufpos)
    result = tkError
  my.tok = result
proc next*(my: var JsonParser) =
  ## retrieves the first/next event. This controls the parser.
  ##
  ## One token is read and interpreted according to the state on top of the
  ## state stack; the resulting event is stored in `my.kind` (and `my.err`
  ## for ``jsonError``). Commas and colons produce no event of their own:
  ## after consuming one, the proc calls itself to deliver the next event.
  let tk = getTok(my)
  let top = my.state.high
  # the following code is a state machine. If we had proper coroutines,
  # the code could be much simpler.
  case my.state[top]
  of stateEof:
    if tk == tkEof:
      my.kind = jsonEof
    else:
      my.kind = jsonError
      my.err = errEofExpected
  of stateStart:
    # tokens allowed?
    case tk
    of tkString..tkNull:
      my.kind = JsonEventKind(ord(tk))
      my.state[top] = stateEof # expect EOF next!
    of tkBracketLe:
      my.kind = jsonArrayStart
      my.state.add(stateArray) # we expect any
    of tkCurlyLe:
      my.kind = jsonObjectStart
      my.state.add(stateObject)
    of tkEof:
      my.kind = jsonEof
    else:
      my.kind = jsonError
      my.err = errEofExpected
  of stateObject:
    # whatever comes first inside an object must be followed by a colon
    case tk
    of tkString..tkNull:
      my.kind = JsonEventKind(ord(tk))
      my.state.add(stateExpectColon)
    of tkBracketLe:
      my.kind = jsonArrayStart
      my.state.add(stateExpectColon)
      my.state.add(stateArray)
    of tkCurlyLe:
      my.kind = jsonObjectStart
      my.state.add(stateExpectColon)
      my.state.add(stateObject)
    of tkCurlyRi:
      my.kind = jsonObjectEnd
      discard my.state.pop()
    else:
      my.kind = jsonError
      my.err = errCurlyRiExpected
  of stateArray:
    case tk
    of tkString..tkNull:
      my.kind = JsonEventKind(ord(tk))
      my.state.add(stateExpectArrayComma) # expect value next!
    of tkBracketLe:
      my.kind = jsonArrayStart
      my.state.add(stateExpectArrayComma)
      my.state.add(stateArray)
    of tkCurlyLe:
      my.kind = jsonObjectStart
      my.state.add(stateExpectArrayComma)
      my.state.add(stateObject)
    of tkBracketRi:
      my.kind = jsonArrayEnd
      discard my.state.pop()
    else:
      my.kind = jsonError
      my.err = errBracketRiExpected
  of stateExpectArrayComma:
    case tk
    of tkComma:
      discard my.state.pop() # back to stateArray
      next(my)
    of tkBracketRi:
      my.kind = jsonArrayEnd
      discard my.state.pop() # pop stateExpectArrayComma
      discard my.state.pop() # pop stateArray
    else:
      my.kind = jsonError
      my.err = errBracketRiExpected
  of stateExpectObjectComma:
    case tk
    of tkComma:
      discard my.state.pop() # back to stateObject
      next(my)
    of tkCurlyRi:
      my.kind = jsonObjectEnd
      discard my.state.pop() # pop stateExpectObjectComma
      discard my.state.pop() # pop stateObject
    else:
      my.kind = jsonError
      my.err = errCurlyRiExpected
  of stateExpectColon:
    if tk == tkColon:
      my.state[top] = stateExpectValue
      next(my)
    else:
      my.kind = jsonError
      my.err = errColonExpected
  of stateExpectValue:
    case tk
    of tkString..tkNull:
      my.kind = JsonEventKind(ord(tk))
      my.state[top] = stateExpectObjectComma
    of tkBracketLe:
      my.kind = jsonArrayStart
      my.state[top] = stateExpectObjectComma
      my.state.add(stateArray)
    of tkCurlyLe:
      my.kind = jsonObjectStart
      my.state[top] = stateExpectObjectComma
      my.state.add(stateObject)
    else:
      my.kind = jsonError
      my.err = errExprExpected
proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} =
  ## raises a `JsonParsingError` exception whose message is built by
  ## `errorMsgExpected`, i.e. it reports that `msg` was expected at the
  ## parser's current position.
  let text = errorMsgExpected(p, msg)
  raise newException(JsonParsingError, text)
proc eat*(p: var JsonParser, tok: TokKind) =
  ## Consumes the current token if it is of kind `tok` and reads the next
  ## one; otherwise a `JsonParsingError` naming the expected token is raised.
  if p.tok != tok:
    raiseParseErr(p, tokToStr[tok])
  discard getTok(p)
|