# lexer.nim
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. # This scanner is handwritten for efficiency. I used an elegant buffering
  10. # scheme which I have not seen anywhere else:
  11. # We guarantee that a whole line is in the buffer. Thus only when scanning
  12. # the \n or \r character we have to check wether we need to read in the next
  13. # chunk. (\n or \r already need special handling for incrementing the line
  14. # counter; choosing both \n and \r allows the scanner to properly read Unix,
  15. # DOS or Macintosh text files, even when it is not the native format.
  16. import
  17. hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
  18. wordrecg
const
  MaxLineLength* = 80         # lines longer than this lead to a warning
  numChars*: set[char] = {'0'..'9', 'a'..'z', 'A'..'Z'} # chars that may appear in a number literal
  SymChars*: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} # continuation chars of an identifier
  SymStartChars*: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'} # chars an identifier may start with
  OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
                         '|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'}
  # don't forget to update the 'highlite' module if these charsets should change
type
  TTokType* = enum            # the kinds of tokens the lexer can produce;
                              # keyword values must stay contiguous (see
                              # tokKeywordLow/tokKeywordHigh below)
    tkInvalid, tkEof,         # order is important here!
    tkSymbol, # keywords:
    tkAddr, tkAnd, tkAs, tkAsm, tkAtomic,
    tkBind, tkBlock, tkBreak, tkCase, tkCast,
    tkConcept, tkConst, tkContinue, tkConverter,
    tkDefer, tkDiscard, tkDistinct, tkDiv, tkDo,
    tkElif, tkElse, tkEnd, tkEnum, tkExcept, tkExport,
    tkFinally, tkFor, tkFrom, tkFunc,
    tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface,
    tkIs, tkIsnot, tkIterator,
    tkLet,
    tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin,
    tkObject, tkOf, tkOr, tkOut,
    tkProc, tkPtr, tkRaise, tkRef, tkReturn,
    tkShl, tkShr, tkStatic,
    tkTemplate,
    tkTry, tkTuple, tkType, tkUsing,
    tkVar, tkWhen, tkWhile, tkWith, tkWithout, tkXor,
    tkYield, # end of keywords
    tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit,
    tkUIntLit, tkUInt8Lit, tkUInt16Lit, tkUInt32Lit, tkUInt64Lit,
    tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit,
    tkStrLit, tkRStrLit, tkTripleStrLit,
    tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
    tkBracketRi, tkCurlyLe, tkCurlyRi,
    tkBracketDotLe, tkBracketDotRi, # [. and .]
    tkCurlyDotLe, tkCurlyDotRi, # {. and .}
    tkParDotLe, tkParDotRi,   # (. and .)
    tkComma, tkSemiColon,
    tkColon, tkColonColon, tkEquals, tkDot, tkDotDot,
    tkOpr, tkComment, tkAccent,
    tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr
  TTokTypes* = set[TTokType]
const
  weakTokens = {tkComma, tkSemiColon, tkColon,
                tkParRi, tkParDotRi, tkBracketRi, tkBracketDotRi,
                tkCurlyRi} # \
    # tokens that should not be considered for previousToken
  tokKeywordLow* = succ(tkSymbol)  # first keyword in TTokType
  tokKeywordHigh* = pred(tkIntLit) # last keyword in TTokType
  # maps every TTokType to its display string; keywords and punctuation
  # render as themselves, other kinds render as their enum name:
  TokTypeToStr*: array[TTokType, string] = ["tkInvalid", "[EOF]",
    "tkSymbol",
    "addr", "and", "as", "asm", "atomic",
    "bind", "block", "break", "case", "cast",
    "concept", "const", "continue", "converter",
    "defer", "discard", "distinct", "div", "do",
    "elif", "else", "end", "enum", "except", "export",
    "finally", "for", "from", "func", "generic", "if",
    "import", "in", "include", "interface", "is", "isnot", "iterator",
    "let",
    "macro", "method", "mixin", "mod",
    "nil", "not", "notin", "object", "of", "or",
    "out", "proc", "ptr", "raise", "ref", "return",
    "shl", "shr", "static",
    "template",
    "try", "tuple", "type", "using",
    "var", "when", "while", "with", "without", "xor",
    "yield",
    "tkIntLit", "tkInt8Lit", "tkInt16Lit", "tkInt32Lit", "tkInt64Lit",
    "tkUIntLit", "tkUInt8Lit", "tkUInt16Lit", "tkUInt32Lit", "tkUInt64Lit",
    "tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkFloat128Lit",
    "tkStrLit", "tkRStrLit",
    "tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(",
    ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)",
    ",", ";",
    ":", "::", "=", ".", "..",
    "tkOpr", "tkComment", "`",
    "tkSpaces", "tkInfixOpr",
    "tkPrefixOpr", "tkPostfixOpr"]
type
  TNumericalBase* = enum
    base10,                   # base10 is listed as the first element,
                              # so that it is the correct default value
    base2, base8, base16

  CursorPosition* {.pure.} = enum ## XXX remove this again
    None, InToken, BeforeToken, AfterToken

  TToken* = object            # a Nim token
    tokType*: TTokType        # the type of the token
    indent*: int              # the indentation; != -1 if the token has been
                              # preceded with indentation
    ident*: PIdent            # the parsed identifier
    iNumber*: BiggestInt      # the parsed integer literal
    fNumber*: BiggestFloat    # the parsed floating point literal
    base*: TNumericalBase     # the numerical base; only valid for int
                              # or float literals
    strongSpaceA*: int8       # leading spaces of an operator
    strongSpaceB*: int8       # trailing spaces of an operator
    literal*: string          # the parsed (string) literal; and
                              # documentation comments are here too
    line*, col*: int          # position of the token in the source

  # callback used to redirect lexer diagnostics away from msgs.message:
  TErrorHandler* = proc (info: TLineInfo; msg: TMsgKind; arg: string)
  TLexer* = object of TBaseLexer
    fileIdx*: int32           # index of the file being lexed
    indentAhead*: int         # if > 0 an indendation has already been read
                              # this is needed because scanning comments
                              # needs so much look-ahead
    currLineIndent*: int      # indentation of the current line
    strongSpaces*, allowTabs*: bool
    cursor*: CursorPosition   # nimsuggest: where the IDE cursor sits
    errorHandler*: TErrorHandler
    cache*: IdentCache        # shared identifier cache
    when defined(nimsuggest):
      previousToken: TLineInfo
var gLinesCompiled*: int  # all lines that have been compiled

proc getLineInfo*(L: TLexer, tok: TToken): TLineInfo {.inline.} =
  ## Builds a TLineInfo from the token's stored line/col and the
  ## lexer's file index.
  newLineInfo(L.fileIdx, tok.line, tok.col)

proc isKeyword*(kind: TTokType): bool =
  ## True if `kind` lies in the contiguous keyword range of TTokType.
  result = (kind >= tokKeywordLow) and (kind <= tokKeywordHigh)

template ones(n): untyped = ((1 shl n)-1) # for utf-8 conversion
  138. proc isNimIdentifier*(s: string): bool =
  139. if s[0] in SymStartChars:
  140. var i = 1
  141. var sLen = s.len
  142. while i < sLen:
  143. if s[i] == '_':
  144. inc(i)
  145. if s[i] notin SymChars: return
  146. inc(i)
  147. result = true
proc tokToStr*(tok: TToken): string =
  ## Renders a token back to source-like text: numbers and literals use
  ## their parsed payload, punctuation uses TokTypeToStr, symbols and
  ## keywords use the identifier's spelling.
  case tok.tokType
  of tkIntLit..tkInt64Lit: result = $tok.iNumber
  of tkFloatLit..tkFloat64Lit: result = $tok.fNumber
  of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal
  of tkParLe..tkColon, tkEof, tkAccent:
    result = TokTypeToStr[tok.tokType]
  else:
    if tok.ident != nil:
      result = tok.ident.s
    else:
      result = ""

proc prettyTok*(tok: TToken): string =
  ## Like tokToStr but prefixes keywords with "keyword " for nicer
  ## error messages.
  if isKeyword(tok.tokType): result = "keyword " & tok.ident.s
  else: result = tokToStr(tok)

proc printTok*(tok: TToken) =
  ## Debug helper: writes "line:col<TAB>kind text" via msgWriteln.
  msgWriteln($tok.line & ":" & $tok.col & "\t" &
      TokTypeToStr[tok.tokType] & " " & tokToStr(tok))
proc initToken*(L: var TToken) =
  ## Initializes a token to its default (invalid) state, allocating a
  ## fresh empty literal string.
  L.tokType = tkInvalid
  L.iNumber = 0
  L.indent = 0
  L.strongSpaceA = 0
  L.literal = ""
  L.fNumber = 0.0
  L.base = base10
  L.ident = nil

proc fillToken(L: var TToken) =
  ## Same as initToken but reuses the existing literal's storage via
  ## setLen to avoid a reallocation on the hot path.
  L.tokType = tkInvalid
  L.iNumber = 0
  L.indent = 0
  L.strongSpaceA = 0
  setLen(L.literal, 0)
  L.fNumber = 0.0
  L.base = base10
  L.ident = nil
proc openLexer*(lex: var TLexer, fileIdx: int32, inputstream: PLLStream;
                cache: IdentCache) =
  ## Prepares `lex` to tokenize `inputstream`; `fileIdx` identifies the
  ## file for line information, `cache` is the shared identifier cache.
  openBaseLexer(lex, inputstream)
  lex.fileIdx = fileidx
  lex.indentAhead = - 1     # -1 means: no indentation token buffered
  lex.currLineIndent = 0
  inc(lex.lineNumber, inputstream.lineOffset)
  lex.cache = cache
  when defined(nimsuggest):
    lex.previousToken.fileIndex = fileIdx

proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream;
                cache: IdentCache) =
  ## Convenience overload that resolves `filename` to a file index.
  openLexer(lex, filename.fileInfoIdx, inputstream, cache)

proc closeLexer*(lex: var TLexer) =
  ## Closes the lexer and accounts its lines in gLinesCompiled.
  inc(gLinesCompiled, lex.lineNumber)
  closeBaseLexer(lex)
proc getLineInfo(L: TLexer): TLineInfo =
  ## Line info for the lexer's current buffer position.
  result = newLineInfo(L.fileIdx, L.lineNumber, getColNumber(L, L.bufpos))

proc dispMessage(L: TLexer; info: TLineInfo; msg: TMsgKind; arg: string) =
  ## Routes a diagnostic either to the global msgs module or to the
  ## lexer's errorHandler callback when one is installed.
  if L.errorHandler.isNil:
    msgs.message(info, msg, arg)
  else:
    L.errorHandler(info, msg, arg)

proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") =
  ## Reports `msg` at the lexer's current position.
  L.dispMessage(getLineInfo(L), msg, arg)

proc lexMessageTok*(L: TLexer, msg: TMsgKind, tok: TToken, arg = "") =
  ## Reports `msg` at the position recorded in `tok`.
  var info = newLineInfo(L.fileIdx, tok.line, tok.col)
  L.dispMessage(info, msg, arg)

proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
  ## Reports `msg` at buffer offset `pos` of the current line.
  var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart)
  L.dispMessage(info, msg, arg)

proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool =
  ## True if the char at bufpos is `first` and the next one is in `second`.
  result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second)
# The following templates implement IDE cursor tracking for nimsuggest:
# tokenBegin remembers where a token starts; the tokenEnd* variants check
# whether the tracked position (gTrackPos) falls inside the span and, if
# so, record how the cursor relates to the token. Outside of nimsuggest
# builds they compile to nothing.

template tokenBegin(pos) {.dirty.} =
  when defined(nimsuggest):
    var colA = getColNumber(L, pos)

template tokenEnd(pos) {.dirty.} =
  when defined(nimsuggest):
    let colB = getColNumber(L, pos)+1
    if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
        L.lineNumber == gTrackPos.line and gIdeCmd in {ideSug, ideCon}:
      L.cursor = CursorPosition.InToken
      gTrackPos.col = colA.int16
    colA = 0

template tokenEndIgnore(pos) =
  # cursor inside a string literal or comment: invalidate the track
  # position instead of attaching it to a token
  when defined(nimsuggest):
    let colB = getColNumber(L, pos)
    if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
        L.lineNumber == gTrackPos.line and gIdeCmd in {ideSug, ideCon}:
      gTrackPos.fileIndex = trackPosInvalidFileIdx
      gTrackPos.line = -1
    colA = 0

template tokenEndPrevious(pos) =
  when defined(nimsuggest):
    # when we detect the cursor in whitespace, we attach the track position
    # to the token that came before that, but only if we haven't detected
    # the cursor in a string literal or comment:
    let colB = getColNumber(L, pos)
    if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
        L.lineNumber == gTrackPos.line and gIdeCmd in {ideSug, ideCon}:
      L.cursor = CursorPosition.BeforeToken
      gTrackPos = L.previousToken
      gTrackPosAttached = true
    colA = 0
{.push overflowChecks: off.}
# We need to parse the largest uint literal without overflow checks
proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int =
  ## Parses decimal digits of `s` starting at `start` into `b`,
  ## deliberately letting the accumulation wrap instead of raising.
  ## Returns the number of characters consumed (0 if none).
  var i = start
  if s[i] in {'0'..'9'}:
    b = 0
    while s[i] in {'0'..'9'}:
      b = b * 10 + (ord(s[i]) - ord('0'))
      inc(i)
      while s[i] == '_': inc(i) # underscores are allowed and ignored
  result = i - start
{.pop.} # overflowChecks
# Appends one character to the token literal and advances the buffer;
# the two-argument form records `replacementChar` instead of the raw char.
template eatChar(L: var TLexer, t: var TToken, replacementChar: char) =
  add(t.literal, replacementChar)
  inc(L.bufpos)

template eatChar(L: var TLexer, t: var TToken) =
  add(t.literal, L.buf[L.bufpos])
  inc(L.bufpos)
proc getNumber(L: var TLexer, result: var TToken) =
  ## Scans a numeric literal (int/uint/float of any width, in base 2, 8,
  ## 10 or 16, with optional type suffix) into `result`. Works in three
  ## stages: (1) build the literal string and detect the base, (2) parse
  ## an optional datatype suffix, (3) convert to the numeric value with
  ## bounds checks.

  proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
    # consumes a run of `chars`; single underscores are allowed between
    # digits but are not stored in tok.literal's digit stream meaningfully
    var pos = L.bufpos              # use registers for pos, buf
    var buf = L.buf
    while true:
      if buf[pos] in chars:
        add(tok.literal, buf[pos])
        inc(pos)
      else:
        break
      if buf[pos] == '_':
        if buf[pos+1] notin chars:
          lexMessage(L, errInvalidToken, "_")
          break
        add(tok.literal, '_')
        inc(pos)
    L.bufpos = pos

  proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) =
    # consumes a plain run of `chars` (no underscore handling)
    var pos = L.bufpos              # use registers for pos, buf
    var buf = L.buf
    while buf[pos] in chars:
      add(tok.literal, buf[pos])
      inc(pos)
    L.bufpos = pos

  proc lexMessageLitNum(L: var TLexer, msg: TMsgKind, startpos: int) =
    # Used to get slightly human friendlier err messages.
    # Note: the erroneous 'O' char in the character set is intentional
    const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
      'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
    var msgPos = L.bufpos
    var t: TToken
    t.literal = ""
    L.bufpos = startpos # Use L.bufpos as pos because of matchChars
    matchChars(L, t, literalishChars)
    # We must verify +/- specifically so that we're not past the literal
    if L.buf[L.bufpos] in {'+', '-'} and
        L.buf[L.bufpos - 1] in {'e', 'E'}:
      add(t.literal, L.buf[L.bufpos])
      inc(L.bufpos)
      matchChars(L, t, literalishChars)
    if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
      inc(L.bufpos)
      add(t.literal, L.buf[L.bufpos])
      matchChars(L, t, {'0'..'9'})
    L.bufpos = msgPos
    lexMessage(L, msg, t.literal)

  var
    startpos, endpos: int
    xi: BiggestInt              # bit pattern accumulator for non-decimal bases
    isBase10 = true
  const
    baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'}
    literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''}
    floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit}
  result.tokType = tkIntLit   # int literal until we know better
  result.literal = ""
  result.base = base10
  startpos = L.bufpos
  tokenBegin(startPos)

  # First stage: find out base, make verifications, build token literal string
  if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}:
    isBase10 = false
    eatChar(L, result, '0')
    case L.buf[L.bufpos]
    of 'O':
      lexMessageLitNum(L, errInvalidNumberOctalCode, startpos)
    of 'x', 'X':
      eatChar(L, result, 'x')
      matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'})
    of 'o', 'c', 'C':
      eatChar(L, result, 'c')
      matchUnderscoreChars(L, result, {'0'..'7'})
    of 'b', 'B':
      eatChar(L, result, 'b')
      matchUnderscoreChars(L, result, {'0'..'1'})
    else:
      internalError(getLineInfo(L), "getNumber")
  else:
    matchUnderscoreChars(L, result, {'0'..'9'})
    if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
      result.tokType = tkFloatLit
      eatChar(L, result, '.')
      matchUnderscoreChars(L, result, {'0'..'9'})
    if L.buf[L.bufpos] in {'e', 'E'}:
      result.tokType = tkFloatLit
      eatChar(L, result, 'e')
      if L.buf[L.bufpos] in {'+', '-'}:
        eatChar(L, result)
      matchUnderscoreChars(L, result, {'0'..'9'})
  endpos = L.bufpos

  # Second stage, find out if there's a datatype suffix and handle it
  var postPos = endpos
  if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
    if L.buf[postPos] == '\'':
      inc(postPos)              # the apostrophe before the suffix is optional
    case L.buf[postPos]
    of 'f', 'F':
      inc(postPos)
      if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
        result.tokType = tkFloat32Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
        result.tokType = tkFloat64Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '1') and
           (L.buf[postPos + 1] == '2') and
           (L.buf[postPos + 2] == '8'):
        result.tokType = tkFloat128Lit
        inc(postPos, 3)
      else:   # "f" alone defaults to float32
        result.tokType = tkFloat32Lit
    of 'd', 'D':  # ad hoc convenience shortcut for f64
      inc(postPos)
      result.tokType = tkFloat64Lit
    of 'i', 'I':
      inc(postPos)
      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
        result.tokType = tkInt64Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
        result.tokType = tkInt32Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
        result.tokType = tkInt16Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '8'):
        result.tokType = tkInt8Lit
        inc(postPos)
      else:
        lexMessageLitNum(L, errInvalidNumber, startpos)
    of 'u', 'U':
      inc(postPos)
      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
        result.tokType = tkUInt64Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
        result.tokType = tkUInt32Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
        result.tokType = tkUInt16Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '8'):
        result.tokType = tkUInt8Lit
        inc(postPos)
      else:
        result.tokType = tkUIntLit
    else:
      lexMessageLitNum(L, errInvalidNumber, startpos)

  # Is there still a literalish char awaiting? Then it's an error!
  if L.buf[postPos] in literalishChars or
      (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
    lexMessageLitNum(L, errInvalidNumber, startpos)

  # Third stage, extract actual number
  L.bufpos = startpos            # restore position
  var pos: int = startpos
  try:
    if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
      inc(pos, 2)
      xi = 0                  # it is a base prefix
      case L.buf[pos - 1]
      of 'b', 'B':
        result.base = base2
        while pos < endpos:
          if L.buf[pos] != '_':
            xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
          inc(pos)
      of 'o', 'c', 'C':
        result.base = base8
        while pos < endpos:
          if L.buf[pos] != '_':
            xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
          inc(pos)
      of 'x', 'X':
        result.base = base16
        while pos < endpos:
          case L.buf[pos]
          of '_':
            inc(pos)
          of '0'..'9':
            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
            inc(pos)
          of 'a'..'f':
            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
            inc(pos)
          of 'A'..'F':
            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
            inc(pos)
          else:
            break
      else:
        internalError(getLineInfo(L), "getNumber")

      case result.tokType
      of tkIntLit, tkInt64Lit: result.iNumber = xi
      of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi))))
      of tkInt16Lit: result.iNumber = BiggestInt(int16(toU16(int(xi))))
      of tkInt32Lit: result.iNumber = BiggestInt(int32(toU32(int64(xi))))
      of tkUIntLit, tkUInt64Lit: result.iNumber = xi
      of tkUInt8Lit: result.iNumber = BiggestInt(uint8(toU8(int(xi))))
      of tkUInt16Lit: result.iNumber = BiggestInt(uint16(toU16(int(xi))))
      of tkUInt32Lit: result.iNumber = BiggestInt(uint32(toU32(int64(xi))))
      of tkFloat32Lit:
        result.fNumber = (cast[PFloat32](addr(xi)))[]
        # note: this code is endian neutral!
        # XXX: Test this on big endian machine!
      of tkFloat64Lit, tkFloatLit:
        result.fNumber = (cast[PFloat64](addr(xi)))[]
      else: internalError(getLineInfo(L), "getNumber")

      # Bounds checks. Non decimal literals are allowed to overflow the range of
      # the datatype as long as their pattern don't overflow _bitwise_, hence
      # below checks of signed sizes against uint*.high is deliberate:
      # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
      if result.tokType notin floatTypes:
        let outOfRange = case result.tokType:
          of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
          of tkInt8Lit: (xi > BiggestInt(uint8.high))
          of tkInt16Lit: (xi > BiggestInt(uint16.high))
          of tkInt32Lit: (xi > BiggestInt(uint32.high))
          else: false
        if outOfRange:
          #echo "out of range num: ", result.iNumber, " vs ", xi
          lexMessageLitNum(L, errNumberOutOfRange, startpos)
    else:
      case result.tokType
      of floatTypes:
        result.fNumber = parseFloat(result.literal)
      of tkUint64Lit:
        xi = 0
        let len = unsafeParseUInt(result.literal, xi)
        if len != result.literal.len or len == 0:
          raise newException(ValueError, "invalid integer: " & $xi)
        result.iNumber = xi
      else:
        result.iNumber = parseBiggestInt(result.literal)

      # Explicit bounds checks
      let outOfRange = case result.tokType:
        of tkInt8Lit: (result.iNumber < int8.low or result.iNumber > int8.high)
        of tkUInt8Lit: (result.iNumber < BiggestInt(uint8.low) or
                        result.iNumber > BiggestInt(uint8.high))
        of tkInt16Lit: (result.iNumber < int16.low or result.iNumber > int16.high)
        of tkUInt16Lit: (result.iNumber < BiggestInt(uint16.low) or
                         result.iNumber > BiggestInt(uint16.high))
        of tkInt32Lit: (result.iNumber < int32.low or result.iNumber > int32.high)
        of tkUInt32Lit: (result.iNumber < BiggestInt(uint32.low) or
                         result.iNumber > BiggestInt(uint32.high))
        else: false
      if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos)

      # Promote int literal to int64? Not always necessary, but more consistent
      if result.tokType == tkIntLit:
        if (result.iNumber < low(int32)) or (result.iNumber > high(int32)):
          result.tokType = tkInt64Lit
  except ValueError:
    lexMessageLitNum(L, errInvalidNumber, startpos)
  except OverflowError, RangeError:
    lexMessageLitNum(L, errNumberOutOfRange, startpos)
  tokenEnd(postPos-1)
  L.bufpos = postPos
proc handleHexChar(L: var TLexer, xi: var int) =
  ## Folds one hex digit at bufpos into `xi` and advances; a non-hex
  ## char is left in place and `xi` is unchanged.
  case L.buf[L.bufpos]
  of '0'..'9':
    xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('0'))
    inc(L.bufpos)
  of 'a'..'f':
    xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('a') + 10)
    inc(L.bufpos)
  of 'A'..'F':
    xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('A') + 10)
    inc(L.bufpos)
  else: discard

proc handleDecChars(L: var TLexer, xi: var int) =
  ## Folds a run of decimal digits at bufpos into `xi`.
  while L.buf[L.bufpos] in {'0'..'9'}:
    xi = (xi * 10) + (ord(L.buf[L.bufpos]) - ord('0'))
    inc(L.bufpos)
proc getEscapedChar(L: var TLexer, tok: var TToken) =
  ## Scans one backslash escape sequence (bufpos is on the '\') and
  ## appends its decoded value to tok.literal. \u escapes are encoded
  ## as UTF-8; \n is rejected inside character literals.
  inc(L.bufpos)               # skip '\'
  case L.buf[L.bufpos]
  of 'n', 'N':
    if tok.tokType == tkCharLit: lexMessage(L, errNnotAllowedInCharacter)
    add(tok.literal, tnl)     # platform newline
    inc(L.bufpos)
  of 'r', 'R', 'c', 'C':
    add(tok.literal, CR)
    inc(L.bufpos)
  of 'l', 'L':
    add(tok.literal, LF)
    inc(L.bufpos)
  of 'f', 'F':
    add(tok.literal, FF)
    inc(L.bufpos)
  of 'e', 'E':
    add(tok.literal, ESC)
    inc(L.bufpos)
  of 'a', 'A':
    add(tok.literal, BEL)
    inc(L.bufpos)
  of 'b', 'B':
    add(tok.literal, BACKSPACE)
    inc(L.bufpos)
  of 'v', 'V':
    add(tok.literal, VT)
    inc(L.bufpos)
  of 't', 'T':
    add(tok.literal, '\t')
    inc(L.bufpos)
  of '\'', '\"':
    add(tok.literal, L.buf[L.bufpos])
    inc(L.bufpos)
  of '\\':
    add(tok.literal, '\\')
    inc(L.bufpos)
  of 'x', 'X', 'u', 'U':
    # \x: two hex digits (a byte); \u: four hex digits (a code point)
    var tp = L.buf[L.bufpos]
    inc(L.bufpos)
    var xi = 0
    handleHexChar(L, xi)
    handleHexChar(L, xi)
    if tp in {'u', 'U'}:
      handleHexChar(L, xi)
      handleHexChar(L, xi)
      # inlined toUTF-8 to avoid unicode and strutils dependencies.
      if xi <=% 127:
        add(tok.literal, xi.char )
      elif xi <=% 0x07FF:
        add(tok.literal, ((xi shr 6) or 0b110_00000).char )
        add(tok.literal, ((xi and ones(6)) or 0b10_0000_00).char )
      elif xi <=% 0xFFFF:
        add(tok.literal, (xi shr 12 or 0b1110_0000).char )
        add(tok.literal, (xi shr 6 and ones(6) or 0b10_0000_00).char )
        add(tok.literal, (xi and ones(6) or 0b10_0000_00).char )
      else: # value is 0xFFFF
        add(tok.literal, "\xef\xbf\xbf" )
    else:
      add(tok.literal, chr(xi))
  of '0'..'9':
    # decimal escape; a leading 0 followed by digits looks like octal
    # and earns a warning
    if matchTwoChars(L, '0', {'0'..'9'}):
      lexMessage(L, warnOctalEscape)
    var xi = 0
    handleDecChars(L, xi)
    if (xi <= 255): add(tok.literal, chr(xi))
    else: lexMessage(L, errInvalidCharacterConstant)
  else: lexMessage(L, errInvalidCharacterConstant)
proc newString(s: cstring, len: int): string =
  ## XXX, how come there is no support for this?
  ## Copies exactly `len` bytes of `s` into a fresh Nim string.
  result = newString(len)
  for i in 0 .. <len:
    result[i] = s[i]
proc handleCRLF(L: var TLexer, pos: int): int =
  ## Handles an end-of-line at `pos`: warns on overlong lines, optionally
  ## embeds the original source line, then delegates to nimlexbase to
  ## refill the buffer. Returns the new position (unchanged if `pos` is
  ## not on a CR/LF).
  template registerLine =
    let col = L.getColNumber(pos)
    if col > MaxLineLength:
      lexMessagePos(L, hintLineTooLong, pos)
    if optEmbedOrigSrc in gGlobalOptions:
      let lineStart = cast[ByteAddress](L.buf) + L.lineStart
      let line = newString(cast[cstring](lineStart), col)
      addSourceLine(L.fileIdx, line)
  case L.buf[pos]
  of CR:
    registerLine()
    result = nimlexbase.handleCR(L, pos)
  of LF:
    registerLine()
    result = nimlexbase.handleLF(L, pos)
  else: result = pos
proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
  ## Scans a string literal starting at the opening quote. Handles
  ## triple-quoted long strings (newlines allowed, closing """ required)
  ## and ordinary strings (escapes processed unless `rawMode`, where ""
  ## is the only escape for a quote).
  var pos = L.bufpos + 1          # skip "
  var buf = L.buf                 # put `buf` in a register
  var line = L.lineNumber         # save linenumber for better error message
  tokenBegin(pos)
  if buf[pos] == '\"' and buf[pos+1] == '\"':
    tok.tokType = tkTripleStrLit # long string literal:
    inc(pos, 2)               # skip ""
    # skip leading newline:
    if buf[pos] in {' ', '\t'}:
      var newpos = pos+1
      while buf[newpos] in {' ', '\t'}: inc newpos
      if buf[newpos] in {CR, LF}: pos = newpos
    pos = handleCRLF(L, pos)
    buf = L.buf
    while true:
      case buf[pos]
      of '\"':
        # a """ only terminates when not followed by a 4th quote
        if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
            buf[pos+3] != '\"':
          tokenEndIgnore(pos+2)
          L.bufpos = pos + 3 # skip the three """
          break
        add(tok.literal, '\"')
        inc(pos)
      of CR, LF:
        tokenEndIgnore(pos)
        pos = handleCRLF(L, pos)
        buf = L.buf
        add(tok.literal, tnl)
      of nimlexbase.EndOfFile:
        tokenEndIgnore(pos)
        # report the error at the line where the literal started:
        var line2 = L.lineNumber
        L.lineNumber = line
        lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart)
        L.lineNumber = line2
        L.bufpos = pos
        break
      else:
        add(tok.literal, buf[pos])
        inc(pos)
  else:
    # ordinary string literal
    if rawMode: tok.tokType = tkRStrLit
    else: tok.tokType = tkStrLit
    while true:
      var c = buf[pos]
      if c == '\"':
        if rawMode and buf[pos+1] == '\"':
          # "" inside a raw string denotes a single quote
          inc(pos, 2)
          add(tok.literal, '"')
        else:
          tokenEndIgnore(pos)
          inc(pos) # skip '"'
          break
      elif c in {CR, LF, nimlexbase.EndOfFile}:
        tokenEndIgnore(pos)
        lexMessage(L, errClosingQuoteExpected)
        break
      elif (c == '\\') and not rawMode:
        L.bufpos = pos
        getEscapedChar(L, tok)
        pos = L.bufpos
      else:
        add(tok.literal, c)
        inc(pos)
    L.bufpos = pos
proc getCharacter(L: var TLexer, tok: var TToken) =
  ## Scans a character literal; bufpos starts on the opening apostrophe.
  ## Control characters and a bare ' are rejected; escapes are delegated
  ## to getEscapedChar.
  tokenBegin(L.bufpos)
  inc(L.bufpos)               # skip '
  var c = L.buf[L.bufpos]
  case c
  of '\0'..pred(' '), '\'': lexMessage(L, errInvalidCharacterConstant)
  of '\\': getEscapedChar(L, tok)
  else:
    tok.literal = $c
    inc(L.bufpos)
  if L.buf[L.bufpos] != '\'': lexMessage(L, errMissingFinalQuote)
  tokenEndIgnore(L.bufpos)
  inc(L.bufpos)               # skip '
proc getSymbol(L: var TLexer, tok: var TToken) =
  ## Scans an identifier or keyword. Hashes the symbol case-insensitively
  ## (and underscore-insensitively) while scanning, interns it in the
  ## identifier cache, and classifies keywords by the interned id.
  var h: Hash = 0
  var pos = L.bufpos
  var buf = L.buf
  tokenBegin(pos)
  while true:
    var c = buf[pos]
    case c
    of 'a'..'z', '0'..'9', '\x80'..'\xFF':
      h = h !& ord(c)
      inc(pos)
    of 'A'..'Z':
      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
      h = h !& ord(c)
      inc(pos)
    of '_':
      # underscores are ignored for the hash but must be followed by
      # another symbol character
      if buf[pos+1] notin SymChars:
        lexMessage(L, errInvalidToken, "_")
        break
      inc(pos)
    else: break
  tokenEnd(pos-1)
  h = !$h
  tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
  L.bufpos = pos
  # keyword idents occupy a known id range aligned with TTokType:
  if (tok.ident.id < ord(tokKeywordLow) - ord(tkSymbol)) or
      (tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)):
    tok.tokType = tkSymbol
  else:
    tok.tokType = TTokType(tok.ident.id + ord(tkSymbol))
  738. proc endOperator(L: var TLexer, tok: var TToken, pos: int,
  739. hash: Hash) {.inline.} =
  740. var h = !$hash
  741. tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
  742. if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr
  743. else: tok.tokType = TTokType(tok.ident.id - oprLow + ord(tkColon))
  744. L.bufpos = pos
  745. proc getOperator(L: var TLexer, tok: var TToken) =
  746. var pos = L.bufpos
  747. var buf = L.buf
  748. tokenBegin(pos)
  749. var h: Hash = 0
  750. while true:
  751. var c = buf[pos]
  752. if c notin OpChars: break
  753. h = h !& ord(c)
  754. inc(pos)
  755. endOperator(L, tok, pos, h)
  756. tokenEnd(pos-1)
  757. # advance pos but don't store it in L.bufpos so the next token (which might
  758. # be an operator too) gets the preceding spaces:
  759. tok.strongSpaceB = 0
  760. while buf[pos] == ' ':
  761. inc pos
  762. inc tok.strongSpaceB
  763. if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
  764. tok.strongSpaceB = -1
proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
                          isDoc: bool) =
  ## Skips a multi-line comment whose opening bracket ends just before
  ## `start`. When `isDoc` is true the comment is a `##[ ... ]##` doc
  ## comment: its text is collected into `tok.literal`, `tok.iNumber` counts
  ## the line breaks, and up to `toStrip` leading spaces are removed from
  ## every continuation line. Same-kind nested comments are tracked via
  ## `nesting`. Reaching end of file before the closer is an error.
  var pos = start
  var buf = L.buf
  var toStrip = 0
  tokenBegin(pos)
  # detect the amount of indentation:
  if isDoc:
    toStrip = getColNumber(L, pos)
    while buf[pos] == ' ': inc pos
    if buf[pos] in {CR, LF}:
      # opening line carries no text: take the next line's indentation instead
      pos = handleCRLF(L, pos)
      buf = L.buf
      toStrip = 0
      while buf[pos] == ' ':
        inc pos
        inc toStrip
  var nesting = 0
  while true:
    case buf[pos]
    of '#':
      if isDoc:
        if buf[pos+1] == '#' and buf[pos+2] == '[':
          inc nesting  # nested ##[
        tok.literal.add '#'
      elif buf[pos+1] == '[':
        inc nesting    # nested #[
      inc pos
    of ']':
      if isDoc:
        if buf[pos+1] == '#' and buf[pos+2] == '#':
          if nesting == 0:
            tokenEndIgnore(pos+2)
            inc(pos, 3)  # consume the full ']##'
            break
          dec nesting
        tok.literal.add ']'
      elif buf[pos+1] == '#':
        if nesting == 0:
          tokenEndIgnore(pos+1)
          inc(pos, 2)    # consume the full ']#'
          break
        dec nesting
      inc pos
    of CR, LF:
      tokenEndIgnore(pos)
      pos = handleCRLF(L, pos)
      buf = L.buf
      # strip leading whitespace:
      if isDoc:
        tok.literal.add "\n"
        inc tok.iNumber
        var c = toStrip
        while buf[pos] == ' ' and c > 0:
          inc pos
          dec c
    of nimlexbase.EndOfFile:
      tokenEndIgnore(pos)
      lexMessagePos(L, errGenerated, pos, "end of multiline comment expected")
      break
    else:
      if isDoc: tok.literal.add buf[pos]
      inc(pos)
  L.bufpos = pos
  829. proc scanComment(L: var TLexer, tok: var TToken) =
  830. var pos = L.bufpos
  831. var buf = L.buf
  832. tok.tokType = tkComment
  833. # iNumber contains the number of '\n' in the token
  834. tok.iNumber = 0
  835. assert buf[pos+1] == '#'
  836. if buf[pos+2] == '[':
  837. skipMultiLineComment(L, tok, pos+3, true)
  838. return
  839. tokenBegin(pos)
  840. inc(pos, 2)
  841. var toStrip = 0
  842. while buf[pos] == ' ':
  843. inc pos
  844. inc toStrip
  845. while true:
  846. var lastBackslash = -1
  847. while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
  848. if buf[pos] == '\\': lastBackslash = pos+1
  849. add(tok.literal, buf[pos])
  850. inc(pos)
  851. tokenEndIgnore(pos)
  852. pos = handleCRLF(L, pos)
  853. buf = L.buf
  854. var indent = 0
  855. while buf[pos] == ' ':
  856. inc(pos)
  857. inc(indent)
  858. if buf[pos] == '#' and buf[pos+1] == '#':
  859. tok.literal.add "\n"
  860. inc(pos, 2)
  861. var c = toStrip
  862. while buf[pos] == ' ' and c > 0:
  863. inc pos
  864. dec c
  865. inc tok.iNumber
  866. else:
  867. if buf[pos] > ' ':
  868. L.indentAhead = indent
  869. tokenEndIgnore(pos)
  870. break
  871. L.bufpos = pos
proc skip(L: var TLexer, tok: var TToken) =
  ## Skips whitespace and non-documentation comments in front of the next
  ## token. Counts leading spaces into `tok.strongSpaceA`; after a line break
  ## it measures the new line's indentation into `tok.indent` and
  ## `L.currLineIndent`. Stops right in front of `##` doc comments so that
  ## the caller can turn them into tkComment tokens.
  var pos = L.bufpos
  var buf = L.buf
  tokenBegin(pos)
  tok.strongSpaceA = 0
  while true:
    case buf[pos]
    of ' ':
      inc(pos)
      inc(tok.strongSpaceA)
    of '\t':
      if not L.allowTabs: lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
      inc(pos)
    of CR, LF:
      tokenEndPrevious(pos)
      pos = handleCRLF(L, pos)
      buf = L.buf
      # measure the fresh line's indentation; embedded #[ ]# comments are
      # treated as part of the leading whitespace:
      var indent = 0
      while true:
        if buf[pos] == ' ':
          inc(pos)
          inc(indent)
        elif buf[pos] == '#' and buf[pos+1] == '[':
          skipMultiLineComment(L, tok, pos+2, false)
          pos = L.bufpos
          buf = L.buf
        else:
          break
      tok.strongSpaceA = 0
      # only a line that starts with a real token or a '##' doc comment
      # establishes a new indentation; blank/comment-only lines do not:
      if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'):
        tok.indent = indent
        L.currLineIndent = indent
        break
    of '#':
      # do not skip documentation comment:
      if buf[pos+1] == '#': break
      if buf[pos+1] == '[':
        skipMultiLineComment(L, tok, pos+2, false)
        pos = L.bufpos
        buf = L.buf
      else:
        # ordinary single-line '#' comment: skip to end of line
        tokenBegin(pos)
        while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: inc(pos)
        tokenEndIgnore(pos+1)
    else:
      break # EndOfFile also leaves the loop
  tokenEndPrevious(pos-1)
  L.bufpos = pos
proc rawGetTok*(L: var TLexer, tok: var TToken) =
  ## Reads the next raw token from the stream into `tok`: skips leading
  ## whitespace and plain comments, records line/column, then dispatches on
  ## the first character to the specialized scanning procs.
  template atTokenEnd() {.dirty.} =
    when defined(nimsuggest):
      # we attach the cursor to the last *strong* token
      if tok.tokType notin weakTokens:
        L.previousToken.line = tok.line.int16
        L.previousToken.col = tok.col.int16

  when defined(nimsuggest):
    L.cursor = CursorPosition.None
  fillToken(tok)
  # an indentation stashed by scanComment takes precedence over -1:
  if L.indentAhead >= 0:
    tok.indent = L.indentAhead
    L.currLineIndent = L.indentAhead
    L.indentAhead = -1
  else:
    tok.indent = -1
  skip(L, tok)
  var c = L.buf[L.bufpos]
  tok.line = L.lineNumber
  tok.col = getColNumber(L, L.bufpos)
  # 'r', 'R' and 'l' start symbols too but need special handling below:
  if c in SymStartChars - {'r', 'R', 'l'}:
    getSymbol(L, tok)
  else:
    case c
    of '#':
      scanComment(L, tok)
    of '*':
      # '*:' is unfortunately a special case, because it is two tokens in
      # 'var v*: int'.
      if L.buf[L.bufpos+1] == ':' and L.buf[L.bufpos+2] notin OpChars:
        var h = 0 !& ord('*')
        endOperator(L, tok, L.bufpos+1, h)
      else:
        getOperator(L, tok)
    of ',':
      tok.tokType = tkComma
      inc(L.bufpos)
    of 'l':
      # if we parsed exactly one character and its a small L (l), this
      # is treated as a warning because it may be confused with the number 1
      if L.buf[L.bufpos+1] notin (SymChars + {'_'}):
        lexMessage(L, warnSmallLshouldNotBeUsed)
      getSymbol(L, tok)
    of 'r', 'R':
      # r"..." / R"..." is a raw string literal:
      if L.buf[L.bufpos + 1] == '\"':
        inc(L.bufpos)
        getString(L, tok, true)
      else:
        getSymbol(L, tok)
    of '(':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
        tok.tokType = tkParDotLe
        inc(L.bufpos)
      else:
        tok.tokType = tkParLe
        when defined(nimsuggest):
          if L.fileIdx == gTrackPos.fileIndex and tok.col < gTrackPos.col and
              tok.line == gTrackPos.line and gIdeCmd == ideCon:
            gTrackPos.col = tok.col.int16
    of ')':
      tok.tokType = tkParRi
      inc(L.bufpos)
    of '[':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
        tok.tokType = tkBracketDotLe
        inc(L.bufpos)
      else:
        tok.tokType = tkBracketLe
    of ']':
      tok.tokType = tkBracketRi
      inc(L.bufpos)
    of '.':
      when defined(nimsuggest):
        # track the suggestion cursor when it sits right behind the dot:
        if L.fileIdx == gTrackPos.fileIndex and tok.col+1 == gTrackPos.col and
            tok.line == gTrackPos.line and gIdeCmd == ideSug:
          tok.tokType = tkDot
          L.cursor = CursorPosition.InToken
          gTrackPos.col = tok.col.int16
          inc(L.bufpos)
          atTokenEnd()
          return
      if L.buf[L.bufpos+1] == ']':
        tok.tokType = tkBracketDotRi
        inc(L.bufpos, 2)
      elif L.buf[L.bufpos+1] == '}':
        tok.tokType = tkCurlyDotRi
        inc(L.bufpos, 2)
      elif L.buf[L.bufpos+1] == ')':
        tok.tokType = tkParDotRi
        inc(L.bufpos, 2)
      else:
        getOperator(L, tok)
    of '{':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
        tok.tokType = tkCurlyDotLe
        inc(L.bufpos)
      else:
        tok.tokType = tkCurlyLe
    of '}':
      tok.tokType = tkCurlyRi
      inc(L.bufpos)
    of ';':
      tok.tokType = tkSemiColon
      inc(L.bufpos)
    of '`':
      tok.tokType = tkAccent
      inc(L.bufpos)
    of '_':
      inc(L.bufpos)
      if L.buf[L.bufpos] notin SymChars+{'_'}:
        # a lone '_' is accepted as the special underscore symbol:
        tok.tokType = tkSymbol
        tok.ident = L.cache.getIdent("_")
      else:
        tok.literal = $c
        tok.tokType = tkInvalid
        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
    of '\"':
      # check for extended raw string literal:
      var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars
      getString(L, tok, rawMode)
      if rawMode:
        # tkRStrLit -> tkGStrLit
        # tkTripleStrLit -> tkGTripleStrLit
        inc(tok.tokType, 2)
    of '\'':
      tok.tokType = tkCharLit
      getCharacter(L, tok)
      # NOTE(review): tokType is assigned again after getCharacter —
      # presumably to undo a change made inside getEscapedChar (not visible
      # here); confirm before removing the apparent duplicate.
      tok.tokType = tkCharLit
    of '0'..'9':
      getNumber(L, tok)
      let c = L.buf[L.bufpos]
      # a number immediately followed by identifier chars is malformed:
      if c in SymChars+{'_'}:
        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
    else:
      if c in OpChars:
        getOperator(L, tok)
      elif c == nimlexbase.EndOfFile:
        tok.tokType = tkEof
        tok.indent = 0
      else:
        tok.literal = $c
        tok.tokType = tkInvalid
        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
        inc(L.bufpos)
  atTokenEnd()