lexer.nim 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. # This scanner is handwritten for efficiency. I used an elegant buffering
  10. # scheme which I have not seen anywhere else:
  11. # We guarantee that a whole line is in the buffer. Thus only when scanning
  12. # the \n or \r character we have to check whether we need to read in the next
  13. # chunk. (\n or \r already need special handling for incrementing the line
  14. # counter; choosing both \n and \r allows the scanner to properly read Unix,
  15. # DOS or Macintosh text files, even when it is not the native format.)
  16. import
  17. hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
  18. wordrecg
const
  MaxLineLength* = 80 # lines longer than this lead to a warning
  numChars*: set[char] = {'0'..'9', 'a'..'z', 'A'..'Z'}   # chars that may appear in a numeric literal
  SymChars*: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}  # continuation chars of identifiers
  SymStartChars*: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'}       # chars an identifier may start with
  OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
    '|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'}              # chars that form operator tokens
  # don't forget to update the 'highlite' module if these charsets should change
type
  TTokType* = enum
    # NOTE: the member order is load-bearing: keywords must sit exactly
    # between tkSymbol and tkIntLit (see tokKeywordLow/tokKeywordHigh below),
    # and TokTypeToStr must be kept in the same order.
    tkInvalid, tkEof,         # order is important here!
    tkSymbol, # keywords:
    tkAddr, tkAnd, tkAs, tkAsm, tkAtomic,
    tkBind, tkBlock, tkBreak, tkCase, tkCast,
    tkConcept, tkConst, tkContinue, tkConverter,
    tkDefer, tkDiscard, tkDistinct, tkDiv, tkDo,
    tkElif, tkElse, tkEnd, tkEnum, tkExcept, tkExport,
    tkFinally, tkFor, tkFrom, tkFunc,
    tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface,
    tkIs, tkIsnot, tkIterator,
    tkLet,
    tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin,
    tkObject, tkOf, tkOr, tkOut,
    tkProc, tkPtr, tkRaise, tkRef, tkReturn,
    tkShl, tkShr, tkStatic,
    tkTemplate,
    tkTry, tkTuple, tkType, tkUsing,
    tkVar, tkWhen, tkWhile, tkXor,
    tkYield, # end of keywords
    tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit,
    tkUIntLit, tkUInt8Lit, tkUInt16Lit, tkUInt32Lit, tkUInt64Lit,
    tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit,
    tkStrLit, tkRStrLit, tkTripleStrLit,
    tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
    tkBracketRi, tkCurlyLe, tkCurlyRi,
    tkBracketDotLe, tkBracketDotRi, # [. and .]
    tkCurlyDotLe, tkCurlyDotRi, # {. and .}
    tkParDotLe, tkParDotRi,   # (. and .)
    tkComma, tkSemiColon,
    tkColon, tkColonColon, tkEquals, tkDot, tkDotDot,
    tkOpr, tkComment, tkAccent,
    tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr
  TTokTypes* = set[TTokType]
const
  weakTokens = {tkComma, tkSemiColon, tkColon,
                tkParRi, tkParDotRi, tkBracketRi, tkBracketDotRi,
                tkCurlyRi} # \
    # tokens that should not be considered for previousToken
  tokKeywordLow* = succ(tkSymbol)   # first keyword member of TTokType
  tokKeywordHigh* = pred(tkIntLit)  # last keyword member of TTokType
  # Display strings, indexed by TTokType; MUST stay in the exact order of the
  # enum above (keywords render as their source spelling, punctuation as the
  # literal characters, everything else as the enum name).
  TokTypeToStr*: array[TTokType, string] = ["tkInvalid", "[EOF]",
    "tkSymbol",
    "addr", "and", "as", "asm", "atomic",
    "bind", "block", "break", "case", "cast",
    "concept", "const", "continue", "converter",
    "defer", "discard", "distinct", "div", "do",
    "elif", "else", "end", "enum", "except", "export",
    "finally", "for", "from", "func", "generic", "if",
    "import", "in", "include", "interface", "is", "isnot", "iterator",
    "let",
    "macro", "method", "mixin", "mod",
    "nil", "not", "notin", "object", "of", "or",
    "out", "proc", "ptr", "raise", "ref", "return",
    "shl", "shr", "static",
    "template",
    "try", "tuple", "type", "using",
    "var", "when", "while", "xor",
    "yield",
    "tkIntLit", "tkInt8Lit", "tkInt16Lit", "tkInt32Lit", "tkInt64Lit",
    "tkUIntLit", "tkUInt8Lit", "tkUInt16Lit", "tkUInt32Lit", "tkUInt64Lit",
    "tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkFloat128Lit",
    "tkStrLit", "tkRStrLit",
    "tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(",
    ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)",
    ",", ";",
    ":", "::", "=", ".", "..",
    "tkOpr", "tkComment", "`",
    "tkSpaces", "tkInfixOpr",
    "tkPrefixOpr", "tkPostfixOpr"]
type
  TNumericalBase* = enum
    base10,                   # base10 is listed as the first element,
                              # so that it is the correct default value
    base2, base8, base16
  CursorPosition* {.pure.} = enum ## XXX remove this again
    None, InToken, BeforeToken, AfterToken
  TToken* = object            # a Nim token
    tokType*: TTokType        # the type of the token
    indent*: int              # the indentation; != -1 if the token has been
                              # preceded with indentation
    ident*: PIdent            # the parsed identifier
    iNumber*: BiggestInt      # the parsed integer literal
    fNumber*: BiggestFloat    # the parsed floating point literal
    base*: TNumericalBase     # the numerical base; only valid for int
                              # or float literals
    strongSpaceA*: int8       # leading spaces of an operator
    strongSpaceB*: int8       # trailing spaces of an operator
    literal*: string          # the parsed (string) literal; and
                              # documentation comments are here too
    line*, col*: int
    when defined(nimpretty):
      offsetA*, offsetB*: int # used for pretty printing so that literals
                              # like 0b01 or r"\L" are unaffected
      commentOffsetA*, commentOffsetB*: int
  TErrorHandler* = proc (info: TLineInfo; msg: TMsgKind; arg: string)
  TLexer* = object of TBaseLexer
    fileIdx*: int32           # index of the file being lexed
    indentAhead*: int         # if > 0 an indentation has already been read
                              # this is needed because scanning comments
                              # needs so much look-ahead
    currLineIndent*: int
    strongSpaces*, allowTabs*: bool
    cursor*: CursorPosition   # nimsuggest: where the IDE cursor sits
    errorHandler*: TErrorHandler  # optional diagnostic sink; nil -> msgs module
    cache*: IdentCache        # shared identifier interning cache
    when defined(nimsuggest):
      previousToken: TLineInfo
when defined(nimpretty):
  var
    gIndentationWidth*: int   # detected indentation width for pretty printing

var gLinesCompiled*: int  # all lines that have been compiled
proc getLineInfo*(L: TLexer, tok: TToken): TLineInfo {.inline.} =
  ## Builds a `TLineInfo` for `tok` within `L`'s file; under nimpretty the
  ## token's source/comment offsets are carried along as well.
  result = newLineInfo(L.fileIdx, tok.line, tok.col)
  when defined(nimpretty):
    result.offsetA = tok.offsetA
    result.offsetB = tok.offsetB
    result.commentOffsetA = tok.commentOffsetA
    result.commentOffsetB = tok.commentOffsetB
  147. proc isKeyword*(kind: TTokType): bool =
  148. result = (kind >= tokKeywordLow) and (kind <= tokKeywordHigh)
# Bit mask with the lowest `n` bits set, e.g. ones(6) == 0b111111.
template ones(n): untyped = ((1 shl n)-1) # for utf-8 conversion
  150. proc isNimIdentifier*(s: string): bool =
  151. if s[0] in SymStartChars:
  152. var i = 1
  153. var sLen = s.len
  154. while i < sLen:
  155. if s[i] == '_':
  156. inc(i)
  157. if s[i] notin SymChars: return
  158. inc(i)
  159. result = true
proc tokToStr*(tok: TToken): string =
  ## Renders `tok` for display: numeric literals print their value,
  ## string/char literals and comments print their raw text, fixed
  ## punctuation uses `TokTypeToStr`, everything else falls back to the
  ## interned identifier (or "" when there is none).
  case tok.tokType
  of tkIntLit..tkInt64Lit: result = $tok.iNumber
  of tkFloatLit..tkFloat64Lit: result = $tok.fNumber
  of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal
  of tkParLe..tkColon, tkEof, tkAccent:
    result = TokTypeToStr[tok.tokType]
  else:
    if tok.ident != nil:
      result = tok.ident.s
    else:
      result = ""
proc prettyTok*(tok: TToken): string =
  ## Like `tokToStr`, but prefixes keyword tokens with "keyword " for
  ## friendlier diagnostics.
  if isKeyword(tok.tokType): result = "keyword " & tok.ident.s
  else: result = tokToStr(tok)
proc printTok*(tok: TToken) =
  ## Debug helper: writes "line:col<TAB>kind text" for `tok` via msgWriteln.
  msgWriteln($tok.line & ":" & $tok.col & "\t" &
      TokTypeToStr[tok.tokType] & " " & tokToStr(tok))
proc initToken*(L: var TToken) =
  ## Resets `L` to the "no token" state, allocating a fresh empty literal.
  ## Keep in sync with `fillToken`, which clears the same fields but reuses
  ## the literal's storage.
  L.tokType = tkInvalid
  L.iNumber = 0
  L.indent = 0
  L.strongSpaceA = 0
  L.literal = ""
  L.fNumber = 0.0
  L.base = base10
  L.ident = nil
  when defined(nimpretty):
    L.commentOffsetA = 0
    L.commentOffsetB = 0
proc fillToken(L: var TToken) =
  ## Like `initToken`, but truncates the existing literal in place
  ## (setLen) instead of allocating a new string — used on the hot path.
  L.tokType = tkInvalid
  L.iNumber = 0
  L.indent = 0
  L.strongSpaceA = 0
  setLen(L.literal, 0)
  L.fNumber = 0.0
  L.base = base10
  L.ident = nil
  when defined(nimpretty):
    L.commentOffsetA = 0
    L.commentOffsetB = 0
proc openLexer*(lex: var TLexer, fileIdx: int32, inputstream: PLLStream;
                cache: IdentCache) =
  ## Initializes `lex` over `inputstream` for file `fileIdx`, honoring the
  ## stream's `lineOffset` and installing the shared identifier `cache`.
  openBaseLexer(lex, inputstream)
  lex.fileIdx = fileidx
  lex.indentAhead = - 1     # -1: no indentation buffered yet
  lex.currLineIndent = 0
  inc(lex.lineNumber, inputstream.lineOffset)
  lex.cache = cache
  when defined(nimsuggest):
    lex.previousToken.fileIndex = fileIdx
proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream;
                cache: IdentCache) =
  ## Convenience overload: resolves `filename` to a file index and delegates.
  openLexer(lex, filename.fileInfoIdx, inputstream, cache)
proc closeLexer*(lex: var TLexer) =
  ## Tears down `lex`, adding its line count to the global compile statistics.
  inc(gLinesCompiled, lex.lineNumber)
  closeBaseLexer(lex)
proc getLineInfo(L: TLexer): TLineInfo =
  ## Line info for the lexer's current buffer position.
  result = newLineInfo(L.fileIdx, L.lineNumber, getColNumber(L, L.bufpos))
  220. proc dispMessage(L: TLexer; info: TLineInfo; msg: TMsgKind; arg: string) =
  221. if L.errorHandler.isNil:
  222. msgs.message(info, msg, arg)
  223. else:
  224. L.errorHandler(info, msg, arg)
proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") =
  ## Reports `msg` at the lexer's current position.
  L.dispMessage(getLineInfo(L), msg, arg)
proc lexMessageTok*(L: TLexer, msg: TMsgKind, tok: TToken, arg = "") =
  ## Reports `msg` at the position recorded in `tok`.
  var info = newLineInfo(L.fileIdx, tok.line, tok.col)
  L.dispMessage(info, msg, arg)
proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
  ## Reports `msg` at absolute buffer offset `pos` on the current line
  ## (column is derived from the line start).
  var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart)
  L.dispMessage(info, msg, arg)
  233. proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool =
  234. result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second)
template tokenBegin(tok, pos) {.dirty.} =
  # Marks the start of a token: records the start column for nimsuggest
  # cursor tracking (colA is deliberately injected via {.dirty.} so the
  # tokenEnd* templates below can read it) and the byte offset for nimpretty.
  when defined(nimsuggest):
    var colA = getColNumber(L, pos)
  when defined(nimpretty):
    tok.offsetA = L.offsetBase + pos
template tokenEnd(tok, pos) {.dirty.} =
  # Marks the end of a token; if the tracked IDE position falls inside the
  # [colA, colB] span on this line, the cursor is flagged as InToken and
  # snapped to the token start.
  when defined(nimsuggest):
    let colB = getColNumber(L, pos)+1
    if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
        L.lineNumber == gTrackPos.line and gIdeCmd in {ideSug, ideCon}:
      L.cursor = CursorPosition.InToken
      gTrackPos.col = colA.int16
    colA = 0
  when defined(nimpretty):
    tok.offsetB = L.offsetBase + pos
template tokenEndIgnore(tok, pos) =
  # Like tokenEnd, but for spans where hitting the cursor means "ignore it"
  # (string literals, comments): the track position is invalidated instead
  # of attached to a token.
  when defined(nimsuggest):
    let colB = getColNumber(L, pos)
    if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
        L.lineNumber == gTrackPos.line and gIdeCmd in {ideSug, ideCon}:
      gTrackPos.fileIndex = trackPosInvalidFileIdx
      gTrackPos.line = -1
    colA = 0
  when defined(nimpretty):
    tok.offsetB = L.offsetBase + pos
template tokenEndPrevious(tok, pos) =
  when defined(nimsuggest):
    # when we detect the cursor in whitespace, we attach the track position
    # to the token that came before that, but only if we haven't detected
    # the cursor in a string literal or comment:
    let colB = getColNumber(L, pos)
    if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
        L.lineNumber == gTrackPos.line and gIdeCmd in {ideSug, ideCon}:
      L.cursor = CursorPosition.BeforeToken
      gTrackPos = L.previousToken
      gTrackPosAttached = true
    colA = 0
  when defined(nimpretty):
    tok.offsetB = L.offsetBase + pos
{.push overflowChecks: off.}
# We need to parse the largest uint literal without overflow checks
proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int =
  ## Parses an unsigned decimal number from `s` at `start` into `b`,
  ## deliberately letting the bit pattern wrap (overflow checks are off so
  ## uint64.high survives the signed accumulator). Underscores between
  ## digits are skipped. Returns the number of characters consumed
  ## (0 if `s[start]` is not a digit).
  var i = start
  if s[i] in {'0'..'9'}:
    b = 0
    while s[i] in {'0'..'9'}:
      b = b * 10 + (ord(s[i]) - ord('0'))
      inc(i)
      while s[i] == '_': inc(i) # underscores are allowed and ignored
    result = i - start
{.pop.} # overflowChecks
template eatChar(L: var TLexer, t: var TToken, replacementChar: char) =
  # Appends `replacementChar` (rather than the actual input char) to the
  # token literal and advances — used to normalize e.g. 'X' -> 'x'.
  add(t.literal, replacementChar)
  inc(L.bufpos)

template eatChar(L: var TLexer, t: var TToken) =
  # Appends the current input char verbatim and advances.
  add(t.literal, L.buf[L.bufpos])
  inc(L.bufpos)
proc getNumber(L: var TLexer, result: var TToken) =
  ## Scans a numeric literal at the cursor into `result`, in three stages:
  ## 1. determine the base and collect the literal text,
  ## 2. consume an optional type suffix ('i32, 'f64, u, d, ...),
  ## 3. re-parse the collected text into iNumber/fNumber with range checks.

  proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
    # Collects a run of `chars`, allowing single underscores between them;
    # a '_' not followed by a matching char is reported as an invalid token.
    var pos = L.bufpos              # use registers for pos, buf
    var buf = L.buf
    while true:
      if buf[pos] in chars:
        add(tok.literal, buf[pos])
        inc(pos)
      else:
        break
      if buf[pos] == '_':
        if buf[pos+1] notin chars:
          lexMessage(L, errInvalidToken, "_")
          break
        add(tok.literal, '_')
        inc(pos)
    L.bufpos = pos

  proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) =
    # Collects a plain run of `chars` (no underscore handling).
    var pos = L.bufpos              # use registers for pos, buf
    var buf = L.buf
    while buf[pos] in chars:
      add(tok.literal, buf[pos])
      inc(pos)
    L.bufpos = pos

  proc lexMessageLitNum(L: var TLexer, msg: TMsgKind, startpos: int) =
    # Used to get slightly human friendlier err messages.
    # Note: the erroneous 'O' char in the character set is intentional
    const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
      'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
    var msgPos = L.bufpos
    var t: TToken
    t.literal = ""
    L.bufpos = startpos # Use L.bufpos as pos because of matchChars
    matchChars(L, t, literalishChars)
    # We must verify +/- specifically so that we're not past the literal
    if L.buf[L.bufpos] in {'+', '-'} and
        L.buf[L.bufpos - 1] in {'e', 'E'}:
      add(t.literal, L.buf[L.bufpos])
      inc(L.bufpos)
      matchChars(L, t, literalishChars)
    if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
      inc(L.bufpos)
      # NOTE(review): this adds the char *after* the suffix marker to the
      # message (inc happens before add) — presumably intentional for the
      # message text, but worth confirming.
      add(t.literal, L.buf[L.bufpos])
      matchChars(L, t, {'0'..'9'})
    L.bufpos = msgPos
    lexMessage(L, msg, t.literal)

  var
    startpos, endpos: int
    xi: BiggestInt
    isBase10 = true
  const
    baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'}
    literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''}
    floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit}
  result.tokType = tkIntLit   # int literal until we know better
  result.literal = ""
  result.base = base10
  startpos = L.bufpos
  tokenBegin(result, startPos)
  # First stage: find out base, make verifications, build token literal string
  if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}:
    isBase10 = false
    eatChar(L, result, '0')
    case L.buf[L.bufpos]
    of 'O':
      # 0O... is rejected with a dedicated message (easy octal confusion)
      lexMessageLitNum(L, errInvalidNumberOctalCode, startpos)
    of 'x', 'X':
      eatChar(L, result, 'x')
      matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'})
    of 'o', 'c', 'C':
      eatChar(L, result, 'c')    # normalize octal prefixes to 'c'
      matchUnderscoreChars(L, result, {'0'..'7'})
    of 'b', 'B':
      eatChar(L, result, 'b')
      matchUnderscoreChars(L, result, {'0'..'1'})
    else:
      internalError(getLineInfo(L), "getNumber")
  else:
    matchUnderscoreChars(L, result, {'0'..'9'})
    if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
      result.tokType = tkFloatLit
      eatChar(L, result, '.')
      matchUnderscoreChars(L, result, {'0'..'9'})
    if L.buf[L.bufpos] in {'e', 'E'}:
      result.tokType = tkFloatLit
      eatChar(L, result, 'e')
      if L.buf[L.bufpos] in {'+', '-'}:
        eatChar(L, result)
      matchUnderscoreChars(L, result, {'0'..'9'})
  endpos = L.bufpos
  # Second stage, find out if there's a datatype suffix and handle it
  var postPos = endpos
  if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
    if L.buf[postPos] == '\'':
      inc(postPos)               # the apostrophe before a suffix is optional
    case L.buf[postPos]
    of 'f', 'F':
      inc(postPos)
      if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
        result.tokType = tkFloat32Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
        result.tokType = tkFloat64Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '1') and
           (L.buf[postPos + 1] == '2') and
           (L.buf[postPos + 2] == '8'):
        result.tokType = tkFloat128Lit
        inc(postPos, 3)
      else:   # "f" alone defaults to float32
        result.tokType = tkFloat32Lit
    of 'd', 'D':  # ad hoc convenience shortcut for f64
      inc(postPos)
      result.tokType = tkFloat64Lit
    of 'i', 'I':
      inc(postPos)
      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
        result.tokType = tkInt64Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
        result.tokType = tkInt32Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
        result.tokType = tkInt16Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '8'):
        result.tokType = tkInt8Lit
        inc(postPos)
      else:
        lexMessageLitNum(L, errInvalidNumber, startpos)
    of 'u', 'U':
      inc(postPos)
      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
        result.tokType = tkUInt64Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
        result.tokType = tkUInt32Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
        result.tokType = tkUInt16Lit
        inc(postPos, 2)
      elif (L.buf[postPos] == '8'):
        result.tokType = tkUInt8Lit
        inc(postPos)
      else:
        result.tokType = tkUIntLit   # bare 'u'
    else:
      lexMessageLitNum(L, errInvalidNumber, startpos)
  # Is there still a literalish char awaiting? Then it's an error!
  if L.buf[postPos] in literalishChars or
     (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
    lexMessageLitNum(L, errInvalidNumber, startpos)
  # Third stage, extract actual number
  L.bufpos = startpos            # restore position
  var pos: int = startpos
  try:
    if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
      inc(pos, 2)
      xi = 0                     # it is a base prefix
      # accumulate the bit pattern into xi
      case L.buf[pos - 1]
      of 'b', 'B':
        result.base = base2
        while pos < endpos:
          if L.buf[pos] != '_':
            xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
          inc(pos)
      of 'o', 'c', 'C':
        result.base = base8
        while pos < endpos:
          if L.buf[pos] != '_':
            xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
          inc(pos)
      of 'x', 'X':
        result.base = base16
        while pos < endpos:
          case L.buf[pos]
          of '_':
            inc(pos)
          of '0'..'9':
            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
            inc(pos)
          of 'a'..'f':
            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
            inc(pos)
          of 'A'..'F':
            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
            inc(pos)
          else:
            break
      else:
        internalError(getLineInfo(L), "getNumber")
      # reinterpret the collected bit pattern according to the suffix
      case result.tokType
      of tkIntLit, tkInt64Lit: result.iNumber = xi
      of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi))))
      of tkInt16Lit: result.iNumber = BiggestInt(int16(toU16(int(xi))))
      of tkInt32Lit: result.iNumber = BiggestInt(int32(toU32(int64(xi))))
      of tkUIntLit, tkUInt64Lit: result.iNumber = xi
      of tkUInt8Lit: result.iNumber = BiggestInt(uint8(toU8(int(xi))))
      of tkUInt16Lit: result.iNumber = BiggestInt(uint16(toU16(int(xi))))
      of tkUInt32Lit: result.iNumber = BiggestInt(uint32(toU32(int64(xi))))
      of tkFloat32Lit:
        result.fNumber = (cast[PFloat32](addr(xi)))[]
        # note: this code is endian neutral!
        # XXX: Test this on big endian machine!
      of tkFloat64Lit, tkFloatLit:
        result.fNumber = (cast[PFloat64](addr(xi)))[]
      else: internalError(getLineInfo(L), "getNumber")
      # Bounds checks. Non decimal literals are allowed to overflow the range of
      # the datatype as long as their pattern don't overflow _bitwise_, hence
      # below checks of signed sizes against uint*.high is deliberate:
      # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
      if result.tokType notin floatTypes:
        let outOfRange = case result.tokType:
        of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
        of tkInt8Lit: (xi > BiggestInt(uint8.high))
        of tkInt16Lit: (xi > BiggestInt(uint16.high))
        of tkInt32Lit: (xi > BiggestInt(uint32.high))
        else: false
        if outOfRange:
          #echo "out of range num: ", result.iNumber, " vs ", xi
          lexMessageLitNum(L, errNumberOutOfRange, startpos)
    else:
      # base-10: parse the collected literal text directly
      case result.tokType
      of floatTypes:
        result.fNumber = parseFloat(result.literal)
      of tkUint64Lit:
        # parseBiggestInt would reject uint64.high, hence the unsafe parser
        xi = 0
        let len = unsafeParseUInt(result.literal, xi)
        if len != result.literal.len or len == 0:
          raise newException(ValueError, "invalid integer: " & $xi)
        result.iNumber = xi
      else:
        result.iNumber = parseBiggestInt(result.literal)
      # Explicit bounds checks
      let outOfRange = case result.tokType:
      of tkInt8Lit: (result.iNumber < int8.low or result.iNumber > int8.high)
      of tkUInt8Lit: (result.iNumber < BiggestInt(uint8.low) or
                      result.iNumber > BiggestInt(uint8.high))
      of tkInt16Lit: (result.iNumber < int16.low or result.iNumber > int16.high)
      of tkUInt16Lit: (result.iNumber < BiggestInt(uint16.low) or
                       result.iNumber > BiggestInt(uint16.high))
      of tkInt32Lit: (result.iNumber < int32.low or result.iNumber > int32.high)
      of tkUInt32Lit: (result.iNumber < BiggestInt(uint32.low) or
                       result.iNumber > BiggestInt(uint32.high))
      else: false
      if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos)
      # Promote int literal to int64? Not always necessary, but more consistent
      if result.tokType == tkIntLit:
        if (result.iNumber < low(int32)) or (result.iNumber > high(int32)):
          result.tokType = tkInt64Lit
  except ValueError:
    lexMessageLitNum(L, errInvalidNumber, startpos)
  except OverflowError, RangeError:
    lexMessageLitNum(L, errNumberOutOfRange, startpos)
  tokenEnd(result, postPos-1)
  L.bufpos = postPos
  548. proc handleHexChar(L: var TLexer, xi: var int) =
  549. case L.buf[L.bufpos]
  550. of '0'..'9':
  551. xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('0'))
  552. inc(L.bufpos)
  553. of 'a'..'f':
  554. xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('a') + 10)
  555. inc(L.bufpos)
  556. of 'A'..'F':
  557. xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('A') + 10)
  558. inc(L.bufpos)
  559. else: discard
  560. proc handleDecChars(L: var TLexer, xi: var int) =
  561. while L.buf[L.bufpos] in {'0'..'9'}:
  562. xi = (xi * 10) + (ord(L.buf[L.bufpos]) - ord('0'))
  563. inc(L.bufpos)
proc getEscapedChar(L: var TLexer, tok: var TToken) =
  ## Consumes a backslash escape at the cursor and appends the denoted
  ## character(s) to `tok.literal`. `\n` is rejected inside character
  ## literals; `\u` escapes are encoded as UTF-8.
  inc(L.bufpos)               # skip '\'
  case L.buf[L.bufpos]
  of 'n', 'N':
    if tok.tokType == tkCharLit: lexMessage(L, errNnotAllowedInCharacter)
    add(tok.literal, tnl)     # platform newline
    inc(L.bufpos)
  of 'r', 'R', 'c', 'C':
    add(tok.literal, CR)
    inc(L.bufpos)
  of 'l', 'L':
    add(tok.literal, LF)
    inc(L.bufpos)
  of 'f', 'F':
    add(tok.literal, FF)
    inc(L.bufpos)
  of 'e', 'E':
    add(tok.literal, ESC)
    inc(L.bufpos)
  of 'a', 'A':
    add(tok.literal, BEL)
    inc(L.bufpos)
  of 'b', 'B':
    add(tok.literal, BACKSPACE)
    inc(L.bufpos)
  of 'v', 'V':
    add(tok.literal, VT)
    inc(L.bufpos)
  of 't', 'T':
    add(tok.literal, '\t')
    inc(L.bufpos)
  of '\'', '\"':
    add(tok.literal, L.buf[L.bufpos])
    inc(L.bufpos)
  of '\\':
    add(tok.literal, '\\')
    inc(L.bufpos)
  of 'x', 'X', 'u', 'U':
    # \xHH: two hex digits -> raw byte; \uHHHH: four hex digits -> UTF-8
    var tp = L.buf[L.bufpos]
    inc(L.bufpos)
    var xi = 0
    handleHexChar(L, xi)
    handleHexChar(L, xi)
    if tp in {'u', 'U'}:
      handleHexChar(L, xi)
      handleHexChar(L, xi)
      # inlined toUTF-8 to avoid unicode and strutils dependencies.
      if xi <=% 127:
        add(tok.literal, xi.char)
      elif xi <=% 0x07FF:
        add(tok.literal, ((xi shr 6) or 0b110_00000).char)
        add(tok.literal, ((xi and ones(6)) or 0b10_0000_00).char)
      elif xi <=% 0xFFFF:
        add(tok.literal, (xi shr 12 or 0b1110_0000).char)
        add(tok.literal, (xi shr 6 and ones(6) or 0b10_0000_00).char)
        add(tok.literal, (xi and ones(6) or 0b10_0000_00).char)
      else: # value is 0xFFFF
        # NOTE(review): values above 0xFFFF appear to be clamped to the
        # U+FFFF byte sequence here — confirm this is intended.
        add(tok.literal, "\xef\xbf\xbf")
    else:
      add(tok.literal, chr(xi))
  of '0'..'9':
    if matchTwoChars(L, '0', {'0'..'9'}):
      lexMessage(L, warnOctalEscape)
    var xi = 0
    handleDecChars(L, xi)
    if (xi <= 255): add(tok.literal, chr(xi))
    else: lexMessage(L, errInvalidCharacterConstant)
  else: lexMessage(L, errInvalidCharacterConstant)
  632. proc newString(s: cstring, len: int): string =
  633. ## XXX, how come there is no support for this?
  634. result = newString(len)
  635. for i in 0 .. <len:
  636. result[i] = s[i]
proc handleCRLF(L: var TLexer, pos: int): int =
  ## Advances over a CR or LF at `pos` (refilling the buffer if needed via
  ## nimlexbase) and returns the new position; any other char returns `pos`
  ## unchanged. On each line end the line is checked for MaxLineLength and,
  ## when requested, recorded for embedded original source.
  template registerLine =
    let col = L.getColNumber(pos)
    if col > MaxLineLength:
      lexMessagePos(L, hintLineTooLong, pos)
    if optEmbedOrigSrc in gGlobalOptions:
      let lineStart = cast[ByteAddress](L.buf) + L.lineStart
      let line = newString(cast[cstring](lineStart), col)
      addSourceLine(L.fileIdx, line)
  case L.buf[pos]
  of CR:
    registerLine()
    result = nimlexbase.handleCR(L, pos)
  of LF:
    registerLine()
    result = nimlexbase.handleLF(L, pos)
  else: result = pos
proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
  ## Scans a string literal at the cursor: triple-quoted long strings
  ## (tkTripleStrLit, newlines normalized to `tnl`), raw strings
  ## (tkRStrLit, "" stands for a quote, no escapes) or ordinary strings
  ## (tkStrLit, backslash escapes via getEscapedChar).
  var pos = L.bufpos
  var buf = L.buf # put `buf` in a register
  var line = L.lineNumber # save linenumber for better error message
  tokenBegin(tok, pos)
  inc pos # skip "
  if buf[pos] == '\"' and buf[pos+1] == '\"':
    tok.tokType = tkTripleStrLit # long string literal:
    inc(pos, 2)    # skip ""
    # skip leading newline:
    if buf[pos] in {' ', '\t'}:
      var newpos = pos+1
      while buf[newpos] in {' ', '\t'}: inc newpos
      if buf[newpos] in {CR, LF}: pos = newpos
    pos = handleCRLF(L, pos)
    buf = L.buf
    while true:
      case buf[pos]
      of '\"':
        # a """ sequence closes the literal unless followed by yet another
        # '"' (so """" inside keeps one quote in the literal)
        if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
            buf[pos+3] != '\"':
          tokenEndIgnore(tok, pos+2)
          L.bufpos = pos + 3 # skip the three """
          break
        add(tok.literal, '\"')
        inc(pos)
      of CR, LF:
        tokenEndIgnore(tok, pos)
        pos = handleCRLF(L, pos)
        buf = L.buf
        add(tok.literal, tnl)
      of nimlexbase.EndOfFile:
        tokenEndIgnore(tok, pos)
        # report the error at the line the literal started on
        var line2 = L.lineNumber
        L.lineNumber = line
        lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart)
        L.lineNumber = line2
        L.bufpos = pos
        break
      else:
        add(tok.literal, buf[pos])
        inc(pos)
  else:
    # ordinary string literal
    if rawMode: tok.tokType = tkRStrLit
    else: tok.tokType = tkStrLit
    while true:
      var c = buf[pos]
      if c == '\"':
        if rawMode and buf[pos+1] == '\"':
          inc(pos, 2)
          add(tok.literal, '"')   # "" in a raw string is an escaped quote
        else:
          tokenEndIgnore(tok, pos)
          inc(pos) # skip '"'
          break
      elif c in {CR, LF, nimlexbase.EndOfFile}:
        tokenEndIgnore(tok, pos)
        lexMessage(L, errClosingQuoteExpected)
        break
      elif (c == '\\') and not rawMode:
        L.bufpos = pos
        getEscapedChar(L, tok)
        pos = L.bufpos
      else:
        add(tok.literal, c)
        inc(pos)
    L.bufpos = pos
  722. proc getCharacter(L: var TLexer, tok: var TToken) =
  723. tokenBegin(tok, L.bufpos)
  724. inc(L.bufpos) # skip '
  725. var c = L.buf[L.bufpos]
  726. case c
  727. of '\0'..pred(' '), '\'': lexMessage(L, errInvalidCharacterConstant)
  728. of '\\': getEscapedChar(L, tok)
  729. else:
  730. tok.literal = $c
  731. inc(L.bufpos)
  732. if L.buf[L.bufpos] != '\'': lexMessage(L, errMissingFinalQuote)
  733. tokenEndIgnore(tok, L.bufpos)
  734. inc(L.bufpos) # skip '
  735. proc getSymbol(L: var TLexer, tok: var TToken) =
  736. var h: Hash = 0
  737. var pos = L.bufpos
  738. var buf = L.buf
  739. tokenBegin(tok, pos)
  740. while true:
  741. var c = buf[pos]
  742. case c
  743. of 'a'..'z', '0'..'9', '\x80'..'\xFF':
  744. h = h !& ord(c)
  745. inc(pos)
  746. of 'A'..'Z':
  747. c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
  748. h = h !& ord(c)
  749. inc(pos)
  750. of '_':
  751. if buf[pos+1] notin SymChars:
  752. lexMessage(L, errInvalidToken, "_")
  753. break
  754. inc(pos)
  755. else: break
  756. tokenEnd(tok, pos-1)
  757. h = !$h
  758. tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
  759. L.bufpos = pos
  760. if (tok.ident.id < ord(tokKeywordLow) - ord(tkSymbol)) or
  761. (tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)):
  762. tok.tokType = tkSymbol
  763. else:
  764. tok.tokType = TTokType(tok.ident.id + ord(tkSymbol))
  765. proc endOperator(L: var TLexer, tok: var TToken, pos: int,
  766. hash: Hash) {.inline.} =
  767. var h = !$hash
  768. tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
  769. if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr
  770. else: tok.tokType = TTokType(tok.ident.id - oprLow + ord(tkColon))
  771. L.bufpos = pos
  772. proc getOperator(L: var TLexer, tok: var TToken) =
  773. var pos = L.bufpos
  774. var buf = L.buf
  775. tokenBegin(tok, pos)
  776. var h: Hash = 0
  777. while true:
  778. var c = buf[pos]
  779. if c notin OpChars: break
  780. h = h !& ord(c)
  781. inc(pos)
  782. endOperator(L, tok, pos, h)
  783. tokenEnd(tok, pos-1)
  784. # advance pos but don't store it in L.bufpos so the next token (which might
  785. # be an operator too) gets the preceding spaces:
  786. tok.strongSpaceB = 0
  787. while buf[pos] == ' ':
  788. inc pos
  789. inc tok.strongSpaceB
  790. if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
  791. tok.strongSpaceB = -1
proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
                          isDoc: bool) =
  ## Consumes a multi-line comment starting at `start` (just past the opening
  ## delimiter).  `isDoc` selects the doc-comment form `##[ ... ]##`, whose
  ## text is collected into `tok.literal` with the opening indentation
  ## stripped from every line; plain `#[ ... ]#` comments are skipped
  ## (collected only when compiled for nimpretty).  Both forms nest.
  var pos = start
  var buf = L.buf
  var toStrip = 0
  tokenBegin(tok, pos)
  # detect the amount of indentation:
  if isDoc:
    toStrip = getColNumber(L, pos)
    while buf[pos] == ' ': inc pos
    if buf[pos] in {CR, LF}:
      # delimiter alone on its line: measure indentation of the next line
      pos = handleCRLF(L, pos)
      buf = L.buf
      toStrip = 0
      while buf[pos] == ' ':
        inc pos
        inc toStrip
  var nesting = 0
  while true:
    case buf[pos]
    of '#':
      if isDoc:
        if buf[pos+1] == '#' and buf[pos+2] == '[':
          inc nesting
        tok.literal.add '#'
      elif buf[pos+1] == '[':
        inc nesting
      inc pos
    of ']':
      if isDoc:
        if buf[pos+1] == '#' and buf[pos+2] == '#':
          if nesting == 0:
            tokenEndIgnore(tok, pos+2)
            inc(pos, 3) # skip "]##"
            break
          dec nesting
        tok.literal.add ']'
      elif buf[pos+1] == '#':
        if nesting == 0:
          tokenEndIgnore(tok, pos+1)
          inc(pos, 2) # skip "]#"
          break
        dec nesting
      inc pos
    of CR, LF:
      tokenEndIgnore(tok, pos)
      pos = handleCRLF(L, pos)
      buf = L.buf # refresh after possible buffer reload
      # strip leading whitespace:
      when defined(nimpretty): tok.literal.add "\L"
      if isDoc:
        when not defined(nimpretty): tok.literal.add "\n"
        inc tok.iNumber # iNumber counts the newlines of the comment token
        # drop up to `toStrip` leading spaces from the continuation line
        var c = toStrip
        while buf[pos] == ' ' and c > 0:
          inc pos
          dec c
    of nimlexbase.EndOfFile:
      tokenEndIgnore(tok, pos)
      lexMessagePos(L, errGenerated, pos, "end of multiline comment expected")
      break
    else:
      if isDoc or defined(nimpretty): tok.literal.add buf[pos]
      inc(pos)
  L.bufpos = pos
  857. proc scanComment(L: var TLexer, tok: var TToken) =
  858. var pos = L.bufpos
  859. var buf = L.buf
  860. tok.tokType = tkComment
  861. # iNumber contains the number of '\n' in the token
  862. tok.iNumber = 0
  863. assert buf[pos+1] == '#'
  864. if buf[pos+2] == '[':
  865. skipMultiLineComment(L, tok, pos+3, true)
  866. return
  867. tokenBegin(tok, pos)
  868. inc(pos, 2)
  869. var toStrip = 0
  870. while buf[pos] == ' ':
  871. inc pos
  872. inc toStrip
  873. while true:
  874. var lastBackslash = -1
  875. while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
  876. if buf[pos] == '\\': lastBackslash = pos+1
  877. add(tok.literal, buf[pos])
  878. inc(pos)
  879. tokenEndIgnore(tok, pos)
  880. pos = handleCRLF(L, pos)
  881. buf = L.buf
  882. var indent = 0
  883. while buf[pos] == ' ':
  884. inc(pos)
  885. inc(indent)
  886. if buf[pos] == '#' and buf[pos+1] == '#':
  887. tok.literal.add "\n"
  888. inc(pos, 2)
  889. var c = toStrip
  890. while buf[pos] == ' ' and c > 0:
  891. inc pos
  892. dec c
  893. inc tok.iNumber
  894. else:
  895. if buf[pos] > ' ':
  896. L.indentAhead = indent
  897. tokenEndIgnore(tok, pos)
  898. break
  899. L.bufpos = pos
proc skip(L: var TLexer, tok: var TToken) =
  ## Skips whitespace and non-documentation comments in front of the next
  ## token.  Tracks the number of leading spaces in `tok.strongSpaceA`, and
  ## after a newline records the new line's indentation in `tok.indent` and
  ## `L.currLineIndent`.  Stops in front of doc comments (`##`) so they can
  ## be lexed as real tokens.
  var pos = L.bufpos
  var buf = L.buf
  tokenBegin(tok, pos)
  tok.strongSpaceA = 0
  while true:
    case buf[pos]
    of ' ':
      inc(pos)
      inc(tok.strongSpaceA)
    of '\t':
      if not L.allowTabs: lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
      inc(pos)
    of CR, LF:
      tokenEndPrevious(tok, pos)
      pos = handleCRLF(L, pos)
      buf = L.buf # refresh after possible buffer reload
      # measure the new line's indentation; `#[` comments embedded in the
      # indentation are skipped transparently
      var indent = 0
      while true:
        if buf[pos] == ' ':
          inc(pos)
          inc(indent)
        elif buf[pos] == '#' and buf[pos+1] == '[':
          skipMultiLineComment(L, tok, pos+2, false)
          pos = L.bufpos
          buf = L.buf
        else:
          break
      tok.strongSpaceA = 0
      # only a line that carries a real token (or a doc comment) establishes
      # an indentation level; blank lines and plain comments do not
      if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'):
        tok.indent = indent
        L.currLineIndent = indent
        break
    of '#':
      # do not skip documentation comment:
      if buf[pos+1] == '#': break
      when defined(nimpretty):
        tok.commentOffsetA = L.offsetBase + pos
      if buf[pos+1] == '[':
        skipMultiLineComment(L, tok, pos+2, false)
        pos = L.bufpos
        buf = L.buf
        when defined(nimpretty):
          tok.commentOffsetB = L.offsetBase + pos
      else:
        # single-line `#` comment: skip to end of line
        tokenBegin(tok, pos)
        while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: inc(pos)
        tokenEndIgnore(tok, pos+1)
        when defined(nimpretty):
          tok.commentOffsetB = L.offsetBase + pos + 1
    else:
      break # EndOfFile also leaves the loop
  tokenEndPrevious(tok, pos-1)
  L.bufpos = pos
  when defined(nimpretty):
    if gIndentationWidth <= 0:
      gIndentationWidth = tok.indent
proc rawGetTok*(L: var TLexer, tok: var TToken) =
  ## Main tokenizer entry point: skips leading trivia, then dispatches on
  ## the first character of the next token to the specialized scanners
  ## (symbols, strings, numbers, operators, punctuation) and fills `tok`.
  template atTokenEnd() {.dirty.} =
    when defined(nimsuggest):
      # we attach the cursor to the last *strong* token
      if tok.tokType notin weakTokens:
        L.previousToken.line = tok.line.int16
        L.previousToken.col = tok.col.int16

  when defined(nimsuggest):
    L.cursor = CursorPosition.None
  fillToken(tok)
  # an indentation may have been buffered by scanComment (via indentAhead):
  if L.indentAhead >= 0:
    tok.indent = L.indentAhead
    L.currLineIndent = L.indentAhead
    L.indentAhead = -1
  else:
    tok.indent = -1
  skip(L, tok)
  var c = L.buf[L.bufpos]
  tok.line = L.lineNumber
  tok.col = getColNumber(L, L.bufpos)
  # 'r'/'R' are excluded here because they may start a raw string literal:
  if c in SymStartChars - {'r', 'R'}:
    getSymbol(L, tok)
  else:
    case c
    of '#':
      scanComment(L, tok)
    of '*':
      # '*:' is unfortunately a special case, because it is two tokens in
      # 'var v*: int'.
      if L.buf[L.bufpos+1] == ':' and L.buf[L.bufpos+2] notin OpChars:
        var h = 0 !& ord('*')
        endOperator(L, tok, L.bufpos+1, h)
      else:
        getOperator(L, tok)
    of ',':
      tok.tokType = tkComma
      inc(L.bufpos)
    of 'r', 'R':
      # r"..." / R"..." is a raw string literal, otherwise a plain symbol
      if L.buf[L.bufpos + 1] == '\"':
        inc(L.bufpos)
        getString(L, tok, true)
      else:
        getSymbol(L, tok)
    of '(':
      inc(L.bufpos)
      # '(.': tkParDotLe, but '(..' starts a slice-like expression
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
        tok.tokType = tkParDotLe
        inc(L.bufpos)
      else:
        tok.tokType = tkParLe
        when defined(nimsuggest):
          if L.fileIdx == gTrackPos.fileIndex and tok.col < gTrackPos.col and
              tok.line == gTrackPos.line and gIdeCmd == ideCon:
            gTrackPos.col = tok.col.int16
    of ')':
      tok.tokType = tkParRi
      inc(L.bufpos)
    of '[':
      inc(L.bufpos)
      # '[.': tkBracketDotLe, but '[..' is bracket + operator
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
        tok.tokType = tkBracketDotLe
        inc(L.bufpos)
      else:
        tok.tokType = tkBracketLe
    of ']':
      tok.tokType = tkBracketRi
      inc(L.bufpos)
    of '.':
      when defined(nimsuggest):
        # ide support: a '.' right before the tracked cursor triggers a
        # suggestion request; report tkDot and bail out early
        if L.fileIdx == gTrackPos.fileIndex and tok.col+1 == gTrackPos.col and
            tok.line == gTrackPos.line and gIdeCmd == ideSug:
          tok.tokType = tkDot
          L.cursor = CursorPosition.InToken
          gTrackPos.col = tok.col.int16
          inc(L.bufpos)
          atTokenEnd()
          return
      if L.buf[L.bufpos+1] == ']':
        tok.tokType = tkBracketDotRi
        inc(L.bufpos, 2)
      elif L.buf[L.bufpos+1] == '}':
        tok.tokType = tkCurlyDotRi
        inc(L.bufpos, 2)
      elif L.buf[L.bufpos+1] == ')':
        tok.tokType = tkParDotRi
        inc(L.bufpos, 2)
      else:
        getOperator(L, tok)
    of '{':
      inc(L.bufpos)
      # '{.': tkCurlyDotLe (pragma start), but '{..' is brace + operator
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
        tok.tokType = tkCurlyDotLe
        inc(L.bufpos)
      else:
        tok.tokType = tkCurlyLe
    of '}':
      tok.tokType = tkCurlyRi
      inc(L.bufpos)
    of ';':
      tok.tokType = tkSemiColon
      inc(L.bufpos)
    of '`':
      tok.tokType = tkAccent
      inc(L.bufpos)
    of '_':
      inc(L.bufpos)
      # a lone '_' is the wildcard symbol; '_' followed by symbol chars would
      # have been rejected by getSymbol, so report it as invalid here
      if L.buf[L.bufpos] notin SymChars+{'_'}:
        tok.tokType = tkSymbol
        tok.ident = L.cache.getIdent("_")
      else:
        tok.literal = $c
        tok.tokType = tkInvalid
        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
    of '\"':
      # check for extended raw string literal:
      var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars
      getString(L, tok, rawMode)
      if rawMode:
        # tkRStrLit -> tkGStrLit
        # tkTripleStrLit -> tkGTripleStrLit
        inc(tok.tokType, 2)
    of '\'':
      tok.tokType = tkCharLit
      getCharacter(L, tok)
      # NOTE(review): tkCharLit is assigned again after getCharacter; this
      # looks redundant unless getEscapedChar can change tokType — confirm
      tok.tokType = tkCharLit
    of '0'..'9':
      getNumber(L, tok)
      # a symbol character glued to the end of a number is an error
      let c = L.buf[L.bufpos]
      if c in SymChars+{'_'}:
        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
    else:
      if c in OpChars:
        getOperator(L, tok)
      elif c == nimlexbase.EndOfFile:
        tok.tokType = tkEof
        tok.indent = 0
      else:
        tok.literal = $c
        tok.tokType = tkInvalid
        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
        inc(L.bufpos)
  atTokenEnd()