clexer.nim 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736
  1. #
  2. #
  3. # C Optimizer
  4. # (c) Copyright 2020 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a C scanner for our C optimizer.
  10. ## Keywords are not handled here, because there is no need.
  11. import std / memfiles
  12. import ".." / compiler / [options, llstream, msgs, lineinfos, pathutils]
  13. const
  14. SymChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'}
  15. SymStartChars = {'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF'}
  16. type
  17. TokenKind* = enum
  18. tkInvalid, tkEof,
  19. tkMacroParam, # fake token: macro parameter (with its index)
  20. tkMacroParamToStr, # macro parameter (with its index) applied to the
  21. # toString operator (#) in a #define: #param
  22. tkStarComment, # /* */ comment
  23. tkLineComment, # // comment
  24. tkWhitespace,
  25. tkDirective, # #define, etc.
  26. tkDirConc, # ##
  27. tkNewLine, # newline: end of directive
  28. tkAmp, # &
  29. tkAmpAmp, # &&
  30. tkAmpAsgn, # &=
  31. tkAmpAmpAsgn, # &&=
  32. tkBar, # |
  33. tkBarBar, # ||
  34. tkBarAsgn, # |=
  35. tkBarBarAsgn, # ||=
  36. tkNot, # !
  37. tkPlusPlus, # ++
  38. tkMinusMinus, # --
  39. tkPlus, # +
  40. tkPlusAsgn, # +=
  41. tkMinus, # -
  42. tkMinusAsgn, # -=
  43. tkMod, # %
  44. tkModAsgn, # %=
  45. tkSlash, # /
  46. tkSlashAsgn, # /=
  47. tkStar, # *
  48. tkStarAsgn, # *=
  49. tkHat, # ^
  50. tkHatAsgn, # ^=
  51. tkAsgn, # =
  52. tkEquals, # ==
  53. tkDot, # .
  54. tkDotDotDot, # ...
  55. tkLe, # <=
  56. tkLt, # <
  57. tkGe, # >=
  58. tkGt, # >
  59. tkNeq, # !=
  60. tkConditional, # ?
  61. tkShl, # <<
  62. tkShlAsgn, # <<=
  63. tkShr, # >>
  64. tkShrAsgn, # >>=
  65. tkTilde, # ~
  66. tkTildeAsgn, # ~=
  67. tkArrow, # ->
  68. tkArrowStar, # ->*
  69. tkScope, # ::
  70. tkLit,
  71. tkSymbol, # a symbol
  72. tkParLe, tkBracketLe, tkCurlyLe, # this order is important
  73. tkParRi, tkBracketRi, tkCurlyRi, # for macro argument parsing!
  74. tkComma, tkSemiColon, tkColon,
  75. tkAngleRi # '>' but determined to be the end of a
  76. # template's angle bracket
  77. type
  78. Token* = object
  79. kind*: TokenKind # the type of the token
  80. s*: string # parsed symbol, integer, char or string literal
  81. Lexer* = object
  82. f: MemFile
  83. buf: cstring
  84. bufpos: int
  85. fileIdx*: FileIndex
  86. inDirective, debugMode*: bool
  87. config: ConfigRef
  88. proc fillToken(L: var Token) =
  89. L.kind = tkInvalid
  90. L.s.setLen 0
  91. proc openLexer*(lex: var Lexer, filename: AbsoluteFile, inputstream: PLLStream;
  92. config: ConfigRef) =
  93. #openBaseLexer(lex, inputstream, 2*1024*1024)
  94. lex.f = memfiles.open(filename.string)
  95. lex.fileIdx = fileInfoIdx(config, filename)
  96. lex.config = config
  97. lex.bufpos = 0
  98. lex.buf = cast[cstring](lex.f.mem)
  99. proc closeLexer*(lex: var Lexer) =
  100. close lex.f
  101. #closeBaseLexer(lex)
  102. template myadd(a, b): untyped =
  103. add(a, b)
  104. when false:
  105. proc getColumn*(L: Lexer): int =
  106. result = getColNumber(L, L.bufPos)
  107. proc getLineInfo*(L: Lexer): TLineInfo =
  108. result = newLineInfo(L.fileIdx, L.linenumber, getColNumber(L, L.bufpos))
  109. proc lexMessage*(L: Lexer, msg: TMsgKind, arg = "") =
  110. if L.debugMode: writeStackTrace()
  111. msgs.globalError(L.config, getLineInfo(L), msg, arg)
  112. proc lexMessagePos(L: var Lexer, msg: TMsgKind, pos: int, arg = "") =
  113. var info = newLineInfo(L.fileIdx, L.linenumber, pos - L.lineStart)
  114. if L.debugMode: writeStackTrace()
  115. msgs.globalError(L.config, info, msg, arg)
  116. proc tokKindToStr*(k: TokenKind): string =
  117. case k
  118. of tkEof: result = "[EOF]"
  119. of tkInvalid: result = "[invalid]"
  120. of tkMacroParam, tkMacroParamToStr: result = "[macro param]"
  121. of tkStarComment, tkLineComment: result = "[comment]"
  122. of tkLit: result = "[literal]"
  123. of tkWhitespace: result = "[whitespace]"
  124. of tkDirective: result = "#" # #define, etc.
  125. of tkDirConc: result = "##"
  126. of tkNewLine: result = "[NewLine]"
  127. of tkAmp: result = "&" # &
  128. of tkAmpAmp: result = "&&" # &&
  129. of tkAmpAsgn: result = "&=" # &=
  130. of tkAmpAmpAsgn: result = "&&=" # &&=
  131. of tkBar: result = "|" # |
  132. of tkBarBar: result = "||" # ||
  133. of tkBarAsgn: result = "|=" # |=
  134. of tkBarBarAsgn: result = "||=" # ||=
  135. of tkNot: result = "!" # !
  136. of tkPlusPlus: result = "++" # ++
  137. of tkMinusMinus: result = "--" # --
  138. of tkPlus: result = "+" # +
  139. of tkPlusAsgn: result = "+=" # +=
  140. of tkMinus: result = "-" # -
  141. of tkMinusAsgn: result = "-=" # -=
  142. of tkMod: result = "%" # %
  143. of tkModAsgn: result = "%=" # %=
  144. of tkSlash: result = "/" # /
  145. of tkSlashAsgn: result = "/=" # /=
  146. of tkStar: result = "*" # *
  147. of tkStarAsgn: result = "*=" # *=
  148. of tkHat: result = "^" # ^
  149. of tkHatAsgn: result = "^=" # ^=
  150. of tkAsgn: result = "=" # =
  151. of tkEquals: result = "==" # ==
  152. of tkDot: result = "." # .
  153. of tkDotDotDot: result = "..." # ...
  154. of tkLe: result = "<=" # <=
  155. of tkLt: result = "<" # <
  156. of tkGe: result = ">=" # >=
  157. of tkGt: result = ">" # >
  158. of tkNeq: result = "!=" # !=
  159. of tkConditional: result = "?"
  160. of tkShl: result = "<<"
  161. of tkShlAsgn: result = "<<="
  162. of tkShr: result = ">>"
  163. of tkShrAsgn: result = ">>="
  164. of tkTilde: result = "~"
  165. of tkTildeAsgn: result = "~="
  166. of tkArrow: result = "->"
  167. of tkArrowStar: result = "->*"
  168. of tkScope: result = "::"
  169. of tkSymbol: result = "[identifier]"
  170. of tkParLe: result = "("
  171. of tkParRi: result = ")"
  172. of tkBracketLe: result = "["
  173. of tkBracketRi: result = "]"
  174. of tkComma: result = ","
  175. of tkSemiColon: result = ";"
  176. of tkColon: result = ":"
  177. of tkCurlyLe: result = "{"
  178. of tkCurlyRi: result = "}"
  179. of tkAngleRi: result = "> [end of template]"
  180. proc `$`*(tok: Token): string =
  181. case tok.kind
  182. of tkSymbol, tkInvalid, tkStarComment, tkLineComment, tkLit, tkNewLine, tkWhitespace:
  183. result = tok.s
  184. else: result = tokKindToStr(tok.kind)
  185. proc debugTok*(L: Lexer; tok: Token): string =
  186. result = $tok
  187. if L.debugMode: result.add(" (" & $tok.kind & ")")
  188. proc printTok*(tok: Token) =
  189. writeLine(stdout, $tok)
  190. proc matchUnderscoreChars(L: var Lexer, tok: var Token, chars: set[char]) =
  191. # matches ([chars]_)*
  192. var pos = L.bufpos # use registers for pos, buf
  193. var buf = L.buf
  194. while true:
  195. if buf[pos] in chars:
  196. myadd(tok.s, buf[pos])
  197. inc(pos)
  198. else:
  199. break
  200. if buf[pos] == '_':
  201. myadd(tok.s, '_')
  202. inc(pos)
  203. L.bufPos = pos
  204. when false:
  205. proc getNumber(L: var Lexer, tok: var Token) =
  206. var pos = L.bufpos + 2 # skip 0b
  207. while true:
  208. case L.buf[pos]
  209. of 'A'..'Z', 'a'..'z', '0'..'9', '.', '_':
  210. myadd(tok.s, L.buf[pos])
  211. inc(pos)
  212. else: break
  213. L.bufpos = pos
  214. proc getFloating(L: var Lexer, tok: var Token) =
  215. matchUnderscoreChars(L, tok, {'0'..'9'})
  216. if L.buf[L.bufpos] in {'e', 'E'}:
  217. myadd(tok.s, L.buf[L.bufpos])
  218. inc(L.bufpos)
  219. if L.buf[L.bufpos] in {'+', '-'}:
  220. myadd(tok.s, L.buf[L.bufpos])
  221. inc(L.bufpos)
  222. matchUnderscoreChars(L, tok, {'0'..'9'})
  223. proc getNumber(L: var Lexer, tok: var Token) =
  224. tok.kind = tkLit
  225. if L.buf[L.bufpos] == '.':
  226. myadd(tok.s, '.')
  227. inc(L.bufpos)
  228. getFloating(L, tok)
  229. else:
  230. matchUnderscoreChars(L, tok, {'0'..'9'})
  231. if L.buf[L.bufpos] in {'.','e','E'}:
  232. if L.buf[L.bufpos] == '.':
  233. myadd(tok.s, L.buf[L.bufpos])
  234. inc(L.bufpos)
  235. getFloating(L, tok)
  236. # ignore type suffix:
  237. while L.buf[L.bufpos] in {'A'..'Z', 'a'..'z'}:
  238. myadd(tok.s, L.buf[L.bufpos])
  239. inc(L.bufpos)
  240. proc handleCRLF(L: var Lexer, pos: int): int =
  241. result = pos+1
  242. if pos >= L.f.size:
  243. L.buf = "\0"
  244. L.bufpos = 0
  245. L.f.size = 0
  246. result = 0
  247. proc escape(L: var Lexer, tok: var Token, allowEmpty=false) =
  248. myadd(tok.s, L.buf[L.bufpos])
  249. inc(L.bufpos) # skip \
  250. case L.buf[L.bufpos]
  251. of 'b', 'B', 't', 'T', 'n', 'N', 'f', 'F', 'r', 'R', '\'', '"', '\\':
  252. myadd(tok.s, L.buf[L.bufpos])
  253. inc(L.bufpos)
  254. of '0'..'7':
  255. myadd(tok.s, L.buf[L.bufpos])
  256. inc(L.bufpos)
  257. if L.buf[L.bufpos] in {'0'..'7'}:
  258. myadd(tok.s, L.buf[L.bufpos])
  259. inc(L.bufpos)
  260. if L.buf[L.bufpos] in {'0'..'7'}:
  261. myadd(tok.s, L.buf[L.bufpos])
  262. inc(L.bufpos)
  263. of 'x':
  264. myadd(tok.s, L.buf[L.bufpos])
  265. inc(L.bufpos)
  266. while true:
  267. case L.buf[L.bufpos]
  268. of '0'..'9', 'a'..'f', 'A'..'F':
  269. myadd(tok.s, L.buf[L.bufpos])
  270. inc(L.bufpos)
  271. else:
  272. break
  273. else: discard
  274. #elif not allowEmpty:
  275. # lexMessage(L, errGenerated, "invalid character constant")
  276. proc getCharLit(L: var Lexer, tok: var Token) =
  277. myadd(tok.s, L.buf[L.bufpos])
  278. inc(L.bufpos)
  279. if L.buf[L.bufpos] == '\\':
  280. escape(L, tok)
  281. else:
  282. myadd(tok.s, L.buf[L.bufpos])
  283. inc(L.bufpos)
  284. if L.buf[L.bufpos] == '\'':
  285. myadd(tok.s, L.buf[L.bufpos])
  286. inc(L.bufpos)
  287. else:
  288. discard
  289. #lexMessage(L, errGenerated, "missing closing single quote")
  290. tok.kind = tkLit
  291. proc getString(L: var Lexer, tok: var Token) =
  292. myadd(tok.s, L.buf[L.bufpos])
  293. var pos = L.bufPos + 1 # skip "
  294. var buf = L.buf # put `buf` in a register
  295. #var line = L.linenumber # save linenumber for better error message
  296. while true:
  297. case buf[pos]
  298. of '\"':
  299. myadd(tok.s, buf[pos])
  300. inc(pos)
  301. break
  302. of '\10':
  303. myadd(tok.s, buf[pos])
  304. pos = handleCRLF(L, pos)
  305. buf = L.buf
  306. of '\13':
  307. myadd(tok.s, buf[pos])
  308. pos = handleCRLF(L, pos)
  309. buf = L.buf
  310. of '\0':
  311. #var line2 = L.linenumber
  312. #L.lineNumber = line
  313. #lexMessagePos(L, errGenerated, L.lineStart, "closing \" expected, but end of file reached")
  314. #L.lineNumber = line2
  315. break
  316. of '\\':
  317. # we allow an empty \ for line concatenation, but we don't require it
  318. # for line concatenation
  319. L.bufpos = pos
  320. escape(L, tok, allowEmpty=true)
  321. pos = L.bufpos
  322. else:
  323. myadd(tok.s, buf[pos])
  324. inc(pos)
  325. L.bufpos = pos
  326. tok.kind = tkLit
  327. when false:
  328. const
  329. intrin = "<x86intrin.h>"
  330. {.localPassC: "-msse4.2".}
  331. type
  332. M128i {.importc: "__m128i", header: intrin, bycopy.} = object
  333. const
  334. SIDD_CMP_RANGES = 0b0000_0100'i32
  335. proc mm_loadu_si128(p: pointer): M128i {.importc: "_mm_loadu_si128", header: intrin.}
  336. proc mm_cmpestri(a: M128i; alen: int32; b: M128i; blen: int32;
  337. options: int32): int32 {.importc: "_mm_cmpestri", header: intrin.}
  338. template `+!`(p: pointer, s: int): pointer =
  339. cast[pointer](cast[int](p) +% s)
  340. proc inSillyRanges(c: char; ranges: string): bool =
  341. # Since C did win nobody knows anymore how to represent set[char] properly so
  342. # we have to do this crap for SSE4.2.
  343. var i = 0
  344. while i < ranges.len:
  345. if ranges[i] <= c and c <= ranges[i+1]: return true
  346. inc i, 2
  347. proc scan(haystack: string; ranges: string): int =
  348. result = 0
  349. if haystack.len >= 16:
  350. let ranges16 = mm_loadu_si128(unsafeAddr(ranges[0]))
  351. var left = haystack.len and (not 15)
  352. var buf = cast[pointer](unsafeAddr haystack[0])
  353. while true:
  354. let b16 = mm_loadu_si128(buf)
  355. let r = mm_cmpestri(ranges16, ranges.len.int32, b16, 16, SIDD_CMP_RANGES)
  356. inc result, r
  357. if r != 16:
  358. return result
  359. buf = buf +! 16
  360. dec left, 16
  361. if left == 0: break
  362. else:
  363. for i in 0 ..< haystack.len:
  364. if haystack[i].inSillyRanges(ranges):
  365. return i
  366. result = -1
  367. proc getSymbol(L: var Lexer, tok: var Token) =
  368. var pos = L.bufpos
  369. var buf = L.buf
  370. while true:
  371. var c = buf[pos]
  372. # speed hack, parse 4 letters at once:
  373. if c in SymChars and buf[pos+1] in SymChars and buf[pos+2] in SymChars and buf[pos+3] in SymChars:
  374. let L = tok.s.len
  375. setLen(tok.s, L+4)
  376. tok.s[L] = c
  377. tok.s[L+1] = buf[pos+1]
  378. tok.s[L+2] = buf[pos+2]
  379. tok.s[L+3] = buf[pos+3]
  380. inc(pos, 4)
  381. else:
  382. if c notin SymChars: break
  383. myadd(tok.s, c)
  384. inc(pos)
  385. L.bufpos = pos
  386. tok.kind = tkSymbol
  387. proc scanLineComment(L: var Lexer, tok: var Token) =
  388. var pos = L.bufpos
  389. var buf = L.buf
  390. # a comment ends if the next line does not start with the // on the same
  391. # column after only whitespace
  392. tok.kind = tkLineComment
  393. #var col = getColNumber(L, pos)
  394. while true:
  395. myadd(tok.s, buf[pos])
  396. myadd(tok.s, buf[pos+1])
  397. inc(pos, 2) # skip //
  398. #myadd(tok.s, '#')
  399. while buf[pos] notin {'\13', '\10'}:
  400. myadd(tok.s, buf[pos])
  401. inc(pos)
  402. myadd(tok.s, buf[pos])
  403. pos = handleCRLF(L, pos)
  404. buf = L.buf
  405. while buf[pos] == ' ':
  406. myadd(tok.s, buf[pos])
  407. inc(pos)
  408. if buf[pos] == '/' and buf[pos+1] == '/':
  409. discard
  410. else:
  411. break
  412. #while tok.s.len > 0 and tok.s[^1] in {'\t', ' '}: setLen(tok.s, tok.s.len-1)
  413. L.bufpos = pos
  414. proc scanStarComment(L: var Lexer, tok: var Token) =
  415. var pos = L.bufpos
  416. var buf = L.buf
  417. tok.s = ""
  418. tok.kind = tkStarComment
  419. while true:
  420. case buf[pos]
  421. of '\13', '\10':
  422. myadd(tok.s, buf[pos])
  423. pos = handleCRLF(L, pos)
  424. buf = L.buf
  425. of '*':
  426. myadd(tok.s, buf[pos])
  427. inc(pos)
  428. if buf[pos] == '/':
  429. myadd(tok.s, buf[pos])
  430. inc(pos)
  431. break
  432. of '\0':
  433. #lexMessage(L, errGenerated, "expected closing '*/'")
  434. break
  435. else:
  436. myadd(tok.s, buf[pos])
  437. inc(pos)
  438. # strip trailing whitespace
  439. #while tok.s.len > 0 and tok.s[^1] in {'\t', ' '}: setLen(tok.s, tok.s.len-1)
  440. L.bufpos = pos
  441. proc skip(L: var Lexer, tok: var Token) =
  442. var pos = L.bufpos
  443. var buf = L.buf
  444. while true:
  445. case buf[pos]
  446. of '\\':
  447. # Ignore \ line continuation characters when not inDirective
  448. myadd(tok.s, buf[pos])
  449. inc(pos)
  450. if L.inDirective:
  451. while buf[pos] in {' ', '\t'}:
  452. myadd(tok.s, buf[pos])
  453. inc(pos)
  454. if buf[pos] in {'\13', '\10'}:
  455. myadd(tok.s, buf[pos])
  456. pos = handleCRLF(L, pos)
  457. buf = L.buf
  458. of ' ', '\t':
  459. myadd(tok.s, buf[pos])
  460. inc(pos) # newline is special:
  461. of '\13', '\10':
  462. myadd(tok.s, buf[pos])
  463. pos = handleCRLF(L, pos)
  464. buf = L.buf
  465. if L.inDirective:
  466. tok.kind = tkNewLine
  467. L.inDirective = false
  468. else:
  469. break # EndOfFile also leaves the loop
  470. L.bufpos = pos
  471. proc getDirective(L: var Lexer, tok: var Token) =
  472. var pos = L.bufpos + 1
  473. var buf = L.buf
  474. myadd(tok.s, buf[pos-1])
  475. when false:
  476. while buf[pos] in {' ', '\t'}:
  477. myadd(tok.s, buf[pos])
  478. inc(pos)
  479. while buf[pos] in SymChars:
  480. myadd(tok.s, buf[pos])
  481. inc(pos)
  482. L.bufpos = pos
  483. tok.kind = tkDirective
  484. L.inDirective = true
  485. proc getTok*(L: var Lexer, tok: var Token) =
  486. tok.kind = tkInvalid
  487. fillToken(tok)
  488. let pos = L.bufpos
  489. skip(L, tok)
  490. if tok.kind == tkNewLine: return
  491. if L.bufpos != pos:
  492. tok.kind = tkWhitespace
  493. return
  494. var c = L.buf[L.bufpos]
  495. if c in SymStartChars:
  496. getSymbol(L, tok)
  497. elif c in {'0'..'9'} or (c == '.' and L.buf[L.bufpos+1] in {'0'..'9'}):
  498. getNumber(L, tok)
  499. else:
  500. case c
  501. of ';':
  502. tok.kind = tkSemicolon
  503. inc(L.bufpos)
  504. of '/':
  505. if L.buf[L.bufpos + 1] == '/':
  506. scanLineComment(L, tok)
  507. elif L.buf[L.bufpos+1] == '*':
  508. scanStarComment(L, tok)
  509. elif L.buf[L.bufpos+1] == '=':
  510. inc(L.bufpos, 2)
  511. tok.kind = tkSlashAsgn
  512. else:
  513. tok.kind = tkSlash
  514. inc(L.bufpos)
  515. of ',':
  516. tok.kind = tkComma
  517. inc(L.bufpos)
  518. of '(':
  519. inc(L.bufpos)
  520. tok.kind = tkParLe
  521. of '*':
  522. inc(L.bufpos)
  523. if L.buf[L.bufpos] == '=':
  524. inc(L.bufpos)
  525. tok.kind = tkStarAsgn
  526. else:
  527. tok.kind = tkStar
  528. of ')':
  529. inc(L.bufpos)
  530. tok.kind = tkParRi
  531. of '[':
  532. inc(L.bufpos)
  533. tok.kind = tkBracketLe
  534. of ']':
  535. inc(L.bufpos)
  536. tok.kind = tkBracketRi
  537. of '.':
  538. inc(L.bufpos)
  539. if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] == '.':
  540. tok.kind = tkDotDotDot
  541. inc(L.bufpos, 2)
  542. else:
  543. tok.kind = tkDot
  544. of '{':
  545. inc(L.bufpos)
  546. tok.kind = tkCurlyLe
  547. of '}':
  548. inc(L.bufpos)
  549. tok.kind = tkCurlyRi
  550. of '+':
  551. inc(L.bufpos)
  552. if L.buf[L.bufpos] == '=':
  553. tok.kind = tkPlusAsgn
  554. inc(L.bufpos)
  555. elif L.buf[L.bufpos] == '+':
  556. tok.kind = tkPlusPlus
  557. inc(L.bufpos)
  558. else:
  559. tok.kind = tkPlus
  560. of '-':
  561. inc(L.bufpos)
  562. case L.buf[L.bufpos]
  563. of '>':
  564. tok.kind = tkArrow
  565. inc(L.bufpos)
  566. if L.buf[L.bufpos] == '*':
  567. tok.kind = tkArrowStar
  568. inc(L.bufpos)
  569. of '=':
  570. tok.kind = tkMinusAsgn
  571. inc(L.bufpos)
  572. of '-':
  573. tok.kind = tkMinusMinus
  574. inc(L.bufpos)
  575. else:
  576. tok.kind = tkMinus
  577. of '?':
  578. inc(L.bufpos)
  579. tok.kind = tkConditional
  580. of ':':
  581. inc(L.bufpos)
  582. if L.buf[L.bufpos] == ':':
  583. tok.kind = tkScope
  584. inc(L.bufpos)
  585. else:
  586. tok.kind = tkColon
  587. of '!':
  588. inc(L.bufpos)
  589. if L.buf[L.bufpos] == '=':
  590. tok.kind = tkNeq
  591. inc(L.bufpos)
  592. else:
  593. tok.kind = tkNot
  594. of '<':
  595. inc(L.bufpos)
  596. if L.buf[L.bufpos] == '=':
  597. inc(L.bufpos)
  598. tok.kind = tkLe
  599. elif L.buf[L.bufpos] == '<':
  600. inc(L.bufpos)
  601. if L.buf[L.bufpos] == '=':
  602. inc(L.bufpos)
  603. tok.kind = tkShlAsgn
  604. else:
  605. tok.kind = tkShl
  606. else:
  607. tok.kind = tkLt
  608. of '>':
  609. inc(L.bufpos)
  610. if L.buf[L.bufpos] == '=':
  611. inc(L.bufpos)
  612. tok.kind = tkGe
  613. elif L.buf[L.bufpos] == '>':
  614. inc(L.bufpos)
  615. if L.buf[L.bufpos] == '=':
  616. inc(L.bufpos)
  617. tok.kind = tkShrAsgn
  618. else:
  619. tok.kind = tkShr
  620. else:
  621. tok.kind = tkGt
  622. of '=':
  623. inc(L.bufpos)
  624. if L.buf[L.bufpos] == '=':
  625. tok.kind = tkEquals
  626. inc(L.bufpos)
  627. else:
  628. tok.kind = tkAsgn
  629. of '&':
  630. inc(L.bufpos)
  631. if L.buf[L.bufpos] == '=':
  632. tok.kind = tkAmpAsgn
  633. inc(L.bufpos)
  634. elif L.buf[L.bufpos] == '&':
  635. inc(L.bufpos)
  636. if L.buf[L.bufpos] == '=':
  637. inc(L.bufpos)
  638. tok.kind = tkAmpAmpAsgn
  639. else:
  640. tok.kind = tkAmpAmp
  641. else:
  642. tok.kind = tkAmp
  643. of '|':
  644. inc(L.bufpos)
  645. if L.buf[L.bufpos] == '=':
  646. tok.kind = tkBarAsgn
  647. inc(L.bufpos)
  648. elif L.buf[L.bufpos] == '|':
  649. inc(L.bufpos)
  650. if L.buf[L.bufpos] == '=':
  651. inc(L.bufpos)
  652. tok.kind = tkBarBarAsgn
  653. else:
  654. tok.kind = tkBarBar
  655. else:
  656. tok.kind = tkBar
  657. of '^':
  658. inc(L.bufpos)
  659. if L.buf[L.bufpos] == '=':
  660. tok.kind = tkHatAsgn
  661. inc(L.bufpos)
  662. else:
  663. tok.kind = tkHat
  664. of '%':
  665. inc(L.bufpos)
  666. if L.buf[L.bufpos] == '=':
  667. tok.kind = tkModAsgn
  668. inc(L.bufpos)
  669. else:
  670. tok.kind = tkMod
  671. of '~':
  672. inc(L.bufpos)
  673. if L.buf[L.bufpos] == '=':
  674. tok.kind = tkTildeAsgn
  675. inc(L.bufpos)
  676. else:
  677. tok.kind = tkTilde
  678. of '#':
  679. if L.buf[L.bufpos+1] == '#':
  680. inc(L.bufpos, 2)
  681. tok.kind = tkDirConc
  682. else:
  683. getDirective(L, tok)
  684. of '"': getString(L, tok)
  685. of '\'': getCharLit(L, tok)
  686. of '\0':
  687. tok.kind = tkEof
  688. else:
  689. tok.s = $c
  690. tok.kind = tkInvalid
  691. #lexMessage(L, errGenerated, "invalid token " & c & " (\\" & $(ord(c)) & ')')
  692. inc(L.bufpos)