highlighter.lua 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. local highlighter = {}
  -- Appends a (type, text) pair to the flat token list `t`, merging when possible.
  -- `t` holds tokens as alternating entries: t[1] = type, t[2] = text, t[3] = type, ...
  -- The new text is folded into the last token when that token has the same type
  -- or when its text is only whitespace (an empty text counts as whitespace); the
  -- merged token always ends up with the new type.
  --   t          flat token list, modified in place
  --   token_type type of the token being appended
  --   text       text of the token being appended
  local function push_token(t, token_type, text)
    local n = #t
    local last_type, last_text = t[n - 1], t[n]
    local mergeable = last_type
      and (last_type == token_type or last_text:find("^%s*$"))
    if not mergeable then
      -- start a new token: type first, then its text
      t[#t + 1] = token_type
      t[#t + 1] = text
      return
    end
    -- extend the last token in place
    t[n - 1] = token_type
    t[n] = last_text .. text
  end
  -- Reports whether the character at position `idx` of `text` is escaped.
  -- Scans backwards from idx - 1 over the unbroken run of escape bytes; an odd
  -- run length means the character at `idx` is escaped, an even one (including
  -- zero) means it is not. Only the first byte of `esc` is considered.
  local function is_escaped(text, idx, esc)
    local esc_byte = esc:byte()
    local pos = idx - 1
    while pos >= 1 and text:byte(pos) == esc_byte do
      pos = pos - 1
    end
    -- number of consecutive escape bytes directly before idx
    local run = (idx - 1) - pos
    return run % 2 == 1
  end
  -- Finds the first match of `pattern` in `text`, starting at `offset`, that is
  -- not preceded by an odd number of `esc` characters.
  --   text    string to search
  --   pattern pattern handed to `find` unchanged
  --   offset  position to start searching from
  --   esc     escape character, or nil/false to accept the first match as-is
  --   find    search function called as find(text, pattern, offset); expected to
  --           return start and end indices like string.find, or nil on failure
  -- Returns the start and end indices of the match, or nothing if there is none.
  local function find_non_escaped(text, pattern, offset, esc, find)
    local last_start = #text + 1
    repeat
      local s, e = find(text, pattern, offset)
      if not s then return end
      if not (esc and is_escaped(text, s, esc)) then
        return s, e
      end
      -- Resume after the rejected match. A zero-width match has e == s - 1, so
      -- continuing from e + 1 would retry the same position forever; always
      -- step past the start of the match instead.
      if e < s then e = s end
      offset = e + 1
      -- Nothing can match beyond one past the end of the text; stop explicitly
      -- rather than relying on how `find` treats an out-of-range start.
    until offset > last_start
  end
  -- Splits `text` into a flat list of alternating token types and token texts.
  --   syntax  table with:
  --             patterns      list of { pattern = ..., type = ... } entries; a
  --                           string pattern matches a single token, a table
  --                           pattern { start, end [, escape] } matches a pair
  --                           that may stay open past the end of `text`
  --             symbols       map from exact token text to an overriding type
  --             match_fn      optional replacement for string.find
  --             restore_state optional hook, called with the incoming state
  --             save_state    optional hook, called with the outgoing state
  --   text    string to tokenize
  --   state   nil, or the state returned by a previous call ({ idx = n } means
  --           the text starts inside the pair syntax.patterns[n])
  -- Returns the token list and the state at the end of `text` (nil unless a
  -- pair is still open).
  function highlighter.tokenize(syntax, text, state)
    local res = {}
    local i = 1
    if syntax.restore_state then
      syntax.restore_state(state)
    end
    local find = syntax.match_fn or string.find
    while i <= #text do
      if state then
        -- inside a start/end pair: keep looking for the closing pattern
        local p = syntax.patterns[state.idx]
        local s, e = find_non_escaped(text, p.pattern[2], i, p.pattern[3], find)
        if not s then
          -- pair stays open; the rest of the text belongs to it
          push_token(res, p.type, text:sub(i))
          break
        end
        push_token(res, p.type, text:sub(i, e))
        state = nil
        i = e + 1
        -- Loop back so `i <= #text` is re-checked: when the pair closes exactly
        -- at the end of the text no empty "normal" token must be appended.
      else
        -- find the first pattern that matches at the cursor
        local matched = false
        for n, p in ipairs(syntax.patterns) do
          local is_pair = type(p.pattern) == "table"
          local pattern = is_pair and p.pattern[1] or p.pattern
          local s, e = find(text, "^" .. pattern, i)
          -- A zero-width match (e < s) would leave the cursor in place and
          -- spin forever, so it is treated as no match.
          if s and e >= s then
            local t = text:sub(s, e)
            push_token(res, syntax.symbols[t] or p.type, t)
            -- a start pattern opens its pair; remember which one
            if is_pair then
              state = { idx = n }
            end
            -- move cursor past this token
            i = e + 1
            matched = true
            break
          end
        end
        -- consume a single character if nothing matched
        if not matched then
          push_token(res, "normal", text:sub(i, i))
          i = i + 1
        end
      end
    end
    if syntax.save_state then
      syntax.save_state(state)
    end
    return res, state
  end
  -- Iterator step over a flat token list (alternating type and text entries).
  -- Takes the index of the previous pair and returns the next pair's index,
  -- type and text; returns nothing once there is no type at the next index.
  local function iter(t, i)
    local pos = i + 2
    local token_type = t[pos]
    if not token_type then
      return
    end
    return pos, token_type, t[pos + 1]
  end
  -- Returns the generic-for triple that walks the (type, text) pairs of the flat
  -- token list `t`:
  --   for i, type, text in highlighter.each_token(tokens) do ... end
  function highlighter.each_token(t)
    -- the step function adds 2 to the control value, so -1 makes it land on 1
    local before_first = -1
    return iter, t, before_first
  end
  94. return highlighter