-- highlighter.lua (2.2 KB)
  1. local highlighter = {}
  --- Append a (type, text) pair to the flat token list `t`.
  -- The list stores tokens as alternating entries: type, text, type, text, ...
  -- If the list already ends with a pair whose type equals `type`, or whose
  -- text consists only of whitespace, the new text is appended to that pair
  -- (and the pair takes the new type) instead of growing the list.
  -- @param t     flat token list, modified in place
  -- @param type  token type of the text being added
  -- @param text  string to add
  local function push_token(t, type, text)
    local last = #t
    local last_type, last_text = t[last - 1], t[last]
    -- Merge when the previous pair has the same type or is blank filler.
    local mergeable = last_type
      and (last_type == type or last_text:find("^%s*$"))
    if not mergeable then
      table.insert(t, type)
      table.insert(t, text)
      return
    end
    t[last - 1] = type
    t[last] = last_text .. text
  end
  --- Tell whether the character at `idx` is preceded by an odd number of
  -- consecutive escape characters.
  -- Only the first byte of `esc` is compared against the text.
  -- @param text  string being inspected
  -- @param idx   position of the character in question
  -- @param esc   escape character, e.g. a backslash
  -- @return true when the run of escape bytes directly before `idx` has odd
  --         length, false otherwise
  local function is_escaped(text, idx, esc)
    local esc_byte = esc:byte()
    -- Walk backwards over the run of escape bytes that ends just before idx.
    local pos = idx - 1
    while pos >= 1 and text:byte(pos) == esc_byte do
      pos = pos - 1
    end
    local run_length = (idx - 1) - pos
    return run_length % 2 == 1
  end
  --- Find the first match of `pattern` in `text`, starting at `offset`, that
  -- is not escaped.
  -- A match counts as escaped when `esc` is given and `is_escaped` reports
  -- true for the position where the match starts. When `esc` is nil or false
  -- the first match is returned as-is.
  -- @param text     string to search
  -- @param pattern  Lua pattern to look for
  -- @param offset   position at which the search starts
  -- @param esc      optional escape character
  -- @return start and end positions of the match, or nothing if none is found
  local function find_non_escaped(text, pattern, offset, esc)
    local from = offset
    while true do
      local s, e = text:find(pattern, from)
      if not s then
        return
      end
      if not (esc and is_escaped(text, s, esc)) then
        return s, e
      end
      -- Skip the escaped match. A zero-width match has e == s - 1, so always
      -- step at least one position forward or the same match repeats forever.
      from = ((e >= s) and e or s) + 1
      -- Some Lua versions clamp an out-of-range start position back to the
      -- end of the string, so stop explicitly once we are past it.
      if from > #text + 1 then
        return
      end
    end
  end
  --- Split `text` into a flat list of alternating token types and texts.
  -- @param syntax  table with a `patterns` array; each entry has a `type` and
  --                a `pattern` that is either a Lua pattern string or a table
  --                { start_pattern, end_pattern, escape_char } describing a
  --                delimited pair. An optional `symbols` table maps a matched
  --                text to a type that overrides the pattern's own type.
  -- @param text    string to tokenize
  -- @param state   index into `syntax.patterns` of a pair that is still open
  --                when `text` begins, or nil
  -- @return the token list { type1, text1, type2, text2, ... } and the state
  --         at the end of `text` (the index of a still-open pair, or nil)
  function highlighter.tokenize(syntax, text, state)
    local res = {}
    local len = #text
    local symbols = syntax.symbols
    local i = 1

    while i <= len do
      -- continue trying to match the end pattern of a pair if we have a state set
      if state then
        local p = syntax.patterns[state]
        local s, e = find_non_escaped(text, p.pattern[2], i, p.pattern[3])
        if not s then
          -- pair is still open at the end of the text: the rest belongs to it
          push_token(res, p.type, text:sub(i))
          break
        end
        push_token(res, p.type, text:sub(i, e))
        state = nil
        i = e + 1
        -- The closing delimiter may have consumed the rest of the text; stop
        -- here so that no empty "normal" token is emitted past the end.
        if i > len then
          break
        end
      end

      -- find matching pattern
      local matched = false
      for n, p in ipairs(syntax.patterns) do
        local is_pair = type(p.pattern) == "table"
        local pattern = is_pair and p.pattern[1] or p.pattern
        -- anchor the pattern so it only matches at the cursor
        local s, e = text:find("^" .. pattern, i)
        if s then
          -- matched pattern; make and add token
          local t = text:sub(s, e)
          push_token(res, (symbols and symbols[t]) or p.type, t)
          -- update state if this was a start|end pattern pair
          if is_pair then
            state = n
          end
          -- move cursor past this token
          i = e + 1
          matched = true
          break
        end
      end

      -- consume character if we didn't match
      if not matched then
        push_token(res, "normal", text:sub(i, i))
        i = i + 1
      end
    end

    return res, state
  end
  --- Iterator step over a flat token list of alternating types and texts.
  -- @param t  token list { type1, text1, type2, text2, ... }
  -- @param i  index of the previously visited type entry
  -- @return the index of the next type entry, its type and its text, or
  --         nothing once there is no further type entry
  local function iter(t, i)
    local idx = i + 2
    local tok_type = t[idx]
    local tok_text = t[idx + 1]
    if not tok_type then
      return
    end
    return idx, tok_type, tok_text
  end
  --- Return the iterator triplet for walking a token list with a generic
  -- `for`, e.g. `for i, type, text in highlighter.each_token(t) do ... end`.
  -- @param t  flat token list { type1, text1, type2, text2, ... }
  -- @return the step function, the list itself, and the initial control value
  function highlighter.each_token(t)
    -- Start at -1 so the first step (which adds 2) lands on index 1.
    local start = -1
    return iter, t, start
  end
  87. return highlighter