util.lua 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  -- TODO(justinmk): move most of this to `vim.text`.

  -- Local alias for `string.format`.
  local fmt = string.format

  --- A parsed markdown node. Children live in the array part; children that
  --- the parser reports under a field name are stored under that name.
  --- @class nvim.util.MDNode
  --- @field [integer] nvim.util.MDNode
  --- @field type string
  --- @field text? string

  -- Extra columns added in front of fenced code block content.
  local INDENTATION = 4

  -- Single byte 160. Stands in for spaces inside code spans while rendering
  -- and is turned back into a plain space by `M.md_to_vimdoc`.
  local NBSP = '\160'

  -- Module table; returned at the end of the file.
  local M = {}
  --- Reports whether `needle` is one of the values of `haystack`.
  ---
  --- Same as `vim.tbl_contains()` with the two arguments swapped.
  ---
  --- @param needle any
  --- @param haystack table
  --- @return boolean
  local function contains(needle, haystack)
    local found = vim.tbl_contains(haystack, needle)
    return found
  end
  -- Map of api_level:version, by inspection of:
  -- :lua= vim.mpack.decode(vim.fn.readfile('test/functional/fixtures/api_level_9.mpack','B')).version
  --
  -- Keys are listed oldest first.
  M.version_level = {
    [1] = '0.1.0',
    [2] = '0.2.0',
    [3] = '0.2.1',
    [4] = '0.3.0',
    [5] = '0.3.2',
    [6] = '0.4.0',
    [7] = '0.5.0',
    [8] = '0.6.0',
    [9] = '0.7.0',
    [10] = '0.8.0',
    [11] = '0.9.0',
    [12] = '0.10.0',
    [13] = '0.11.0',
  }
  --- Returns the part of `txt` between two (row, column) positions.
  ---
  --- Rows and start columns are 0-based. `ecol` is used directly as the
  --- inclusive end index passed to `string.sub` on the last selected row.
  --- When `erow`/`ecol` are omitted the slice runs to the end of `txt`.
  --- A range that selects no row yields an empty string.
  ---
  --- @param txt string
  --- @param srow integer
  --- @param scol integer
  --- @param erow? integer
  --- @param ecol? integer
  --- @return string
  local function slice_text(txt, srow, scol, erow, ecol)
    local lines = vim.split(txt, '\n')

    -- Start and end on the same row: a plain substring of that row.
    if srow == erow then
      local line = lines[srow + 1]
      return line and line:sub(scol + 1, ecol) or ''
    end

    -- Work with 1-based indexes into `lines` rather than shifting the table
    -- with repeated `table.remove(lines, 1)`.
    local first = math.max(srow + 1, 1)
    local last = #lines
    if erow and erow + 1 < last then
      last = erow + 1
    end
    if first > last then
      return ''
    end

    -- Trim the start of the first row, then the end of the last row. When
    -- both are the same row the second cut applies to the already-trimmed
    -- text.
    lines[first] = lines[first]:sub(scol + 1)
    lines[last] = lines[last]:sub(1, ecol)

    return table.concat(lines, '\n', first, last)
  end
  --- Parses `text` with the `markdown_inline` tree-sitter grammar and converts
  --- the resulting tree into plain `nvim.util.MDNode` tables.
  ---
  --- For `inline` nodes, the stretches of source text that lie between
  --- converted children (and after the last one) are added as
  --- `{ type = 'text' }` children, so the children together cover the text.
  ---
  --- @param text string
  --- @return nvim.util.MDNode
  local function parse_md_inline(text)
    local parser = vim.treesitter.languagetree.new(text, 'markdown_inline')
    local root = parser:parse(true)[1]:root()

    --- @param node TSNode
    --- @return nvim.util.MDNode?
    local function extract(node)
      local ntype = node:type()

      -- Nodes whose type is a single punctuation character are dropped.
      if ntype:match('^%p$') then
        return
      end

      --- @type table<any,any>
      local ret = { type = ntype, text = vim.treesitter.get_node_text(node, text) }

      -- End position of the last converted child; everything before it has
      -- already been emitted.
      local row, col = 0, 0

      for child, child_field in node:iter_children() do
        local e = extract(child)

        if e and ntype == 'inline' then
          local srow, scol = child:start()
          -- There is a gap between the previous child and this one: keep it
          -- as a plain text node.
          if (srow == row and scol > col) or srow > row then
            local t = slice_text(ret.text, row, col, srow, scol)
            if t and t ~= '' then
              table.insert(ret, { type = 'text', text = t })
            end
          end
          row, col = child:end_()
        end

        if child_field then
          ret[child_field] = e
        elseif e then
          ret[#ret + 1] = e
        end
      end

      -- Text left over after the last converted child.
      if ntype == 'inline' and (row > 0 or col > 0) then
        local t = slice_text(ret.text, row, col)
        if t and t ~= '' then
          table.insert(ret, { type = 'text', text = t })
        end
      end

      return ret
    end

    return extract(root) or {}
  end
  --- Parses `text` with the block-level `markdown` tree-sitter grammar (with
  --- the `markdown` injection query overridden by an empty string) and
  --- converts the tree into `nvim.util.MDNode` tables. `inline` nodes are
  --- re-parsed with `parse_md_inline`.
  ---
  --- @param text string
  --- @return nvim.util.MDNode
  local function parse_md(text)
    local parser = vim.treesitter.languagetree.new(text, 'markdown', {
      injections = { markdown = '' },
    })
    local root = parser:parse(true)[1]:root()

    -- Node types that do not get a `text` field.
    local TEXTLESS = {
      document = true,
      section = true,
      list = true,
      list_item = true,
      fenced_code_block = true,
      fenced_code_block_delimiter = true,
    }

    --- @param ts_node TSNode
    --- @return nvim.util.MDNode?
    local function convert(ts_node)
      local kind = ts_node:type()

      -- Single punctuation characters and block continuations are dropped.
      if kind:match('^%p$') or kind == 'block_continuation' then
        return
      end

      --- @type table<any,any>
      local out
      if kind == 'inline' then
        out = parse_md_inline(vim.treesitter.get_node_text(ts_node, text))
      else
        out = { type = kind }
        if not TEXTLESS[kind] then
          out.text = vim.treesitter.get_node_text(ts_node, text)
        end
      end

      for ts_child, field_name in ts_node:iter_children() do
        local converted = convert(ts_child)
        if field_name then
          out[field_name] = converted
        else
          table.insert(out, converted)
        end
      end

      return out
    end

    return convert(root) or {}
  end
  --- Prefixes each line in `text`.
  ---
  --- Does not wrap, not important for "meta" files? (You probably want md_to_vimdoc instead.)
  ---
  --- Each prefixed line is passed through `vim.trim()`, so whitespace at both
  --- ends of the prefixed line is removed. Every line of the result,
  --- including the last, is terminated by a newline.
  ---
  --- @param prefix_ string
  --- @param text string
  --- @return string
  function M.prefix_lines(prefix_, text)
    -- Collect the pieces and join once instead of growing a string in the loop.
    local out = {} --- @type string[]
    for _, l in ipairs(vim.split(text, '\n', { plain = true })) do
      out[#out + 1] = vim.trim(prefix_ .. l)
      out[#out + 1] = '\n'
    end
    return table.concat(out)
  end
  --- Re-flows `x` into lines, breaking between whitespace-separated words.
  ---
  --- The result starts with `start_indent` spaces; every continuation line
  --- starts with `indent` spaces. A break is inserted before a word when the
  --- current line already holds a word and adding this one would push the
  --- running column count past `text_width - 1`. The count starts at `indent`
  --- and uses byte lengths (`#word`). Whitespace before newlines and trailing
  --- newlines are stripped from the result.
  ---
  --- @param x string
  --- @param start_indent integer
  --- @param indent integer
  --- @param text_width integer
  --- @return string
  function M.wrap(x, start_indent, indent, text_width)
    local out = { string.rep(' ', start_indent) } --- @type string[]
    local col = indent

    for idx, word in ipairs(vim.split(vim.trim(x), '%s+')) do
      if col > indent and col + #word > text_width - 1 then
        -- Start a continuation line.
        out[#out + 1] = '\n'
        out[#out + 1] = string.rep(' ', indent)
        col = indent
      elseif idx ~= 1 then
        -- Separate from the previous word on the same line.
        out[#out + 1] = ' '
        col = col + 1
      end
      out[#out + 1] = word
      col = col + #word
    end

    local joined = table.concat(out)
    joined = joined:gsub('%s+\n', '\n')
    joined = joined:gsub('\n+$', '')
    return joined
  end
  --- Renders a parsed markdown node, and recursively its children, into a
  --- list of vimdoc text fragments. Concatenating the fragments gives the
  --- rendered text.
  ---
  --- @param node nvim.util.MDNode
  --- @param start_indent integer indent used for the first rendered line
  --- @param indent integer indent used for following lines
  --- @param text_width integer
  --- @param level integer nesting depth; incremented on every recursion
  --- @param is_list? boolean passed through to children; not read here
  --- @return string[]
  local function render_md(node, start_indent, indent, text_width, level, is_list)
    local out = {} --- @type string[]
    local kind = node.type

    -- For debugging: set to true to bracket each node's output with its type.
    local trace = false

    --- Appends one fragment to the output.
    local function emit(s)
      out[#out + 1] = s
    end

    --- Renders `child` one level deeper and appends its fragments to `dst`.
    local function descend(dst, child, first_indent, rest_indent, list_flag)
      vim.list_extend(
        dst,
        render_md(child, first_indent, rest_indent, text_width, level + 1, list_flag)
      )
    end

    if trace then
      emit('<' .. kind .. '>')
    end

    if kind == 'text' or kind == 'backslash_escape' or kind == 'list_marker_dot' then
      -- Emitted verbatim.
      emit(node.text)
    elseif kind == 'html_tag' then
      error('html_tag: ' .. node.text)
    elseif kind == 'inline_link' then
      vim.list_extend(out, { '*', node[1].text, '*' })
    elseif kind == 'shortcut_link' then
      local label = node[1].text
      if label:find('^<.*>$') then
        -- `<...>` labels are emitted as-is.
        emit(label)
      elseif label:find('^%d+$') then
        -- All-digit labels keep their brackets.
        vim.list_extend(out, { '[', label, ']' })
      else
        -- Anything else is wrapped in bars.
        vim.list_extend(out, { '|', label, '|' })
      end
    elseif kind == 'emphasis' then
      -- Drop the first and last character (the emphasis markers).
      emit(node.text:sub(2, -2))
    elseif kind == 'code_span' then
      -- Spaces become NBSP bytes for the duration of rendering.
      local code = node.text:sub(2, -2):gsub(' ', NBSP)
      vim.list_extend(out, { '`', code, '`' })
    elseif kind == 'inline' then
      if #node == 0 then
        local text = assert(node.text)
        emit(M.wrap(text, start_indent, indent, text_width))
      else
        for _, child in ipairs(node) do
          descend(out, child, start_indent, indent)
        end
      end
    elseif kind == 'paragraph' then
      -- Render all children first, then wrap the paragraph as a whole.
      local body = {}
      for _, child in ipairs(node) do
        descend(body, child, start_indent, indent)
      end
      emit(M.wrap(table.concat(body), start_indent, indent, text_width))
      emit('\n')
    elseif kind == 'code_fence_content' then
      local stripped = node.text:gsub('\n%s*$', '')
      local code_lines = vim.split(stripped, '\n')
      local pad = indent + INDENTATION
      if level > 3 then
        -- The tree-sitter markdown parser doesn't parse the code blocks indents
        -- correctly in lists. Fudge it!
        code_lines[1] = ' ' .. code_lines[1] -- ¯\_(ツ)_/¯
        pad = indent - level
        local _, lead = code_lines[1]:find('^%s*')
        if lead + pad < indent then
          pad = indent - INDENTATION
        end
      end
      local margin = string.rep(' ', pad)
      for _, l in ipairs(code_lines) do
        -- Blank lines get no margin, only the newline.
        if #l > 0 then
          emit(margin)
          emit(l)
        end
        emit('\n')
      end
    elseif kind == 'fenced_code_block' then
      -- Opening marker, directly followed by the first info string (if any).
      emit('>')
      for _, child in ipairs(node) do
        if child.type == 'info_string' then
          emit(child.text)
          break
        end
      end
      emit('\n')
      for _, child in ipairs(node) do
        if child.type ~= 'info_string' then
          descend(out, child, start_indent, indent)
        end
      end
      emit('<\n')
    elseif kind == 'html_block' then
      -- Strip a leading `<pre>help` and a trailing `</pre>`.
      emit((node.text:gsub('^<pre>help', ''):gsub('</pre>%s*$', '')))
    elseif kind == 'list_marker_minus' or kind == 'list_marker_star' then
      emit('• ')
    elseif kind == 'list_item' then
      emit(string.rep(' ', indent))
      -- Width added to the indent of the item's content.
      local offset = 2
      if node[1].type == 'list_marker_dot' then
        offset = 3
      end
      for i, child in ipairs(node) do
        -- The first two children start with no extra leading indent.
        local first_indent = indent + offset
        if i <= 2 then
          first_indent = 0
        end
        descend(out, child, first_indent, indent + offset, true)
      end
    else
      -- Container node: only nodes without text may reach this branch.
      if node.text then
        error(fmt('cannot render:\n%s', vim.inspect(node)))
      end
      local count = #node
      for i, child in ipairs(node) do
        local first_indent = indent
        if i == 1 then
          first_indent = start_indent
        end
        descend(out, child, first_indent, indent, is_list)
        -- Blank line between siblings, except inside a list and except
        -- right before a list.
        local following = node[i + 1]
        local before_list = following and following.type == 'list'
        if kind ~= 'list' and i ~= count and not before_list then
          emit('\n')
        end
      end
    end

    if trace then
      emit('</' .. kind .. '>')
    end

    return out
  end
  --- Builds a per-line function that moves trailing `*tag*` text to the right
  --- edge of a `text_width` wide line.
  ---
  --- Lines that do not end in `*...*` are returned unchanged.
  ---
  --- @param text_width integer
  --- @return fun(line: string): string
  local function align_tags(text_width)
    --- @param line string
    --- @return string
    return function(line)
      -- Optional blanks, `*...*` reaching to the end of the line, optional
      -- blanks. Because the pattern is anchored at the end, a line can match
      -- at most once; several tags are captured together as one string.
      local trailing = '%s*(%*.+%*)%s*$'

      local tags = line:match(trailing)
      if not tags then
        return line
      end

      local body = line:gsub(trailing, '')
      local tags_str = ' ' .. tags

      -- The padding is widened by the number of `*` characters beyond the
      -- first two (presumably because the stars are concealed when the help
      -- file is displayed — confirm).
      local _, stars = tags_str:gsub('%*', '')
      --- @type integer
      local conceal_offset = stars - 2

      local pad = string.rep(' ', text_width - #body - #tags_str + conceal_offset)
      return body .. pad .. tags_str
    end
  end
  --- Converts markdown `text` to vimdoc: parses it, renders the tree,
  --- right-aligns trailing `*tags*` on each line and tightens the whitespace
  --- in front of code-block openers.
  ---
  --- NOTE(review): the NBSP placeholder is the single byte 160 and every such
  --- byte in the rendered text is replaced by a space here; confirm that the
  --- input never contains multi-byte characters that include that byte.
  ---
  --- @param text string
  --- @param start_indent integer
  --- @param indent integer
  --- @param text_width integer
  --- @param is_list? boolean
  --- @return string
  function M.md_to_vimdoc(text, start_indent, indent, text_width, is_list)
    -- Add an extra newline so the parser can properly capture ending ```
    local tree = parse_md(text .. '\n')
    local fragments = render_md(tree, start_indent, indent, text_width, 0, is_list)

    -- Turn the placeholder bytes back into spaces.
    local rendered = table.concat(fragments):gsub(NBSP, ' ')

    local align = align_tags(text_width)
    local lines = vim.split(rendered, '\n')
    for i, line in ipairs(lines) do
      lines[i] = align(line)
    end

    local doc = table.concat(lines, '\n')

    -- Reduce whitespace in code-blocks
    doc = doc:gsub('\n+%s*>([a-z]+)\n', ' >%1\n')
    doc = doc:gsub('\n+%s*>\n?\n', ' >\n')

    return doc
  end
  342. return M