gen_help_html.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. # Converts Vim/Nvim documentation to HTML.
  2. #
  3. # USAGE:
  4. # 1. python3 scripts/gen_help_html.py runtime/doc/ ~/neovim.github.io/t/
# 2. cd ~/neovim.github.io/ && jekyll serve --host 0.0.0.0
# 3. Visit http://localhost:4000/t/help.txt.html
  7. #
  8. # Adapted from https://github.com/c4rlo/vimhelp/
  9. # License: MIT
  10. #
  11. # Copyright (c) 2016 Carlo Teubner
  12. #
  13. # Permission is hereby granted, free of charge, to any person obtaining a copy
  14. # of this software and associated documentation files (the "Software"), to deal
  15. # in the Software without restriction, including without limitation the rights
  16. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. # copies of the Software, and to permit persons to whom the Software is
  18. # furnished to do so, subject to the following conditions:
  19. #
  20. # The above copyright notice and this permission notice shall be included in
  21. # all copies or substantial portions of the Software.
  22. #
  23. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  29. # SOFTWARE.
  30. import os
  31. import re
  32. import urllib.parse
  33. import datetime
  34. import sys
  35. from itertools import chain
# Opening of every generated page: doctype, <head>, inline CSS and <title>.
# Rendered with str.format() (fields: encoding, filename) in
# VimH2H.to_html(), which is why the literal CSS braces are doubled.
HEAD = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset={encoding}"/>
<style>
.h {{
font-weight: bold;
}}
h1 {{
font-family: sans-serif;
}}
pre {{
font-family: sans-serif;
}}
</style>
<title>Nvim: {filename}</title>
"""
# Closes <head> and opens <body>; appended right after HEAD.
HEAD_END = '</head>\n<body>\n'
# Introductory blurb, emitted only for help.txt in the web version.
# The '{vers-note}' marker is filled in with str.replace(), not str.format().
INTRO = """
<h1>Nvim help files</h1>
<p>
<a href="https://neovim.io/">Nvim</a> help pages{vers-note}.
Updated <a href="https://github.com/neovim/bot-ci" class="d">automatically</a>
from the <a href="https://github.com/neovim/neovim" class="d">Nvim source</a>.
</p>
"""
# Text substituted for '{vers-note}' in INTRO when a version is known;
# '{version}' is likewise filled in with str.replace().
VERSION_NOTE = ", current as of Nvim {version}"
# Navigation link bar template; {helptxt} is the URL of the help.txt page.
SITENAVI_LINKS = """
<a href="quickref.txt.html">Quick reference</a> &middot;
<a href="usr_toc.txt.html">User manual</a> &middot;
<a href="{helptxt}#reference_toc">Reference manual</a> &middot;
"""
# Two concrete variants of the link bar: the plain one points at
# help.txt.html, the web one points at the site root '/'.
SITENAVI_LINKS_PLAIN = SITENAVI_LINKS.format(helptxt='help.txt.html')
SITENAVI_LINKS_WEB = SITENAVI_LINKS.format(helptxt='/')
SITENAVI_PLAIN = '<p>' + SITENAVI_LINKS_PLAIN + '</p>'
SITENAVI_WEB = '<p>' + SITENAVI_LINKS_WEB + '</p>'
# Web-version page header: the web link bar in a table next to a
# "gcse-searchbox" placeholder div, followed by a "gcse-searchresults" div.
SITENAVI_SEARCH = '<table width="100%"><tbody><tr><td>' + SITENAVI_LINKS_WEB + \
    '</td><td style="text-align: right; max-width: 25vw"><div class="gcse-searchbox">' \
    '</div></td></tr></tbody></table><div class="gcse-searchresults"></div>'
# Opens the wrapper <div>s and the <pre> that holds the converted help text.
# The "sp" <pre> contains a run of exactly 80 spaces.
TEXTSTART = """
<div id="d1">
<pre id="sp">""" + (" " * 80) + """</pre>
<div id="d2">
<pre>
"""
# Closes the <pre> opened by TEXTSTART.
FOOTER = '</pre>'
# Page trailer. Formatted once, at import time, so every page written by one
# run carries the same timestamp (local time); the commit is always '?'.
FOOTER2 = """
<p id="footer">Generated {generated_date} from <code>{commit}</code></p>
</div>
</div>
</body>
</html>
""".format(
    generated_date='{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()),
    commit='?')
# First two whitespace-separated fields of a line; VimH2H.__init__ reads
# them as (tag, filename).
RE_TAGLINE = re.compile(r'(\S+)\s+(\S+)')
# Character class for one "word" character, used to build PAT_NOTE/PAT_WORD.
PAT_WORDCHAR = '[!#-)+-{}~\xC0-\xFF]'
# The PAT_* fragments below each contain exactly one capturing group, so the
# combined regexes yield one group per fragment, in the order they are joined.
# Whole line ending in '~'.
PAT_HEADER = r'(^.*~$)'
# Whole line ending in ' `'.
PAT_GRAPHIC = r'(^.* `$)'
# |word| whose opening bar is not preceded by a backslash.
PAT_PIPEWORD = r'(?<!\\)\|([#-)!+-~]+)\|'
# *word* followed by whitespace or end of line.
PAT_STARWORD = r'\*([#-)!+-~]+)\*(?:(?=\s)|$)'
# `text` in backticks, containing no space or backtick.
PAT_COMMAND = r'`([^` ]+)`'
# Single-quoted name: two or more lowercase letters, or t_ plus two chars.
PAT_OPTWORD = r"('(?:[a-z]{2,}|t_..)')"
# CTRL- key notation, optionally with W_ and a {char}, <Name> or single char.
PAT_CTRL = r'(CTRL-(?:W_)?(?:\{char\}|<[A-Za-z]+?>|.)?)'
# <...>, {...}, a fixed set of bracketed keywords such as [range] or [count],
# or a bracketed token of 2+ characters preceded by whitespace.
PAT_SPECIAL = r'(<.+?>|\{.+?}|' \
    r'\[(?:range|line|count|offset|\+?cmd|[-+]?num|\+\+opt|' \
    r'arg|arguments|ident|addr|group)]|' \
    r'(?<=\s)\[[-a-z^A-Z0-9_]{2,}])'
# "Vim version ..." or "VIM REFERENCE..." text.
PAT_TITLE = r'(Vim version [0-9.a-z]+|VIM REFERENCE.*)'
# note / NOTE / Note / Notes with optional colon, not adjacent to word chars.
PAT_NOTE = r'((?<!' + PAT_WORDCHAR + r')(?:note|NOTE|Notes?):?' \
    r'(?!' + PAT_WORDCHAR + r'))'
# http(s) or ftp URL; must end in an alphanumeric character or '/'.
PAT_URL = r'((?:https?|ftp)://[^\'"<> \t]+[a-zA-Z0-9/])'
# Maximal run of word characters.
PAT_WORD = r'((?<!' + PAT_WORDCHAR + r')' + PAT_WORDCHAR + r'+' \
    r'(?!' + PAT_WORDCHAR + r'))'
# Three groups, unpacked as (opt, ctrl, special) in VimH2H.do_add_tag to pick
# the CSS class of a tag's link.
RE_LINKWORD = re.compile(
    PAT_OPTWORD + '|' +
    PAT_CTRL + '|' +
    PAT_SPECIAL)
# Tokenizer for a help line. The twelve groups are unpacked positionally in
# VimH2H.to_html, so the order here must stay in sync with that unpacking.
RE_TAGWORD = re.compile(
    PAT_HEADER + '|' +
    PAT_GRAPHIC + '|' +
    PAT_PIPEWORD + '|' +
    PAT_STARWORD + '|' +
    PAT_COMMAND + '|' +
    PAT_OPTWORD + '|' +
    PAT_CTRL + '|' +
    PAT_SPECIAL + '|' +
    PAT_TITLE + '|' +
    PAT_NOTE + '|' +
    PAT_URL + '|' +
    PAT_WORD)
# Matches a single CR or LF character; used with split(), so a CRLF pair
# produces an empty piece between the two characters.
RE_NEWLINE = re.compile(r'[\r\n]')
# H1 header "=====…"
# H2 header "-----…"
# Both kinds are matched by the same pattern; to_html renders the line that
# follows either one as an <h1>.
RE_HRULE = re.compile(r'[-=]{3,}.*[-=]{3,3}$')
# Line that is just '>' or ends in ' >': the following lines are an example.
RE_EG_START = re.compile(r'(?:.* )?>$')
# Used with match(): a line starting with a non-whitespace character ends
# the example block.
RE_EG_END = re.compile(r'\S')
# Leading run of capitals, digits, spaces and '-.()' that is followed by
# whitespace and a '*'.
RE_SECTION = re.compile(r'[-A-Z .][-A-Z0-9 .()]*(?=\s+\*)')
# *tag* preceded by whitespace and followed by whitespace or end of text;
# used by VimH2H.add_tags to collect tag definitions.
RE_STARTAG = re.compile(r'\s\*([^ \t|]+)\*(?:\s|$)')
# NOTE(review): not referenced anywhere in the visible code.
RE_LOCAL_ADD = re.compile(r'LOCAL ADDITIONS:\s+\*local-additions\*$')
  138. class Link(object):
  139. __slots__ = 'link_plain_same', 'link_pipe_same', \
  140. 'link_plain_foreign', 'link_pipe_foreign', \
  141. 'filename'
  142. def __init__(self, link_plain_same, link_plain_foreign,
  143. link_pipe_same, link_pipe_foreign, filename):
  144. self.link_plain_same = link_plain_same
  145. self.link_plain_foreign = link_plain_foreign
  146. self.link_pipe_same = link_pipe_same
  147. self.link_pipe_foreign = link_pipe_foreign
  148. self.filename = filename
  149. class VimH2H(object):
  150. def __init__(self, tags, version=None, is_web_version=True):
  151. self._urls = {}
  152. self._version = version
  153. self._is_web_version = is_web_version
  154. for line in RE_NEWLINE.split(tags):
  155. m = RE_TAGLINE.match(line)
  156. if m:
  157. tag, filename = m.group(1, 2)
  158. self.do_add_tag(filename, tag)
  159. def add_tags(self, filename, contents):
  160. for match in RE_STARTAG.finditer(contents):
  161. tag = match.group(1).replace('\\', '\\\\').replace('/', '\\/')
  162. self.do_add_tag(str(filename), tag)
  163. def do_add_tag(self, filename, tag):
  164. tag_quoted = urllib.parse.quote_plus(tag)
  165. def mkpart1(doc):
  166. return '<a href="' + doc + '#' + tag_quoted + '" class="'
  167. part1_same = mkpart1('')
  168. if self._is_web_version and filename == 'help.txt':
  169. doc = '/'
  170. else:
  171. doc = filename + '.html'
  172. part1_foreign = mkpart1(doc)
  173. part2 = '">' + html_escape[tag] + '</a>'
  174. def mklinks(cssclass):
  175. return (part1_same + cssclass + part2,
  176. part1_foreign + cssclass + part2)
  177. cssclass_plain = 'd'
  178. m = RE_LINKWORD.match(tag)
  179. if m:
  180. opt, ctrl, special = m.groups()
  181. if opt is not None:
  182. cssclass_plain = 'o'
  183. elif ctrl is not None:
  184. cssclass_plain = 'k'
  185. elif special is not None:
  186. cssclass_plain = 's'
  187. links_plain = mklinks(cssclass_plain)
  188. links_pipe = mklinks('l')
  189. self._urls[tag] = Link(
  190. links_plain[0], links_plain[1],
  191. links_pipe[0], links_pipe[1],
  192. filename)
  193. def maplink(self, tag, curr_filename, css_class=None):
  194. links = self._urls.get(tag)
  195. if links is not None:
  196. if links.filename == curr_filename:
  197. if css_class == 'l':
  198. return links.link_pipe_same
  199. else:
  200. return links.link_plain_same
  201. else:
  202. if css_class == 'l':
  203. return links.link_pipe_foreign
  204. else:
  205. return links.link_plain_foreign
  206. elif css_class is not None:
  207. return '<span class="' + css_class + '">' + html_escape[tag] + \
  208. '</span>'
  209. else:
  210. return html_escape[tag]
  211. def to_html(self, filename, contents, encoding):
  212. out = []
  213. inexample = 0
  214. filename = str(filename)
  215. is_help_txt = (filename == 'help.txt')
  216. last = ''
  217. for line in RE_NEWLINE.split(contents):
  218. line = line.rstrip('\r\n')
  219. line_tabs = line
  220. line = line.expandtabs()
  221. if last == 'h1':
  222. out.extend(('</pre>')) # XXX
  223. out.extend(('<h1>', line.rstrip(), '</h1>\n'))
  224. out.extend(('<pre>'))
  225. last = ''
  226. continue
  227. if RE_HRULE.match(line):
  228. # out.extend(('<span class="h">', line, '</span>\n'))
  229. last = 'h1'
  230. continue
  231. if inexample == 2:
  232. if RE_EG_END.match(line):
  233. inexample = 0
  234. if line[0] == '<':
  235. line = line[1:]
  236. else:
  237. out.extend(('<span class="e">', html_escape[line],
  238. '</span>\n'))
  239. continue
  240. if RE_EG_START.match(line_tabs):
  241. inexample = 1
  242. line = line[0:-1]
  243. if RE_SECTION.match(line_tabs):
  244. m = RE_SECTION.match(line)
  245. out.extend((r'<span class="c">', m.group(0), r'</span>'))
  246. line = line[m.end():]
  247. lastpos = 0
  248. for match in RE_TAGWORD.finditer(line):
  249. pos = match.start()
  250. if pos > lastpos:
  251. out.append(html_escape[line[lastpos:pos]])
  252. lastpos = match.end()
  253. header, graphic, pipeword, starword, command, opt, ctrl, \
  254. special, title, note, url, word = match.groups()
  255. if pipeword is not None:
  256. out.append(self.maplink(pipeword, filename, 'l'))
  257. elif starword is not None:
  258. out.extend(('<a name="', urllib.parse.quote_plus(starword),
  259. '" class="t">', html_escape[starword], '</a>'))
  260. elif command is not None:
  261. out.extend(('<span class="e">', html_escape[command],
  262. '</span>'))
  263. elif opt is not None:
  264. out.append(self.maplink(opt, filename, 'o'))
  265. elif ctrl is not None:
  266. out.append(self.maplink(ctrl, filename, 'k'))
  267. elif special is not None:
  268. out.append(self.maplink(special, filename, 's'))
  269. elif title is not None:
  270. out.extend(('<span class="i">', html_escape[title],
  271. '</span>'))
  272. elif note is not None:
  273. out.extend(('<span class="n">', html_escape[note],
  274. '</span>'))
  275. elif header is not None:
  276. out.extend(('<span class="h">', html_escape[header[:-1]],
  277. '</span>'))
  278. elif graphic is not None:
  279. out.append(html_escape[graphic[:-2]])
  280. elif url is not None:
  281. out.extend(('<a class="u" href="', url, '">' +
  282. html_escape[url], '</a>'))
  283. elif word is not None:
  284. out.append(self.maplink(word, filename))
  285. if lastpos < len(line):
  286. out.append(html_escape[line[lastpos:]])
  287. out.append('\n')
  288. if inexample == 1:
  289. inexample = 2
  290. header = []
  291. header.append(HEAD.format(encoding=encoding, filename=filename))
  292. header.append(HEAD_END)
  293. if self._is_web_version and is_help_txt:
  294. vers_note = VERSION_NOTE.replace('{version}', self._version) \
  295. if self._version else ''
  296. header.append(INTRO.replace('{vers-note}', vers_note))
  297. if self._is_web_version:
  298. header.append(SITENAVI_SEARCH)
  299. sitenavi_footer = SITENAVI_WEB
  300. else:
  301. header.append(SITENAVI_PLAIN)
  302. sitenavi_footer = SITENAVI_PLAIN
  303. header.append(TEXTSTART)
  304. return ''.join(chain(header, out, (FOOTER, sitenavi_footer, FOOTER2)))
  305. class HtmlEscCache(dict):
  306. def __missing__(self, key):
  307. r = key.replace('&', '&amp;') \
  308. .replace('<', '&lt;') \
  309. .replace('>', '&gt;')
  310. self[key] = r
  311. return r
  312. html_escape = HtmlEscCache()
  313. def slurp(filename):
  314. try:
  315. with open(filename, encoding='UTF-8') as f:
  316. return f.read(), 'UTF-8'
  317. except UnicodeError:
  318. # 'ISO-8859-1' ?
  319. with open(filename, encoding='latin-1') as f:
  320. return f.read(), 'latin-1'
  321. def usage():
  322. return "usage: " + sys.argv[0] + " IN_DIR OUT_DIR [BASENAMES...]"
  323. def main():
  324. if len(sys.argv) < 3:
  325. sys.exit(usage())
  326. in_dir = sys.argv[1]
  327. out_dir = sys.argv[2]
  328. basenames = sys.argv[3:]
  329. print("Processing tags...")
  330. h2h = VimH2H(slurp(os.path.join(in_dir, 'tags'))[0], is_web_version=False)
  331. if len(basenames) == 0:
  332. basenames = os.listdir(in_dir)
  333. for basename in basenames:
  334. if os.path.splitext(basename)[1] != '.txt' and basename != 'tags':
  335. print("Ignoring " + basename)
  336. continue
  337. print("Processing " + basename + "...")
  338. path = os.path.join(in_dir, basename)
  339. text, encoding = slurp(path)
  340. outpath = os.path.join(out_dir, basename + '.html')
  341. of = open(outpath, 'w')
  342. of.write(h2h.to_html(basename, text, encoding))
  343. of.close()
  344. main()