# gen_help_html.py
# Converts Vim/Nvim documentation to HTML.
#
# Adapted from https://github.com/c4rlo/vimhelp/
# License: MIT
#
# Copyright (c) 2016 Carlo Teubner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
  25. import re, urllib.parse
  26. from itertools import chain
  27. HEAD = """\
  28. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  29. "http://www.w3.org/TR/html4/loose.dtd">
  30. <html>
  31. <head>
  32. <meta http-equiv="Content-type" content="text/html; charset={encoding}"/>
  33. <title>Nvim: {filename}</title>
  34. """
  35. HEAD_END = '</head>\n<body>\n'
  36. INTRO = """
  37. <h1>Nvim help files</h1>
  38. <p>HTML export of the <a href="https://neovim.io/">Nvim</a> help pages{vers-note}.
  39. Updated <a href="https://github.com/neovim/bot-ci" class="d">automatically</a> from the <a
  40. href="https://github.com/vim/vim/tree/master/runtime/doc" class="d">Nvim source repository</a>.
  41. Also includes the <a href="vim_faq.txt.html">Vim FAQ</a>, pulled from its
  42. <a href="https://github.com/chrisbra/vim_faq" class="d">source repository</a>.</p>
  43. """
  44. VERSION_NOTE = ", current as of Vim {version}"
  45. SITENAVI_LINKS = """
  46. Quick links:
  47. <a href="/">help overview</a> &middot;
  48. <a href="quickref.txt.html">quick reference</a> &middot;
  49. <a href="usr_toc.txt.html">user manual toc</a> &middot;
  50. <a href="{helptxt}#reference_toc">reference manual toc</a> &middot;
  51. <a href="vim_faq.txt.html">faq</a>
  52. """
  53. SITENAVI_LINKS_PLAIN = SITENAVI_LINKS.format(helptxt='help.txt.html')
  54. SITENAVI_LINKS_WEB = SITENAVI_LINKS.format(helptxt='/')
  55. SITENAVI_PLAIN = '<p>' + SITENAVI_LINKS_PLAIN + '</p>'
  56. SITENAVI_WEB = '<p>' + SITENAVI_LINKS_WEB + '</p>'
  57. SITENAVI_SEARCH = '<table width="100%"><tbody><tr><td>' + SITENAVI_LINKS_WEB + \
  58. '</td><td style="text-align: right; max-width: 25vw"><div class="gcse-searchbox">' \
  59. '</div></td></tr></tbody></table><div class="gcse-searchresults"></div>'
  60. TEXTSTART = """
  61. <div id="d1">
  62. <pre id="sp"> </pre>
  63. <div id="d2">
  64. <pre>
  65. """
  66. FOOTER = '</pre>'
  67. FOOTER2 = """
  68. <p id="footer">This site is maintained by Carlo Teubner (<i>(my first name) dot (my last name) at gmail dot com</i>).</p>
  69. </div>
  70. </div>
  71. </body>
  72. </html>
  73. """
  74. VIM_FAQ_LINE = '<a href="vim_faq.txt.html#vim_faq.txt" class="l">' \
  75. 'vim_faq.txt</a> Frequently Asked Questions\n'
  76. RE_TAGLINE = re.compile(r'(\S+)\s+(\S+)')
  77. PAT_WORDCHAR = '[!#-)+-{}~\xC0-\xFF]'
  78. PAT_HEADER = r'(^.*~$)'
  79. PAT_GRAPHIC = r'(^.* `$)'
  80. PAT_PIPEWORD = r'(?<!\\)\|([#-)!+-~]+)\|'
  81. PAT_STARWORD = r'\*([#-)!+-~]+)\*(?:(?=\s)|$)'
  82. PAT_COMMAND = r'`([^` ]+)`'
  83. PAT_OPTWORD = r"('(?:[a-z]{2,}|t_..)')"
  84. PAT_CTRL = r'(CTRL-(?:W_)?(?:\{char\}|<[A-Za-z]+?>|.)?)'
  85. PAT_SPECIAL = r'(<.+?>|\{.+?}|' \
  86. r'\[(?:range|line|count|offset|\+?cmd|[-+]?num|\+\+opt|' \
  87. r'arg|arguments|ident|addr|group)]|' \
  88. r'(?<=\s)\[[-a-z^A-Z0-9_]{2,}])'
  89. PAT_TITLE = r'(Vim version [0-9.a-z]+|VIM REFERENCE.*)'
  90. PAT_NOTE = r'((?<!' + PAT_WORDCHAR + r')(?:note|NOTE|Notes?):?' \
  91. r'(?!' + PAT_WORDCHAR + r'))'
  92. PAT_URL = r'((?:https?|ftp)://[^\'"<> \t]+[a-zA-Z0-9/])'
  93. PAT_WORD = r'((?<!' + PAT_WORDCHAR + r')' + PAT_WORDCHAR + r'+' \
  94. r'(?!' + PAT_WORDCHAR + r'))'
  95. RE_LINKWORD = re.compile(
  96. PAT_OPTWORD + '|' +
  97. PAT_CTRL + '|' +
  98. PAT_SPECIAL)
  99. RE_TAGWORD = re.compile(
  100. PAT_HEADER + '|' +
  101. PAT_GRAPHIC + '|' +
  102. PAT_PIPEWORD + '|' +
  103. PAT_STARWORD + '|' +
  104. PAT_COMMAND + '|' +
  105. PAT_OPTWORD + '|' +
  106. PAT_CTRL + '|' +
  107. PAT_SPECIAL + '|' +
  108. PAT_TITLE + '|' +
  109. PAT_NOTE + '|' +
  110. PAT_URL + '|' +
  111. PAT_WORD)
  112. RE_NEWLINE = re.compile(r'[\r\n]')
  113. RE_HRULE = re.compile(r'[-=]{3,}.*[-=]{3,3}$')
  114. RE_EG_START = re.compile(r'(?:.* )?>$')
  115. RE_EG_END = re.compile(r'\S')
  116. RE_SECTION = re.compile(r'[-A-Z .][-A-Z0-9 .()]*(?=\s+\*)')
  117. RE_STARTAG = re.compile(r'\s\*([^ \t|]+)\*(?:\s|$)')
  118. RE_LOCAL_ADD = re.compile(r'LOCAL ADDITIONS:\s+\*local-additions\*$')
  119. class Link(object):
  120. __slots__ = 'link_plain_same', 'link_pipe_same', \
  121. 'link_plain_foreign', 'link_pipe_foreign', \
  122. 'filename'
  123. def __init__(self, link_plain_same, link_plain_foreign,
  124. link_pipe_same, link_pipe_foreign, filename):
  125. self.link_plain_same = link_plain_same
  126. self.link_plain_foreign = link_plain_foreign
  127. self.link_pipe_same = link_pipe_same
  128. self.link_pipe_foreign = link_pipe_foreign
  129. self.filename = filename
  130. class VimH2H(object):
  131. def __init__(self, tags, version=None, is_web_version=True):
  132. self._urls = { }
  133. self._version = version
  134. self._is_web_version = is_web_version
  135. for line in RE_NEWLINE.split(tags):
  136. m = RE_TAGLINE.match(line)
  137. if m:
  138. tag, filename = m.group(1, 2)
  139. self.do_add_tag(filename, tag)
  140. def add_tags(self, filename, contents):
  141. for match in RE_STARTAG.finditer(contents):
  142. tag = match.group(1).replace('\\', '\\\\').replace('/', '\\/')
  143. self.do_add_tag(str(filename), tag)
  144. def do_add_tag(self, filename, tag):
  145. tag_quoted = urllib.parse.quote_plus(tag)
  146. def mkpart1(doc):
  147. return '<a href="' + doc + '#' + tag_quoted + '" class="'
  148. part1_same = mkpart1('')
  149. if self._is_web_version and filename == 'help.txt':
  150. doc = '/'
  151. else:
  152. doc = filename + '.html'
  153. part1_foreign = mkpart1(doc)
  154. part2 = '">' + html_escape[tag] + '</a>'
  155. def mklinks(cssclass):
  156. return (part1_same + cssclass + part2,
  157. part1_foreign + cssclass + part2)
  158. cssclass_plain = 'd'
  159. m = RE_LINKWORD.match(tag)
  160. if m:
  161. opt, ctrl, special = m.groups()
  162. if opt is not None: cssclass_plain = 'o'
  163. elif ctrl is not None: cssclass_plain = 'k'
  164. elif special is not None: cssclass_plain = 's'
  165. links_plain = mklinks(cssclass_plain)
  166. links_pipe = mklinks('l')
  167. self._urls[tag] = Link(
  168. links_plain[0], links_plain[1],
  169. links_pipe[0], links_pipe[1],
  170. filename)
  171. def maplink(self, tag, curr_filename, css_class=None):
  172. links = self._urls.get(tag)
  173. if links is not None:
  174. if links.filename == curr_filename:
  175. if css_class == 'l': return links.link_pipe_same
  176. else: return links.link_plain_same
  177. else:
  178. if css_class == 'l': return links.link_pipe_foreign
  179. else: return links.link_plain_foreign
  180. elif css_class is not None:
  181. return '<span class="' + css_class + '">' + html_escape[tag] + \
  182. '</span>'
  183. else: return html_escape[tag]
  184. def to_html(self, filename, contents, encoding):
  185. out = [ ]
  186. inexample = 0
  187. filename = str(filename)
  188. is_help_txt = (filename == 'help.txt')
  189. faq_line = False
  190. for line in RE_NEWLINE.split(contents):
  191. line = line.rstrip('\r\n')
  192. line_tabs = line
  193. line = line.expandtabs()
  194. if RE_HRULE.match(line):
  195. out.extend(('<span class="h">', line, '</span>\n'))
  196. continue
  197. if inexample == 2:
  198. if RE_EG_END.match(line):
  199. inexample = 0
  200. if line[0] == '<': line = line[1:]
  201. else:
  202. out.extend(('<span class="e">', html_escape[line],
  203. '</span>\n'))
  204. continue
  205. if RE_EG_START.match(line_tabs):
  206. inexample = 1
  207. line = line[0:-1]
  208. if RE_SECTION.match(line_tabs):
  209. m = RE_SECTION.match(line)
  210. out.extend((r'<span class="c">', m.group(0), r'</span>'))
  211. line = line[m.end():]
  212. if is_help_txt and RE_LOCAL_ADD.match(line_tabs):
  213. faq_line = True
  214. lastpos = 0
  215. for match in RE_TAGWORD.finditer(line):
  216. pos = match.start()
  217. if pos > lastpos:
  218. out.append(html_escape[line[lastpos:pos]])
  219. lastpos = match.end()
  220. header, graphic, pipeword, starword, command, opt, ctrl, \
  221. special, title, note, url, word = match.groups()
  222. if pipeword is not None:
  223. out.append(self.maplink(pipeword, filename, 'l'))
  224. elif starword is not None:
  225. out.extend(('<a name="', urllib.parse.quote_plus(starword),
  226. '" class="t">', html_escape[starword], '</a>'))
  227. elif command is not None:
  228. out.extend(('<span class="e">', html_escape[command],
  229. '</span>'))
  230. elif opt is not None:
  231. out.append(self.maplink(opt, filename, 'o'))
  232. elif ctrl is not None:
  233. out.append(self.maplink(ctrl, filename, 'k'))
  234. elif special is not None:
  235. out.append(self.maplink(special, filename, 's'))
  236. elif title is not None:
  237. out.extend(('<span class="i">', html_escape[title],
  238. '</span>'))
  239. elif note is not None:
  240. out.extend(('<span class="n">', html_escape[note],
  241. '</span>'))
  242. elif header is not None:
  243. out.extend(('<span class="h">', html_escape[header[:-1]],
  244. '</span>'))
  245. elif graphic is not None:
  246. out.append(html_escape[graphic[:-2]])
  247. elif url is not None:
  248. out.extend(('<a class="u" href="', url, '">' +
  249. html_escape[url], '</a>'))
  250. elif word is not None:
  251. out.append(self.maplink(word, filename))
  252. if lastpos < len(line):
  253. out.append(html_escape[line[lastpos:]])
  254. out.append('\n')
  255. if inexample == 1: inexample = 2
  256. if faq_line:
  257. out.append(VIM_FAQ_LINE)
  258. faq_line = False
  259. header = []
  260. header.append(HEAD.format(encoding=encoding, filename=filename))
  261. header.append(HEAD_END)
  262. if self._is_web_version and is_help_txt:
  263. vers_note = VERSION_NOTE.replace('{version}', self._version) \
  264. if self._version else ''
  265. header.append(INTRO.replace('{vers-note}', vers_note))
  266. if self._is_web_version:
  267. header.append(SITENAVI_SEARCH)
  268. sitenavi_footer = SITENAVI_WEB
  269. else:
  270. header.append(SITENAVI_PLAIN)
  271. sitenavi_footer = SITENAVI_PLAIN
  272. header.append(TEXTSTART)
  273. return ''.join(chain(header, out, (FOOTER, sitenavi_footer, FOOTER2)))
  274. class HtmlEscCache(dict):
  275. def __missing__(self, key):
  276. r = key.replace('&', '&amp;') \
  277. .replace('<', '&lt;') \
  278. .replace('>', '&gt;')
  279. self[key] = r
  280. return r
  281. html_escape = HtmlEscCache()
  282. import sys, os, os.path
#import cProfile
# Add '.' (the current working directory) to the module search path.
sys.path.append('.')
  285. def slurp(filename):
  286. try:
  287. with open(filename, encoding='UTF-8') as f:
  288. return f.read(), 'UTF-8'
  289. except UnicodeError:
  290. # 'ISO-8859-1' ?
  291. with open(filename, encoding='latin-1') as f:
  292. return f.read(), 'latin-1'
  293. def usage():
  294. return "usage: " + sys.argv[0] + " IN_DIR OUT_DIR [BASENAMES...]"
  295. def main():
  296. if len(sys.argv) < 3: sys.exit(usage())
  297. in_dir = sys.argv[1]
  298. out_dir = sys.argv[2]
  299. basenames = sys.argv[3:]
  300. print( "Processing tags...")
  301. h2h = VimH2H(slurp(os.path.join(in_dir, 'tags'))[0], is_web_version=False)
  302. if len(basenames) == 0:
  303. basenames = os.listdir(in_dir)
  304. for basename in basenames:
  305. if os.path.splitext(basename)[1] != '.txt' and basename != 'tags':
  306. print( "Ignoring " + basename)
  307. continue
  308. print( "Processing " + basename + "...")
  309. path = os.path.join(in_dir, basename)
  310. text, encoding = slurp(path)
  311. outpath = os.path.join(out_dir, basename + '.html')
  312. of = open(outpath, 'w')
  313. of.write(h2h.to_html(basename, text, encoding))
  314. of.close()
  315. main()
  316. #cProfile.run('main()')