gen_api_vimdoc.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. #!/usr/bin/env python3
  2. """Parses Doxygen XML output to generate Neovim's API documentation.
  3. This would be easier using lxml and XSLT, but:
  4. 1. This should avoid needing Python dependencies, especially ones that are
  5. C modules that have library dependencies (lxml requires libxml and
  6. libxslt).
  7. 2. I wouldn't know how to deal with nested indentation in <para> tags using
  8. XSLT.
  9. Each function documentation is formatted with the following rules:
  10. - Maximum width of 78 characters (`text_width`).
  11. - Spaces for indentation.
  12. - Function signature and helptag are on the same line.
  13. - Helptag is right aligned.
  14. - Signature and helptag must have a minimum of 8 spaces between them.
  15. - If the signature is too long, it is placed on the line after the
  16. helptag. The signature wraps at `text_width - 8` characters with
  17. subsequent lines indented to the open parenthesis.
  18. - Documentation body will be indented by 16 spaces.
  19. - Subsection bodies are indented an additional 4 spaces.
  20. - Documentation body consists of the function description, parameter details,
  21. return description, and C declaration.
  22. - Parameters are omitted for the `void` and `Error *` types, or if the
  23. parameter is marked as [out].
  24. - Each function documentation is separated by a single line.
  25. The C declaration is added to the end to show actual argument types.
  26. """
  27. import os
  28. import re
  29. import sys
  30. import shutil
  31. import textwrap
  32. import subprocess
  33. from xml.dom import minidom
  34. if sys.version_info[0] < 3:
  35. print("use Python 3")
  36. sys.exit(1)
  37. doc_filename = 'api.txt'
  38. # String used to find the start of the generated part of the doc.
  39. section_start_token = '*api-global*'
  40. # Required prefix for API function names.
  41. api_func_name_prefix = 'nvim_'
  42. # Section name overrides.
  43. section_name = {
  44. 'vim.c': 'Global',
  45. }
  46. # Section ordering.
  47. section_order = (
  48. 'vim.c',
  49. 'buffer.c',
  50. 'window.c',
  51. 'tabpage.c',
  52. 'ui.c',
  53. )
  54. param_exclude = (
  55. 'channel_id',
  56. )
  57. # Annotations are displayed as line items after API function descriptions.
  58. annotation_map = {
  59. 'FUNC_API_ASYNC': '{async}',
  60. }
  61. text_width = 78
  62. script_path = os.path.abspath(__file__)
  63. base_dir = os.path.dirname(os.path.dirname(script_path))
  64. src_dir = os.path.join(base_dir, 'src/nvim/api')
  65. out_dir = os.path.join(base_dir, 'tmp-api-doc')
  66. filter_cmd = '%s %s' % (sys.executable, script_path)
  67. seen_funcs = set()
  68. # Tracks `xrefsect` titles. As of this writing, used only for separating
  69. # deprecated functions.
  70. xrefs = set()
  71. # XML Parsing Utilities {{{
  72. def find_first(parent, name):
  73. """Finds the first matching node within parent."""
  74. sub = parent.getElementsByTagName(name)
  75. if not sub:
  76. return None
  77. return sub[0]
  78. def get_children(parent, name):
  79. """Yield matching child nodes within parent."""
  80. for child in parent.childNodes:
  81. if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
  82. yield child
  83. def get_child(parent, name):
  84. """Get the first matching child node."""
  85. for child in get_children(parent, name):
  86. return child
  87. return None
  88. def clean_text(text):
  89. """Cleans text.
  90. Only cleans superfluous whitespace at the moment.
  91. """
  92. return ' '.join(text.split()).strip()
  93. def clean_lines(text):
  94. """Removes superfluous lines.
  95. The beginning and end of the string is trimmed. Empty lines are collapsed.
  96. """
  97. return re.sub(r'\A\n\s*\n*|\n\s*\n*\Z', '', re.sub(r'(\n\s*\n+)+', '\n\n', text))
  98. def get_text(parent):
  99. """Combine all text in a node."""
  100. if parent.nodeType == parent.TEXT_NODE:
  101. return parent.data
  102. out = ''
  103. for node in parent.childNodes:
  104. if node.nodeType == node.TEXT_NODE:
  105. out += clean_text(node.data)
  106. elif node.nodeType == node.ELEMENT_NODE:
  107. out += ' ' + get_text(node)
  108. return out
  109. def doc_wrap(text, prefix='', width=70, func=False):
  110. """Wraps text to `width`.
  111. The first line is prefixed with `prefix`, and subsequent lines are aligned.
  112. If `func` is True, only wrap at commas.
  113. """
  114. if not width:
  115. return text
  116. indent_space = ' ' * len(prefix)
  117. if func:
  118. lines = [prefix]
  119. for part in text.split(', '):
  120. if part[-1] not in ');':
  121. part += ', '
  122. if len(lines[-1]) + len(part) > width:
  123. lines.append(indent_space)
  124. lines[-1] += part
  125. return '\n'.join(x.rstrip() for x in lines).rstrip()
  126. return '\n'.join(textwrap.wrap(text.strip(), width=width,
  127. initial_indent=prefix,
  128. subsequent_indent=indent_space))
  129. def parse_params(parent, width=62):
  130. """Parse Doxygen `parameterlist`."""
  131. name_length = 0
  132. items = []
  133. for child in parent.childNodes:
  134. if child.nodeType == child.TEXT_NODE:
  135. continue
  136. name_node = find_first(child, 'parametername')
  137. if name_node.getAttribute('direction') == 'out':
  138. continue
  139. name = get_text(name_node)
  140. if name in param_exclude:
  141. continue
  142. name = '{%s}' % name
  143. name_length = max(name_length, len(name) + 2)
  144. desc = ''
  145. desc_node = get_child(child, 'parameterdescription')
  146. if desc_node:
  147. desc = parse_parblock(desc_node, width=None)
  148. items.append((name.strip(), desc.strip()))
  149. out = 'Parameters: ~\n'
  150. for name, desc in items:
  151. name = ' %s' % name.ljust(name_length)
  152. out += doc_wrap(desc, prefix=name, width=width) + '\n'
  153. return out.strip()
  154. def parse_para(parent, width=62):
  155. """Parse doxygen `para` tag.
  156. I assume <para> is a paragraph block or "a block of text". It can contain
  157. text nodes, or other tags.
  158. """
  159. line = ''
  160. lines = []
  161. for child in parent.childNodes:
  162. if child.nodeType == child.TEXT_NODE:
  163. line += child.data
  164. elif child.nodeName == 'computeroutput':
  165. line += '`%s`' % get_text(child)
  166. else:
  167. if line:
  168. lines.append(doc_wrap(line, width=width))
  169. line = ''
  170. if child.nodeName == 'parameterlist':
  171. lines.append(parse_params(child, width=width))
  172. elif child.nodeName == 'xrefsect':
  173. title = get_text(get_child(child, 'xreftitle'))
  174. xrefs.add(title)
  175. xrefdesc = parse_para(get_child(child, 'xrefdescription'))
  176. lines.append(doc_wrap(xrefdesc, prefix='%s: ' % title,
  177. width=width) + '\n')
  178. elif child.nodeName == 'simplesect':
  179. kind = child.getAttribute('kind')
  180. if kind == 'note':
  181. lines.append('Note:')
  182. lines.append(doc_wrap(parse_para(child),
  183. prefix=' ',
  184. width=width))
  185. elif kind == 'return':
  186. lines.append('%s: ~' % kind.title())
  187. lines.append(doc_wrap(parse_para(child),
  188. prefix=' ',
  189. width=width))
  190. else:
  191. lines.append(get_text(child))
  192. if line:
  193. lines.append(doc_wrap(line, width=width))
  194. return clean_lines('\n'.join(lines).strip())
  195. def parse_parblock(parent, width=62):
  196. """Parses a nested block of `para` tags.
  197. Named after the \parblock command, but not directly related.
  198. """
  199. paragraphs = []
  200. for child in parent.childNodes:
  201. if child.nodeType == child.TEXT_NODE:
  202. paragraphs.append(doc_wrap(child.data, width=width))
  203. elif child.nodeName == 'para':
  204. paragraphs.append(parse_para(child, width=width))
  205. else:
  206. paragraphs.append(doc_wrap(get_text(child), width=width))
  207. paragraphs.append('')
  208. return clean_lines('\n'.join(paragraphs).strip())
  209. # }}}
  210. def parse_source_xml(filename):
  211. """Collects API functions.
  212. Returns two strings:
  213. 1. API functions
  214. 2. Deprecated API functions
  215. Caller decides what to do with the deprecated documentation.
  216. """
  217. global xrefs
  218. xrefs = set()
  219. functions = []
  220. deprecated_functions = []
  221. dom = minidom.parse(filename)
  222. for member in dom.getElementsByTagName('memberdef'):
  223. if member.getAttribute('static') == 'yes' or \
  224. member.getAttribute('kind') != 'function':
  225. continue
  226. loc = find_first(member, 'location')
  227. if 'private' in loc.getAttribute('file'):
  228. continue
  229. return_type = get_text(get_child(member, 'type'))
  230. if return_type == '':
  231. continue
  232. if return_type.startswith(('ArrayOf', 'DictionaryOf')):
  233. parts = return_type.strip('_').split('_')
  234. return_type = '%s(%s)' % (parts[0], ', '.join(parts[1:]))
  235. name = get_text(get_child(member, 'name'))
  236. annotations = get_text(get_child(member, 'argsstring'))
  237. if annotations and ')' in annotations:
  238. annotations = annotations.rsplit(')', 1)[-1].strip()
  239. # XXX: (doxygen 1.8.11) 'argsstring' only includes attributes of
  240. # non-void functions. Special-case void functions here.
  241. if name == 'nvim_get_mode' and len(annotations) == 0:
  242. annotations += 'FUNC_API_ASYNC'
  243. annotations = filter(None, map(lambda x: annotation_map.get(x),
  244. annotations.split()))
  245. vimtag = '*%s()*' % name
  246. args = []
  247. type_length = 0
  248. for param in get_children(member, 'param'):
  249. arg_type = get_text(get_child(param, 'type')).strip()
  250. arg_name = ''
  251. declname = get_child(param, 'declname')
  252. if declname:
  253. arg_name = get_text(declname).strip()
  254. if arg_name in param_exclude:
  255. continue
  256. if arg_type.endswith('*'):
  257. arg_type = arg_type.strip('* ')
  258. arg_name = '*' + arg_name
  259. type_length = max(type_length, len(arg_type))
  260. args.append((arg_type, arg_name))
  261. c_args = []
  262. for arg_type, arg_name in args:
  263. c_args.append(' ' + (
  264. '%s %s' % (arg_type.ljust(type_length), arg_name)).strip())
  265. c_decl = textwrap.indent('%s %s(\n%s\n);' % (return_type, name,
  266. ',\n'.join(c_args)),
  267. ' ')
  268. prefix = '%s(' % name
  269. suffix = '%s)' % ', '.join('{%s}' % a[1] for a in args
  270. if a[0] not in ('void', 'Error'))
  271. # Minimum 8 chars between signature and vimtag
  272. lhs = (text_width - 8) - len(prefix)
  273. if len(prefix) + len(suffix) > lhs:
  274. signature = vimtag.rjust(text_width) + '\n'
  275. signature += doc_wrap(suffix, width=text_width-8, prefix=prefix,
  276. func=True)
  277. else:
  278. signature = prefix + suffix
  279. signature += vimtag.rjust(text_width - len(signature))
  280. doc = ''
  281. desc = find_first(member, 'detaileddescription')
  282. if desc:
  283. doc = parse_parblock(desc)
  284. if 'DEBUG' in os.environ:
  285. print(textwrap.indent(
  286. re.sub(r'\n\s*\n+', '\n',
  287. desc.toprettyxml(indent=' ', newl='\n')), ' ' * 16))
  288. if not doc:
  289. doc = 'TODO: Documentation'
  290. annotations = '\n'.join(annotations)
  291. if annotations:
  292. annotations = ('\n\nAttributes: ~\n' +
  293. textwrap.indent(annotations, ' '))
  294. i = doc.rfind('Parameters: ~')
  295. if i == -1:
  296. doc += annotations
  297. else:
  298. doc = doc[:i] + annotations + '\n\n' + doc[i:]
  299. if 'INCLUDE_C_DECL' in os.environ:
  300. doc += '\n\nC Declaration: ~\n>\n'
  301. doc += c_decl
  302. doc += '\n<'
  303. func_doc = signature + '\n'
  304. func_doc += textwrap.indent(clean_lines(doc), ' ' * 16)
  305. func_doc = re.sub(r'^\s+([<>])$', r'\1', func_doc, flags=re.M)
  306. if 'Deprecated' in xrefs:
  307. deprecated_functions.append(func_doc)
  308. elif name.startswith(api_func_name_prefix):
  309. functions.append(func_doc)
  310. xrefs.clear()
  311. return '\n\n'.join(functions), '\n\n'.join(deprecated_functions)
  312. def delete_lines_below(filename, tokenstr):
  313. """Deletes all lines below the line containing `tokenstr`, the line itself,
  314. and one line above it.
  315. """
  316. lines = open(filename).readlines()
  317. i = 0
  318. for i, line in enumerate(lines, 1):
  319. if tokenstr in line:
  320. break
  321. i = max(0, i - 2)
  322. with open(filename, 'wt') as fp:
  323. fp.writelines(lines[0:i])
  324. def gen_docs(config):
  325. """Generate documentation.
  326. Doxygen is called and configured through stdin.
  327. """
  328. p = subprocess.Popen(['doxygen', '-'], stdin=subprocess.PIPE)
  329. p.communicate(config.format(input=src_dir, output=out_dir,
  330. filter=filter_cmd).encode('utf8'))
  331. if p.returncode:
  332. sys.exit(p.returncode)
  333. sections = {}
  334. intros = {}
  335. sep = '=' * text_width
  336. base = os.path.join(out_dir, 'xml')
  337. dom = minidom.parse(os.path.join(base, 'index.xml'))
  338. # generate docs for section intros
  339. for compound in dom.getElementsByTagName('compound'):
  340. if compound.getAttribute('kind') != 'group':
  341. continue
  342. groupname = get_text(find_first(compound, 'name'))
  343. groupxml = os.path.join(base, '%s.xml' % compound.getAttribute('refid'))
  344. desc = find_first(minidom.parse(groupxml), 'detaileddescription')
  345. if desc:
  346. doc = parse_parblock(desc)
  347. if doc:
  348. intros[groupname] = doc
  349. for compound in dom.getElementsByTagName('compound'):
  350. if compound.getAttribute('kind') != 'file':
  351. continue
  352. filename = get_text(find_first(compound, 'name'))
  353. if filename.endswith('.c'):
  354. functions, deprecated = parse_source_xml(
  355. os.path.join(base, '%s.xml' % compound.getAttribute('refid')))
  356. if not functions and not deprecated:
  357. continue
  358. if functions or deprecated:
  359. name = os.path.splitext(os.path.basename(filename))[0]
  360. if name == 'ui':
  361. name = name.upper()
  362. else:
  363. name = name.title()
  364. doc = ''
  365. intro = intros.get('api-%s' % name.lower())
  366. if intro:
  367. doc += '\n\n' + intro
  368. if functions:
  369. doc += '\n\n' + functions
  370. if 'INCLUDE_DEPRECATED' in os.environ and deprecated:
  371. doc += '\n\n\nDeprecated %s Functions: ~\n\n' % name
  372. doc += deprecated
  373. if doc:
  374. filename = os.path.basename(filename)
  375. name = section_name.get(filename, name)
  376. title = '%s Functions' % name
  377. helptag = '*api-%s*' % name.lower()
  378. sections[filename] = (title, helptag, doc)
  379. if not sections:
  380. return
  381. docs = ''
  382. i = 0
  383. for filename in section_order:
  384. if filename not in sections:
  385. continue
  386. title, helptag, section_doc = sections.pop(filename)
  387. i += 1
  388. docs += sep
  389. docs += '\n%s%s' % (title, helptag.rjust(text_width - len(title)))
  390. docs += section_doc
  391. docs += '\n\n\n'
  392. if sections:
  393. # In case new API sources are added without updating the order dict.
  394. for title, helptag, section_doc in sections.values():
  395. i += 1
  396. docs += sep
  397. docs += '\n%s%s' % (title, helptag.rjust(text_width - len(title)))
  398. docs += section_doc
  399. docs += '\n\n\n'
  400. docs = docs.rstrip() + '\n\n'
  401. docs += ' vim:tw=78:ts=8:ft=help:norl:\n'
  402. doc_file = os.path.join(base_dir, 'runtime/doc', doc_filename)
  403. delete_lines_below(doc_file, section_start_token)
  404. with open(doc_file, 'ab') as fp:
  405. fp.write(docs.encode('utf8'))
  406. shutil.rmtree(out_dir)
  407. def filter_source(filename):
  408. """Filters the source to fix macros that confuse Doxygen."""
  409. with open(filename, 'rt') as fp:
  410. print(re.sub(r'^(ArrayOf|DictionaryOf)(\(.*?\))',
  411. lambda m: m.group(1)+'_'.join(
  412. re.split(r'[^\w]+', m.group(2))),
  413. fp.read(), flags=re.M))
  414. # Doxygen Config {{{
# Doxygen configuration template. gen_docs() fills the {output}, {input} and
# {filter} placeholders with str.format() and feeds the result to Doxygen on
# stdin. Only XML output is enabled.
Doxyfile = '''
OUTPUT_DIRECTORY = {output}
INPUT = {input}
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.h *.c
RECURSIVE = YES
INPUT_FILTER = "{filter}"
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = */private/*
EXCLUDE_SYMBOLS =
GENERATE_HTML = NO
GENERATE_DOCSET = NO
GENERATE_HTMLHELP = NO
GENERATE_QHP = NO
GENERATE_TREEVIEW = NO
GENERATE_LATEX = NO
GENERATE_RTF = NO
GENERATE_MAN = NO
GENERATE_DOCBOOK = NO
GENERATE_AUTOGEN_DEF = NO
GENERATE_XML = YES
XML_OUTPUT = xml
XML_PROGRAMLISTING = NO
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = NO
'''
  443. # }}}
  444. if __name__ == "__main__":
  445. if len(sys.argv) > 1:
  446. filter_source(sys.argv[1])
  447. else:
  448. gen_docs(Doxyfile)
  449. # vim: set ft=python ts=4 sw=4 tw=79 et fdm=marker :