123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561 |
- #!/usr/bin/env python3
- """Parses Doxygen XML output to generate Neovim's API documentation.
- This would be easier using lxml and XSLT, but:
- 1. This should avoid needing Python dependencies, especially ones that are
- C modules that have library dependencies (lxml requires libxml and
- libxslt).
- 2. I wouldn't know how to deal with nested indentation in <para> tags using
- XSLT.
- Each function documentation is formatted with the following rules:
- - Maximum width of 78 characters (`text_width`).
- - Spaces for indentation.
- - Function signature and helptag are on the same line.
- - Helptag is right aligned.
- - Signature and helptag must have a minimum of 8 spaces between them.
- - If the signature is too long, it is placed on the line after the
- helptag. The signature wraps at `text_width - 8` characters with
- subsequent lines indented to the open parenthesis.
- - Documentation body will be indented by 16 spaces.
- - Subsection bodies are indented an additional 4 spaces.
- - Documentation body consists of the function description, parameter details,
- return description, and C declaration.
- - Parameters are omitted for the `void` and `Error *` types, or if the
- parameter is marked as [out].
- - Each function documentation is separated by a single line.
- The C declaration is added to the end to show actual argument types.
- """
- import os
- import re
- import sys
- import shutil
- import textwrap
- import subprocess
- from xml.dom import minidom
- if sys.version_info[0] < 3:
- print("use Python 3")
- sys.exit(1)
- doc_filename = 'api.txt'
- # String used to find the start of the generated part of the doc.
- section_start_token = '*api-global*'
- # Required prefix for API function names.
- api_func_name_prefix = 'nvim_'
- # Section name overrides.
- section_name = {
- 'vim.c': 'Global',
- }
- # Section ordering.
- section_order = (
- 'vim.c',
- 'buffer.c',
- 'window.c',
- 'tabpage.c',
- 'ui.c',
- )
- param_exclude = (
- 'channel_id',
- )
- # Annotations are displayed as line items after API function descriptions.
- annotation_map = {
- 'FUNC_API_ASYNC': '{async}',
- }
- text_width = 78
- script_path = os.path.abspath(__file__)
- base_dir = os.path.dirname(os.path.dirname(script_path))
- src_dir = os.path.join(base_dir, 'src/nvim/api')
- out_dir = os.path.join(base_dir, 'tmp-api-doc')
- filter_cmd = '%s %s' % (sys.executable, script_path)
- seen_funcs = set()
- # Tracks `xrefsect` titles. As of this writing, used only for separating
- # deprecated functions.
- xrefs = set()
- # XML Parsing Utilities {{{
- def find_first(parent, name):
- """Finds the first matching node within parent."""
- sub = parent.getElementsByTagName(name)
- if not sub:
- return None
- return sub[0]
- def get_children(parent, name):
- """Yield matching child nodes within parent."""
- for child in parent.childNodes:
- if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
- yield child
- def get_child(parent, name):
- """Get the first matching child node."""
- for child in get_children(parent, name):
- return child
- return None
- def clean_text(text):
- """Cleans text.
- Only cleans superfluous whitespace at the moment.
- """
- return ' '.join(text.split()).strip()
- def clean_lines(text):
- """Removes superfluous lines.
- The beginning and end of the string is trimmed. Empty lines are collapsed.
- """
- return re.sub(r'\A\n\s*\n*|\n\s*\n*\Z', '', re.sub(r'(\n\s*\n+)+', '\n\n', text))
- def get_text(parent):
- """Combine all text in a node."""
- if parent.nodeType == parent.TEXT_NODE:
- return parent.data
- out = ''
- for node in parent.childNodes:
- if node.nodeType == node.TEXT_NODE:
- out += clean_text(node.data)
- elif node.nodeType == node.ELEMENT_NODE:
- out += ' ' + get_text(node)
- return out
- def doc_wrap(text, prefix='', width=70, func=False):
- """Wraps text to `width`.
- The first line is prefixed with `prefix`, and subsequent lines are aligned.
- If `func` is True, only wrap at commas.
- """
- if not width:
- return text
- indent_space = ' ' * len(prefix)
- if func:
- lines = [prefix]
- for part in text.split(', '):
- if part[-1] not in ');':
- part += ', '
- if len(lines[-1]) + len(part) > width:
- lines.append(indent_space)
- lines[-1] += part
- return '\n'.join(x.rstrip() for x in lines).rstrip()
- return '\n'.join(textwrap.wrap(text.strip(), width=width,
- initial_indent=prefix,
- subsequent_indent=indent_space))
- def parse_params(parent, width=62):
- """Parse Doxygen `parameterlist`."""
- name_length = 0
- items = []
- for child in parent.childNodes:
- if child.nodeType == child.TEXT_NODE:
- continue
- name_node = find_first(child, 'parametername')
- if name_node.getAttribute('direction') == 'out':
- continue
- name = get_text(name_node)
- if name in param_exclude:
- continue
- name = '{%s}' % name
- name_length = max(name_length, len(name) + 2)
- desc = ''
- desc_node = get_child(child, 'parameterdescription')
- if desc_node:
- desc = parse_parblock(desc_node, width=None)
- items.append((name.strip(), desc.strip()))
- out = 'Parameters: ~\n'
- for name, desc in items:
- name = ' %s' % name.ljust(name_length)
- out += doc_wrap(desc, prefix=name, width=width) + '\n'
- return out.strip()
- def parse_para(parent, width=62):
- """Parse doxygen `para` tag.
- I assume <para> is a paragraph block or "a block of text". It can contain
- text nodes, or other tags.
- """
- line = ''
- lines = []
- for child in parent.childNodes:
- if child.nodeType == child.TEXT_NODE:
- line += child.data
- elif child.nodeName == 'computeroutput':
- line += '`%s`' % get_text(child)
- else:
- if line:
- lines.append(doc_wrap(line, width=width))
- line = ''
- if child.nodeName == 'parameterlist':
- lines.append(parse_params(child, width=width))
- elif child.nodeName == 'xrefsect':
- title = get_text(get_child(child, 'xreftitle'))
- xrefs.add(title)
- xrefdesc = parse_para(get_child(child, 'xrefdescription'))
- lines.append(doc_wrap(xrefdesc, prefix='%s: ' % title,
- width=width) + '\n')
- elif child.nodeName == 'simplesect':
- kind = child.getAttribute('kind')
- if kind == 'note':
- lines.append('Note:')
- lines.append(doc_wrap(parse_para(child),
- prefix=' ',
- width=width))
- elif kind == 'return':
- lines.append('%s: ~' % kind.title())
- lines.append(doc_wrap(parse_para(child),
- prefix=' ',
- width=width))
- else:
- lines.append(get_text(child))
- if line:
- lines.append(doc_wrap(line, width=width))
- return clean_lines('\n'.join(lines).strip())
- def parse_parblock(parent, width=62):
- """Parses a nested block of `para` tags.
- Named after the \parblock command, but not directly related.
- """
- paragraphs = []
- for child in parent.childNodes:
- if child.nodeType == child.TEXT_NODE:
- paragraphs.append(doc_wrap(child.data, width=width))
- elif child.nodeName == 'para':
- paragraphs.append(parse_para(child, width=width))
- else:
- paragraphs.append(doc_wrap(get_text(child), width=width))
- paragraphs.append('')
- return clean_lines('\n'.join(paragraphs).strip())
- # }}}
- def parse_source_xml(filename):
- """Collects API functions.
- Returns two strings:
- 1. API functions
- 2. Deprecated API functions
- Caller decides what to do with the deprecated documentation.
- """
- global xrefs
- xrefs = set()
- functions = []
- deprecated_functions = []
- dom = minidom.parse(filename)
- for member in dom.getElementsByTagName('memberdef'):
- if member.getAttribute('static') == 'yes' or \
- member.getAttribute('kind') != 'function':
- continue
- loc = find_first(member, 'location')
- if 'private' in loc.getAttribute('file'):
- continue
- return_type = get_text(get_child(member, 'type'))
- if return_type == '':
- continue
- if return_type.startswith(('ArrayOf', 'DictionaryOf')):
- parts = return_type.strip('_').split('_')
- return_type = '%s(%s)' % (parts[0], ', '.join(parts[1:]))
- name = get_text(get_child(member, 'name'))
- annotations = get_text(get_child(member, 'argsstring'))
- if annotations and ')' in annotations:
- annotations = annotations.rsplit(')', 1)[-1].strip()
- # XXX: (doxygen 1.8.11) 'argsstring' only includes attributes of
- # non-void functions. Special-case void functions here.
- if name == 'nvim_get_mode' and len(annotations) == 0:
- annotations += 'FUNC_API_ASYNC'
- annotations = filter(None, map(lambda x: annotation_map.get(x),
- annotations.split()))
- vimtag = '*%s()*' % name
- args = []
- type_length = 0
- for param in get_children(member, 'param'):
- arg_type = get_text(get_child(param, 'type')).strip()
- arg_name = ''
- declname = get_child(param, 'declname')
- if declname:
- arg_name = get_text(declname).strip()
- if arg_name in param_exclude:
- continue
- if arg_type.endswith('*'):
- arg_type = arg_type.strip('* ')
- arg_name = '*' + arg_name
- type_length = max(type_length, len(arg_type))
- args.append((arg_type, arg_name))
- c_args = []
- for arg_type, arg_name in args:
- c_args.append(' ' + (
- '%s %s' % (arg_type.ljust(type_length), arg_name)).strip())
- c_decl = textwrap.indent('%s %s(\n%s\n);' % (return_type, name,
- ',\n'.join(c_args)),
- ' ')
- prefix = '%s(' % name
- suffix = '%s)' % ', '.join('{%s}' % a[1] for a in args
- if a[0] not in ('void', 'Error'))
- # Minimum 8 chars between signature and vimtag
- lhs = (text_width - 8) - len(prefix)
- if len(prefix) + len(suffix) > lhs:
- signature = vimtag.rjust(text_width) + '\n'
- signature += doc_wrap(suffix, width=text_width-8, prefix=prefix,
- func=True)
- else:
- signature = prefix + suffix
- signature += vimtag.rjust(text_width - len(signature))
- doc = ''
- desc = find_first(member, 'detaileddescription')
- if desc:
- doc = parse_parblock(desc)
- if 'DEBUG' in os.environ:
- print(textwrap.indent(
- re.sub(r'\n\s*\n+', '\n',
- desc.toprettyxml(indent=' ', newl='\n')), ' ' * 16))
- if not doc:
- doc = 'TODO: Documentation'
- annotations = '\n'.join(annotations)
- if annotations:
- annotations = ('\n\nAttributes: ~\n' +
- textwrap.indent(annotations, ' '))
- i = doc.rfind('Parameters: ~')
- if i == -1:
- doc += annotations
- else:
- doc = doc[:i] + annotations + '\n\n' + doc[i:]
- if 'INCLUDE_C_DECL' in os.environ:
- doc += '\n\nC Declaration: ~\n>\n'
- doc += c_decl
- doc += '\n<'
- func_doc = signature + '\n'
- func_doc += textwrap.indent(clean_lines(doc), ' ' * 16)
- func_doc = re.sub(r'^\s+([<>])$', r'\1', func_doc, flags=re.M)
- if 'Deprecated' in xrefs:
- deprecated_functions.append(func_doc)
- elif name.startswith(api_func_name_prefix):
- functions.append(func_doc)
- xrefs.clear()
- return '\n\n'.join(functions), '\n\n'.join(deprecated_functions)
- def delete_lines_below(filename, tokenstr):
- """Deletes all lines below the line containing `tokenstr`, the line itself,
- and one line above it.
- """
- lines = open(filename).readlines()
- i = 0
- for i, line in enumerate(lines, 1):
- if tokenstr in line:
- break
- i = max(0, i - 2)
- with open(filename, 'wt') as fp:
- fp.writelines(lines[0:i])
- def gen_docs(config):
- """Generate documentation.
- Doxygen is called and configured through stdin.
- """
- p = subprocess.Popen(['doxygen', '-'], stdin=subprocess.PIPE)
- p.communicate(config.format(input=src_dir, output=out_dir,
- filter=filter_cmd).encode('utf8'))
- if p.returncode:
- sys.exit(p.returncode)
- sections = {}
- intros = {}
- sep = '=' * text_width
- base = os.path.join(out_dir, 'xml')
- dom = minidom.parse(os.path.join(base, 'index.xml'))
- # generate docs for section intros
- for compound in dom.getElementsByTagName('compound'):
- if compound.getAttribute('kind') != 'group':
- continue
- groupname = get_text(find_first(compound, 'name'))
- groupxml = os.path.join(base, '%s.xml' % compound.getAttribute('refid'))
- desc = find_first(minidom.parse(groupxml), 'detaileddescription')
- if desc:
- doc = parse_parblock(desc)
- if doc:
- intros[groupname] = doc
- for compound in dom.getElementsByTagName('compound'):
- if compound.getAttribute('kind') != 'file':
- continue
- filename = get_text(find_first(compound, 'name'))
- if filename.endswith('.c'):
- functions, deprecated = parse_source_xml(
- os.path.join(base, '%s.xml' % compound.getAttribute('refid')))
- if not functions and not deprecated:
- continue
- if functions or deprecated:
- name = os.path.splitext(os.path.basename(filename))[0]
- if name == 'ui':
- name = name.upper()
- else:
- name = name.title()
- doc = ''
- intro = intros.get('api-%s' % name.lower())
- if intro:
- doc += '\n\n' + intro
- if functions:
- doc += '\n\n' + functions
- if 'INCLUDE_DEPRECATED' in os.environ and deprecated:
- doc += '\n\n\nDeprecated %s Functions: ~\n\n' % name
- doc += deprecated
- if doc:
- filename = os.path.basename(filename)
- name = section_name.get(filename, name)
- title = '%s Functions' % name
- helptag = '*api-%s*' % name.lower()
- sections[filename] = (title, helptag, doc)
- if not sections:
- return
- docs = ''
- i = 0
- for filename in section_order:
- if filename not in sections:
- continue
- title, helptag, section_doc = sections.pop(filename)
- i += 1
- docs += sep
- docs += '\n%s%s' % (title, helptag.rjust(text_width - len(title)))
- docs += section_doc
- docs += '\n\n\n'
- if sections:
- # In case new API sources are added without updating the order dict.
- for title, helptag, section_doc in sections.values():
- i += 1
- docs += sep
- docs += '\n%s%s' % (title, helptag.rjust(text_width - len(title)))
- docs += section_doc
- docs += '\n\n\n'
- docs = docs.rstrip() + '\n\n'
- docs += ' vim:tw=78:ts=8:ft=help:norl:\n'
- doc_file = os.path.join(base_dir, 'runtime/doc', doc_filename)
- delete_lines_below(doc_file, section_start_token)
- with open(doc_file, 'ab') as fp:
- fp.write(docs.encode('utf8'))
- shutil.rmtree(out_dir)
- def filter_source(filename):
- """Filters the source to fix macros that confuse Doxygen."""
- with open(filename, 'rt') as fp:
- print(re.sub(r'^(ArrayOf|DictionaryOf)(\(.*?\))',
- lambda m: m.group(1)+'_'.join(
- re.split(r'[^\w]+', m.group(2))),
- fp.read(), flags=re.M))
- # Doxygen Config {{{
- Doxyfile = '''
- OUTPUT_DIRECTORY = {output}
- INPUT = {input}
- INPUT_ENCODING = UTF-8
- FILE_PATTERNS = *.h *.c
- RECURSIVE = YES
- INPUT_FILTER = "{filter}"
- EXCLUDE =
- EXCLUDE_SYMLINKS = NO
- EXCLUDE_PATTERNS = */private/*
- EXCLUDE_SYMBOLS =
- GENERATE_HTML = NO
- GENERATE_DOCSET = NO
- GENERATE_HTMLHELP = NO
- GENERATE_QHP = NO
- GENERATE_TREEVIEW = NO
- GENERATE_LATEX = NO
- GENERATE_RTF = NO
- GENERATE_MAN = NO
- GENERATE_DOCBOOK = NO
- GENERATE_AUTOGEN_DEF = NO
- GENERATE_XML = YES
- XML_OUTPUT = xml
- XML_PROGRAMLISTING = NO
- ENABLE_PREPROCESSING = YES
- MACRO_EXPANSION = YES
- EXPAND_ONLY_PREDEF = NO
- '''
- # }}}
- if __name__ == "__main__":
- if len(sys.argv) > 1:
- filter_source(sys.argv[1])
- else:
- gen_docs(Doxyfile)
- # vim: set ft=python ts=4 sw=4 tw=79 et fdm=marker :
|