extract-rst-targets.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #!/usr/bin/env python
  2. # License: GPLv3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
  3. import os
  4. import re
  5. from typing import Dict, Iterator
  6. tgt_pat = re.compile(r'^.. _(\S+?):$', re.MULTILINE)
  7. title_pat = re.compile('^(.+)\n[-=^#*]{5,}$', re.MULTILINE)
  8. def find_explicit_targets(text: str) -> Iterator[str]:
  9. for m in tgt_pat.finditer(text):
  10. yield m.group(1)
  11. def find_page_title(text: str) -> str:
  12. for m in title_pat.finditer(text):
  13. return m.group(1)
  14. return ''
  15. def main() -> Dict[str, Dict[str, str]]:
  16. refs = {}
  17. docs = {}
  18. base = os.path.dirname(os.path.abspath(__file__))
  19. for dirpath, dirnames, filenames in os.walk(base):
  20. if 'generated' in dirnames:
  21. dirnames.remove('generated')
  22. for f in filenames:
  23. if f.endswith('.rst'):
  24. with open(os.path.join(dirpath, f)) as stream:
  25. raw = stream.read()
  26. href = os.path.relpath(stream.name, base).replace(os.sep, '/')
  27. href = href.rpartition('.')[0] + '/'
  28. docs[href.rstrip('/')] = find_page_title(raw)
  29. first_line = raw.lstrip('\n').partition('\n')[0]
  30. first_target_added = False
  31. for explicit_target in find_explicit_targets(raw):
  32. # Shorten the reference link to the top of the page.
  33. # Note that anchor links should still be used in HTML docs
  34. # to allow jumping within the same page.
  35. if not first_target_added:
  36. first_target_added = True
  37. if first_line.startswith(f'.. _{explicit_target}:'):
  38. refs[explicit_target] = href
  39. continue
  40. refs[explicit_target] = href + f'#{explicit_target.replace("_", "-")}'
  41. return {'ref': refs, 'doc': docs}
  42. if __name__ == '__main__':
  43. import json
  44. print(json.dumps(main(), indent=2))