parabola-wiki-docs.py

#! /usr/bin/env python

import datetime
import argparse

from simplemediawiki import build_user_agent

import ParabolaWiki
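# A minimal usage sketch (the output path here is hypothetical; the flags are
# the ones defined by the argparse setup below):
#
#   ./parabola-wiki-docs.py --output-directory ./wiki-offline --clean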
if __name__ == "__main__":
    aparser = argparse.ArgumentParser(description="Download pages from Parabola Wiki and optimize them for offline browsing")
    aparser.add_argument("--output-directory", type=str, required=True, help="Path where the downloaded pages should be stored.")
    aparser.add_argument("--force", action="store_true", help="Ignore timestamp, always download the page from the wiki.")
    aparser.add_argument("--clean", action="store_true", help="Clean the output directory after downloading, useful for removing pages deleted/moved on the wiki. Warning: any unknown files found in the output directory will be deleted!")
    aparser.add_argument("--safe-filenames", action="store_true", help="Force using ASCII file names instead of the default Unicode.")

    args = aparser.parse_args()

    # Local copies older than the epoch are considered stale and re-downloaded;
    # --force moves the epoch to the present so that everything is refreshed.
    if args.force:
        epoch = datetime.datetime.utcnow()
    else:
        # this should be the date of the latest incompatible change
        epoch = datetime.datetime(2016, 3, 3, 18, 0, 0)

    # Wire up the wiki interface, the HTML optimizer and the downloader;
    # each downloaded page is passed through optimize_url for offline browsing.
    user_agent = build_user_agent(__file__, ParabolaWiki.__version__, ParabolaWiki.__url__)
    aw = ParabolaWiki.ParabolaWiki(user_agent=user_agent, safe_filenames=args.safe_filenames)
    optimizer = ParabolaWiki.Optimizer(aw, args.output_directory)
    downloader = ParabolaWiki.Downloader(aw, args.output_directory, epoch, cb_download=optimizer.optimize_url)

    aw.print_namespaces()
    # Standard MediaWiki namespace IDs: 0 = Main, 4 = Project, 12 = Help, 14 = Category
    for ns in ["0", "4", "12", "14"]:
        downloader.process_namespace(ns)

    downloader.download_images()
    downloader.download_css()

    if args.clean:
        downloader.clean_output_directory()