update_ahmia_blacklist.py 949 B

123456789101112131415161718192021222324252627282930313233
  1. #!/usr/bin/env python
  2. # SPDX-License-Identifier: AGPL-3.0-or-later
  3. """This script saves `Ahmia's blacklist`_ for onion sites.
  4. Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
  5. ... <.github/workflows/data-update.yml>`).
  6. .. _Ahmia's blacklist: https://ahmia.fi/blacklist/
  7. """
  8. # pylint: disable=use-dict-literal
  9. import requests
  10. from searx.data import data_dir
  11. DATA_FILE = data_dir / 'ahmia_blacklist.txt'  # output file written by this script, located under searx.data's data_dir
  12. URL = 'https://ahmia.fi/blacklist/banned/'  # address the blacklist is downloaded from
  13. def fetch_ahmia_blacklist():
  14. resp = requests.get(URL, timeout=3.0)
  15. if resp.status_code != 200:
  16. # pylint: disable=broad-exception-raised
  17. raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) # type: ignore
  18. return resp.text.split()
  19. if __name__ == '__main__':
  20. blacklist = fetch_ahmia_blacklist()
  21. blacklist.sort()
  22. with DATA_FILE.open("w", encoding='utf-8') as f:
  23. f.write('\n'.join(blacklist))