awesome_game_remakes.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. """
  2. Scrape Awesome Game Remakes and find games OSGC doesn't have
  3. To run, install from pip:
  4. - aiohttp
  5. - markdown
  6. - lxml
  7. """
  8. import aiohttp
  9. import asyncio
  10. import markdown
  11. import re
  12. from lxml import etree
  13. from scripts.utils import games
  14. URL = "https://raw.githubusercontent.com/radek-sprta/awesome-game-remakes/master/README.md"
  15. BLACKLIST_PATTERNS = [
  16. re.compile(pat) for pat in [
  17. "https://awesome.re",
  18. "^#.+",
  19. ]
  20. ]
  21. async def main():
  22. # Find links from AGR
  23. async with aiohttp.ClientSession() as session:
  24. async with session.get(URL) as resp:
  25. content = await resp.text()
  26. md = markdown.markdown(content)
  27. doc = etree.fromstring(f"<div>{md}</div>")
  28. urls = set()
  29. for link in doc.xpath("//a"):
  30. url = link.attrib["href"]
  31. for pat in BLACKLIST_PATTERNS:
  32. if pat.match(url):
  33. break
  34. else:
  35. urls.add(url)
  36. # Find URLs and repos from OSGC
  37. osgc_urls = set()
  38. for game in games():
  39. if repo := game.get("repo", ""):
  40. osgc_urls.add(repo)
  41. if url := game.get("repo", ""):
  42. osgc_urls.add(url)
  43. # Print URLS that OSGC doesn't have
  44. for url in urls - osgc_urls:
  45. print(url)
if __name__ == "__main__":
    # Run the coroutine only when executed as a script, not when imported.
    asyncio.run(main())