scrape_osgl_inspirations.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. import re
  2. import httpx
  3. from mistletoe import Document
  4. from mistletoe.block_token import Heading, List
  5. from scripts.utils import games, originals
  6. INSPIRATION_PATTERN = re.compile(r"(.+) \[\d+\]")
  7. INSPIRED_PATTERN = re.compile(r"Inspired entries: (.+)")
  8. # OSGL name to OSGC alias
  9. ALIASES = {
  10. "Alone in the Dark series": "Alone in the Dark",
  11. "Anno (series)": "Anno series",
  12. "Anno 1404": "Anno series",
  13. "BioWare's Aurora engine": "Neverwinter Nights",
  14. "Blake Stone: Aliens of Gold": "Blake Stone: Planet Strike",
  15. "Blasteroids": "Asteroids",
  16. "Caesar 3": "Caesar III",
  17. "Civilization series": "Civilization",
  18. "Company of Heroes: Opposing Fronts": "Company of Heroes",
  19. "Company of Heroes: Tales of Valor": "Company of Heroes",
  20. "CrossUO": "CrossUO: Ultima Online",
  21. "Final Fantasy series": "Final Fantasy",
  22. "Krush Kill 'n' Destroy": "Krush, Kill 'n' Destroy",
  23. "Marathon 2: Durandal": "Marathon 2",
  24. "Microprose Falcon 4.0 Combat Simulator": "Falcon",
  25. "Panzer General 2": "Panzer General",
  26. "Quake II": "Quake 2",
  27. "Quake III Arena": "Quake 3",
  28. "QUakeWorld": "Quake",
  29. "Runescape Classic": "RuneScape Classic",
  30. "S.T.A.L.K.E.R: Call of Pripyat": "S.T.A.L.K.E.R.: Call of Pripyat",
  31. "Settlers": "The Settlers",
  32. "Shobon Action": "Syobon Action",
  33. "Simon Says": "Simon",
  34. "Sonic the Hedgehog series": "Sonic the Hedgehog",
  35. "Super Methane Brothers for Wii and GameCube": "Super Methane Brothers (homebrew edition)",
  36. "Super Pang": "Pang",
  37. "The Incredible Machine series": "The Incredible Machine",
  38. "Ultima series": "Ultima",
  39. "Ultima Underworld 1": "Ultima Underworld",
  40. "Warcraft": "Warcraft: Orcs & Humans",
  41. "World of Might and Magic": "OpenEnroth",
  42. "Worms": "Worms Series",
  43. "X-COM: Enemy Unknown": "X-COM: UFO Defense",
  44. }
  45. # Games that aren't games, aren't interesting enough or weren't closed source
  46. BLACKLIST = {
  47. "Angband",
  48. "arithmetic",
  49. "Black Shades",
  50. "Blob Wars Attrition",
  51. "Blobby Volley",
  52. "Brogue",
  53. "Cards Against Humanity",
  54. "Chromium B.S.U.",
  55. "CorsixTH",
  56. "Crossfire",
  57. "Cube",
  58. "Cube 2: Sauerbraten",
  59. "CUBE engine",
  60. "Daimonin",
  61. "DragonBall",
  62. "Dungeon Crawl Stone Soup",
  63. "Eternal Lands",
  64. "Falcon's Eye",
  65. "Flixel",
  66. "FooBillard",
  67. "GalaxyMage",
  68. "GearHead",
  69. "GL-117",
  70. "Kobold's Quest",
  71. "Konquest",
  72. "LBreakout",
  73. "Linley's Dungeon Crawl",
  74. "Liquid War",
  75. "LÖVE",
  76. "Metroidvania",
  77. "Noiz2",
  78. "NScripter",
  79. "OGRE",
  80. "Open Dune",
  81. "RARS",
  82. "Red Eclipse",
  83. "Revenge Of The Cats: Ethernet",
  84. "sfxr",
  85. "Teeworlds",
  86. "The Clans",
  87. "The Mana World",
  88. "Tower defense",
  89. "Transball",
  90. "TuxMath",
  91. "Tux Racer",
  92. "Urho3D",
  93. "Vavoom",
  94. "Volleyball",
  95. "Webhangman",
  96. "XKobo",
  97. "XRay engine",
  98. "Xtank",
  99. }
  100. # Valid clones but we don't want to add it to OSGC unless we really have to
  101. BLACKLIST_CLONES = {
  102. "Colonization too", # halted long ago, status unknown
  103. "CommonDrops", # halted, unknown status
  104. "DOOM-iOS", # Superseded by DOOM-iOS2
  105. "Slot-Racers", # 404, inactive
  106. }
  107. def main():
  108. resp = httpx.get("https://raw.githubusercontent.com/Trilarion/opensourcegames/master/inspirations.md")
  109. doc = Document(resp.text)
  110. # Only look at level 2 headings
  111. children = [child for child in doc.children if not isinstance(child, Heading) or child.level == 2]
  112. inspiration = None
  113. osgl_games = {}
  114. for child in children:
  115. if isinstance(child, Heading):
  116. inspiration = INSPIRATION_PATTERN.match(child.children[0].content).group(1)
  117. else:
  118. assert isinstance(child, List)
  119. for subchild in child.children:
  120. text = subchild.children[0].children[0].content
  121. if matches := INSPIRED_PATTERN.match(text):
  122. inspireds = matches.group(1).split(", ")
  123. osgl_games[inspiration] = inspireds
  124. # Find games and clones from OSGC
  125. osgc_originals = set()
  126. for original in originals():
  127. osgc_originals.add(original["name"])
  128. for name in original.get("names", []):
  129. osgc_originals.add(name)
  130. osgc_games = {game["name"] for game in games()}
  131. osgl_inspireds = {
  132. inspired
  133. for inspireds in osgl_games.values()
  134. for inspired in inspireds
  135. }
  136. for game in osgl_games:
  137. if game in BLACKLIST:
  138. continue
  139. # Exclude games that are open source clones to begin with
  140. if game in osgc_games and game not in osgc_originals:
  141. continue
  142. # Exclude transitive inspirations - we only want the originals
  143. if game in osgl_inspireds:
  144. continue
  145. alias = ALIASES.get(game)
  146. if game not in osgc_originals and (not alias or alias not in osgc_originals):
  147. print(f"Missing original: {game}")
  148. for inspired in osgl_inspireds:
  149. if inspired in BLACKLIST_CLONES:
  150. continue
  151. if inspired not in osgc_games:
  152. print(f"Missing clone: {inspired}")
  153. if __name__ == "__main__":
  154. main()