scrape_osgl_inspirations.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. import re
  2. import httpx
  3. from mistletoe import Document
  4. from mistletoe.block_token import Heading, List
  5. from scripts.utils import games, originals
# Matched against the text of a level 2 heading, which looks like
# "<name> [<digits>]"; group 1 captures the name
INSPIRATION_PATTERN = re.compile(r"(.+) \[\d+\]")
# Matched against the text of a list item; group 1 captures everything after
# the "Inspired entries: " prefix (split on ", " by main())
INSPIRED_PATTERN = re.compile(r"Inspired entries: (.+)")
# OSGL name to OSGC alias.
# main() looks a name up here when the OSGL spelling is not found in OSGC as-is,
# so entries whose names merely differ between the two lists aren't reported.
ALIASES = {
    "Alone in the Dark series": "Alone in the Dark",
    "Anno (series)": "Anno series",
    "Anno 1404": "Anno series",
    "BioWare's Aurora engine": "Neverwinter Nights",
    "Blake Stone: Aliens of Gold": "Blake Stone: Planet Strike",
    "Blasteroids": "Asteroids",
    "Caesar 3": "Caesar III",
    "Civilization series": "Civilization",
    "Company of Heroes: Opposing Fronts": "Company of Heroes",
    "Company of Heroes: Tales of Valor": "Company of Heroes",
    "CrossFire 1981": "Crossfire",
    "CrossUO": "CrossUO: Ultima Online",
    "Final Fantasy series": "Final Fantasy",
    "Krush Kill 'n' Destroy": "Krush, Kill 'n' Destroy",
    "Marathon 2: Durandal": "Marathon 2",
    "Microprose Falcon 4.0 Combat Simulator": "Falcon",
    "Panzer General 2": "Panzer General",
    "Quake II": "Quake 2",
    "Quake III Arena": "Quake 3",
    "QUakeWorld": "Quake",
    "Runescape Classic": "RuneScape Classic",
    "S.T.A.L.K.E.R: Call of Pripyat": "S.T.A.L.K.E.R.: Call of Pripyat",
    "Settlers": "The Settlers",
    "Shobon Action": "Syobon Action",
    "Simon Says": "Simon",
    "Sonic the Hedgehog series": "Sonic the Hedgehog",
    "Super Methane Brothers for Wii and GameCube": "Super Methane Brothers (homebrew edition)",
    "Super Pang": "Pang",
    "The Incredible Machine series": "The Incredible Machine",
    "Ultima series": "Ultima",
    "Ultima Underworld 1": "Ultima Underworld",
    "Warcraft": "Warcraft: Orcs & Humans",
    "World of Might and Magic": "OpenEnroth",
    "Worms": "Worms Series",
    "X-COM: Enemy Unknown": "X-COM: UFO Defense",
}
# OSGL inspirations that main() never reports as missing originals:
# entries that aren't games, aren't interesting enough or weren't closed source
BLACKLIST = {
    "Angband",
    "arithmetic",
    "Black Shades",
    "Blob Wars Attrition",
    "Blobby Volley",
    "Brogue",
    "Cards Against Humanity",
    "Catan",
    "Chromium B.S.U.",
    "CorsixTH",
    "Cube",
    "Cube 2: Sauerbraten",
    "CUBE engine",
    "Daimonin",
    "DragonBall",
    "Dungeon Crawl Stone Soup",
    "Eternal Lands",
    "Falcon's Eye",
    "Flixel",
    "FooBillard",
    "GalaxyMage",
    "GearHead",
    "GL-117",
    "Kobold's Quest",
    "Konquest",
    "LBreakout",
    "Linley's Dungeon Crawl",
    "Liquid War",
    "LÖVE",
    "Metroidvania",
    "Noiz2",
    "NScripter",
    "OGRE",
    "Open Dune",
    "RARS",
    "Red Eclipse",
    "Revenge Of The Cats: Ethernet",
    "sfxr",
    "Teeworlds",
    "The Clans",
    "The Mana World",
    "Tower defense",
    "Transball",
    "TuxMath",
    "Tux Racer",
    "Urho3D",
    "Vavoom",
    "Volleyball",
    "Webhangman",
    "XKobo",
    "XRay engine",
    "Xtank",
}
# Valid clones, but we don't want to add them to OSGC unless we really have to.
# main() never reports these as missing clones.
BLACKLIST_CLONES = {
    "Colonization too",  # halted long ago, status unknown
    "CommonDrops",  # halted, unknown status
    "DOOM-iOS",  # Superseded by DOOM-iOS2
    "Slot-Racers",  # 404, inactive
}
  108. def main():
  109. resp = httpx.get("https://raw.githubusercontent.com/Trilarion/opensourcegames/master/inspirations.md")
  110. doc = Document(resp.text)
  111. # Only look at level 2 headings
  112. children = [child for child in doc.children if not isinstance(child, Heading) or child.level == 2]
  113. inspiration = None
  114. osgl_games = {}
  115. for child in children:
  116. if isinstance(child, Heading):
  117. inspiration = INSPIRATION_PATTERN.match(child.children[0].content).group(1)
  118. else:
  119. assert isinstance(child, List)
  120. for subchild in child.children:
  121. text = subchild.children[0].children[0].content
  122. if matches := INSPIRED_PATTERN.match(text):
  123. inspireds = matches.group(1).split(", ")
  124. osgl_games[inspiration] = inspireds
  125. # Find games and clones from OSGC
  126. osgc_originals = set()
  127. for original in originals():
  128. osgc_originals.add(original["name"])
  129. for name in original.get("names", []):
  130. osgc_originals.add(name)
  131. osgc_games = {game["name"] for game in games()}
  132. osgl_inspireds = {
  133. inspired
  134. for inspireds in osgl_games.values()
  135. for inspired in inspireds
  136. }
  137. for game in osgl_games:
  138. if game in BLACKLIST:
  139. continue
  140. # Exclude games that are open source clones to begin with
  141. if game in osgc_games and game not in osgc_originals:
  142. continue
  143. # Exclude transitive inspirations - we only want the originals
  144. if game in osgl_inspireds:
  145. continue
  146. alias = ALIASES.get(game)
  147. if game not in osgc_originals and (not alias or alias not in osgc_originals):
  148. print(f"Missing original: {game}")
  149. for inspired in osgl_inspireds:
  150. if inspired in BLACKLIST_CLONES:
  151. continue
  152. if inspired not in osgc_games:
  153. print(f"Missing clone: {inspired}")
# Run the report only when executed as a script, not when imported
if __name__ == "__main__":
    main()