scrape_good_scummvm_games.py 4.3 KB

  1. """
  2. Scrape ScummVM supported games at good or excellent level, and create YAML clone templates
  3. Uses libraries:
  4. - aiohttp
  5. - asyncio
  6. - beautifulsoup4
  7. - tenacity
  8. """
import asyncio
from pathlib import Path
from typing import Container, Optional

import aiohttp
import yaml
from bs4 import BeautifulSoup
from tenacity import stop_after_attempt, retry, wait_exponential

SCUMMVM_LIST = "https://www.scummvm.org/compatibility/"
SCUMMVM_BASE_URL = "https://www.scummvm.org"
SUPPORT_LEVELS = {"Good", "Excellent"}
PLATFORM_ALIASES = {
    "Apple IIgs": "Apple II",
    "Atari ST": "Atari",
    "Macintosh": "Classic Mac OS",
    "Steam": "Windows",
    "Tandy Color Computer 3": "CoCo",
}
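
# The alias table above folds ScummVM's platform labels into the platform names
# defined in schema/originals.yaml; platforms still unknown after aliasing are
# reported by scrape_game_info() rather than added to the generated entry.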


async def main():
    # Get list of OSGC originals
    osgc_originals = set()
    for p in Path("originals").iterdir():
        if p.is_file() and p.suffix == ".yaml":
            originals = yaml.safe_load(open(p, encoding="utf-8"))
            for original in originals:
                osgc_originals.add(original["name"])

    # Get platforms
    platforms = yaml.safe_load(open(Path("schema") / "originals.yaml", encoding="utf-8"))["schema;platforms"]["enum"]

    # Get list of games
    async with aiohttp.ClientSession() as session:
        async with session.get(SCUMMVM_LIST) as resp:
            content = await resp.text()
        soup = BeautifulSoup(content, "html.parser")
        game_links = {}
        for td_name, td_support_level in zip(
            soup.find_all("td", class_="gameFullName"), soup.find_all("td", class_="gameSupportLevel")
        ):
            # Filter out those that aren't good enough
            if td_support_level.text not in SUPPORT_LEVELS:
                continue
            game_links[td_name.text] = SCUMMVM_BASE_URL + td_name.a.attrs["href"]

        # Generate originals list
        originals = list(game_links)
        print("ScummVM originals:")
        for original in sorted(originals):
            print(f"- {original}")

        # Filter out those we already have
        missing_originals = {original for original in originals if original not in osgc_originals}
        for original in missing_originals:
            if game_info := await scrape_game_info(session, game_links[original], platforms):
                print(yaml.dump(game_info))
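

# Retry policy: up to 10 attempts per game page with exponential backoff (2-10 s),
# so a transient network error or rate limiting does not abort the whole run.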
@retry(stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=2, max=10))
async def scrape_game_info(session: aiohttp.ClientSession, link: str, platforms: Container[str]) -> Optional[dict]:
    # Go to game subpage
    async with session.get(link) as resp:
        content = await resp.text()
    soup = BeautifulSoup(content, "html.parser")

    # Don't add games that aren't clones
    if soup.find("a", string="ScummVM Freeware Games"):
        return None

    # Generate game entry, with name
    game = {
        "name": soup.find("td", class_="gameFullName").text,
        "external": {},
        "platforms": [],
        "meta": {
            "genres": [],
            "themes": [],
        }
    }

    # Add Supported Platforms
    supported_platforms_title = soup.find("h3", string="Supported Platforms")
    if supported_platforms_lis := supported_platforms_title.find_next_sibling("ul"):
        for li in supported_platforms_lis.find_all("li"):
            platform = li.text.strip()
            platform = PLATFORM_ALIASES.get(platform, platform)
            if platform not in platforms:
                print(f"{platform=} unknown")
            elif platform not in game["platforms"]:
                game["platforms"].append(platform)

    # Find links
    if wikipedia_link := soup.find("a", string="Wikipedia"):
        game["external"]["wikipedia"] = wikipedia_name(wikipedia_link.attrs["href"])
    if mobygames_link := soup.find("a", string="MobyGames"):
        game["external"]["website"] = mobygames_link.attrs["href"]
    if not wikipedia_link and not mobygames_link:
        # Use ScummVM wiki as fallback
        if scummvm_link := soup.find("a", string="ScummVM Wiki"):
            game["external"]["website"] = scummvm_link.attrs["href"]
        else:
            print(f"Cannot find link for {game['name']}")
    return game


def wikipedia_name(link: str) -> str:
    """
    >>> wikipedia_name("https://en.wikipedia.org/wiki/Operation_Stealth")
    'Operation Stealth'
    """
    return link.split("/")[-1].replace("_", " ")


if __name__ == "__main__":
    asyncio.run(main())
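
# Usage sketch (assumes the script is run from the repository root, alongside the
# "originals" and "schema" directories it reads; output goes to stdout):
#     python scrape_good_scummvm_games.py
# The embedded doctest can be checked with:
#     python -m doctest scrape_good_scummvm_games.py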