123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345 |
- #!/usr/bin/env python3
- """Manages page redirects for the Godot documentation on ReadTheDocs. (https://docs.godotengine.org)
- Note that RTD redirects only apply in case of 404 errors, and to all branches and languages:
- https://docs.readthedocs.io/en/stable/user-defined-redirects.html.
- If this ever changes, we need to rework how we manage these (likely adding per-branch logic).
- How to use:
- - Install requirements: pip3 install -r requirements.txt
- - Store your API token in RTD_API_TOKEN environment variable or
- a .env file (the latter requires the package dotenv)
- - Generate new redirects from two git revisions using convert_git_renames_to_csv.py
- - Run this script
- Example:
- python convert_git_renames_to_csv.py stable latest >> redirects.csv
- python create_redirects.py
- This would add all files that were renamed in latest from stable to redirects.csv,
- and then create the redirects on RTD accordingly.
- Make sure to use the old branch first, then the more recent branch (i.e., stable > master).
- You need to have both branches or revisions available and up to date locally.
- Care is taken to not add redirects that already exist on RTD.
- """
- import argparse
- import csv
- import os
- import time
- import requests
- from requests.models import default_hooks
- from requests.adapters import HTTPAdapter
- from requests.packages.urllib3.util.retry import Retry
- RTD_AUTH_TOKEN = ""
- REQUEST_HEADERS = ""
- REDIRECT_URL = "https://readthedocs.org/api/v3/projects/godot/redirects/"
- USER_AGENT = "Godot RTD Redirects on Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
- DEFAULT_PAGINATED_SIZE = 1024
- API_SLEEP_TIME = 0.2 # Seconds.
- REDIRECT_SUFFIXES = [".html", "/"]
- BUILD_PATH = "../../_build/html"
- TIMEOUT_SECONDS = 5
- HTTP = None
- def parse_command_line_args():
- parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
- parser.add_argument(
- "-f",
- "--file",
- metavar="file",
- default="redirects.csv",
- type=str,
- help="Path to a CSV file used to keep a list of redirects, containing two columns: source and destination.",
- )
- parser.add_argument(
- "--delete",
- action="store_true",
- help="Deletes all currently setup 'page' and 'exact' redirects on ReadTheDocs.",
- )
- parser.add_argument(
- "--dry-run",
- action="store_true",
- help="Safe mode: Run the program and output information without any calls to the ReadTheDocs API.",
- )
- parser.add_argument(
- "--dump",
- action="store_true",
- help="Only dumps or deletes (if --delete) existing RTD redirects, skips submission.",
- )
- parser.add_argument(
- "-v",
- "--verbose",
- action="store_true",
- help="Enables verbose output.",
- )
- parser.add_argument(
- "--validate",
- action="store_true",
- help="Validates each redirect by checking the target page exists. Implies --dry-run.",
- )
- return parser.parse_args()
- def is_dry_run(args):
- return args.dry_run or args.validate
- def validate(destination):
- p = BUILD_PATH + destination
- if not os.path.exists(p):
- print("Invalid destination: " + destination + " (" + p + ")")
- def make_redirect(source, destination, args, retry=0):
- if args.validate:
- validate(destination)
- json_data = {"from_url": source, "to_url": destination, "type": "page"}
- headers = REQUEST_HEADERS
- if args.verbose:
- print("POST " + REDIRECT_URL, headers, json_data)
- if is_dry_run(args):
- if not args.validate:
- print(f"Created redirect {source} -> {destination} (DRY RUN)")
- return
- response = HTTP.post(
- REDIRECT_URL,
- json=json_data,
- headers=headers,
- timeout=TIMEOUT_SECONDS
- )
- if response.status_code == 201:
- print(f"Created redirect {source} -> {destination}")
- elif response.status_code == 429 and retry<5:
- retry += 1
- time.sleep(retry*retry)
- make_redirect(source, destination, args, retry)
- return
- else:
- print(
- f"Failed to create redirect {source} -> {destination}. "
- f"Status code: {response.status_code}"
- )
- exit(1)
- def sleep():
- time.sleep(API_SLEEP_TIME)
- def id(from_url, to_url):
- return from_url + " -> " + to_url
- def get_paginated(url, parameters={"limit": DEFAULT_PAGINATED_SIZE}):
- entries = []
- count = -1
- while True:
- data = HTTP.get(
- url,
- headers=REQUEST_HEADERS,
- params=parameters,
- timeout=TIMEOUT_SECONDS
- )
- if data.status_code != 200:
- if data.status_code == 401:
- print("Access denied, check RTD API key in RTD_AUTH_TOKEN!")
- print("Error accessing RTD API: " + url + ": " + str(data.status_code))
- exit(1)
- else:
- json = data.json()
- if json["count"] and count < 0:
- count = json["count"]
- entries.extend(json["results"])
- next = json["next"]
- if next and len(next) > 0 and next != url:
- url = next
- sleep()
- continue
- if count > 0 and len(entries) != count:
- print(
- "Mismatch getting paginated content from " + url + ": " +
- "expected " + str(count) + " items, got " + str(len(entries)))
- exit(1)
- return entries
- def delete_redirect(id):
- url = REDIRECT_URL + str(id)
- data = HTTP.delete(url, headers=REQUEST_HEADERS, timeout=TIMEOUT_SECONDS)
- if data.status_code != 204:
- print("Error deleting redirect with ID", id, "- code:", data.status_code)
- exit(1)
- else:
- print("Deleted redirect", id, "on RTD.")
- def get_existing_redirects(delete=False):
- redirs = get_paginated(REDIRECT_URL)
- existing = []
- for redir in redirs:
- if redir["type"] != "page":
- print(
- "Ignoring redirect (only type 'page' is handled): #" +
- str(redir["pk"]) + " " + id(redir["from_url"], redir["to_url"]) +
- " on ReadTheDocs is '" + redir["type"] + "'. "
- )
- continue
- if delete:
- delete_redirect(redir["pk"])
- sleep()
- else:
- existing.append([redir["from_url"], redir["to_url"]])
- return existing
- def set_auth(token):
- global RTD_AUTH_TOKEN
- RTD_AUTH_TOKEN = token
- global REQUEST_HEADERS
- REQUEST_HEADERS = {"Authorization": f"token {RTD_AUTH_TOKEN}", "User-Agent": USER_AGENT}
- def load_auth():
- try:
- import dotenv
- dotenv.load_dotenv()
- except:
- print("Failed to load dotenv. If you want to use .env files, install the dotenv.")
- token = os.environ.get("RTD_AUTH_TOKEN", "")
- if len(token) < 1:
- print("Missing auth token in RTD_AUTH_TOKEN env var or .env file not found. Aborting.")
- exit(1)
- set_auth(token)
- def has_suffix(s, suffixes):
- for suffix in suffixes:
- if s.endswith(suffix):
- return True
- return False
- def is_valid_redirect_url(url):
- if len(url) < len("/a"):
- return False
- if not has_suffix(url.lower(), REDIRECT_SUFFIXES):
- return False
- return True
- def redirect_to_str(item):
- return id(item[0], item[1])
- def main():
- args = parse_command_line_args()
- if not is_dry_run(args):
- load_auth()
- retry_strategy = Retry(
- total=3,
- status_forcelist=[429, 500, 502, 503, 504],
- backoff_factor=2,
- method_whitelist=["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"]
- )
- adapter = HTTPAdapter(max_retries=retry_strategy)
- global HTTP
- HTTP = requests.Session()
- HTTP.mount("https://", adapter)
- HTTP.mount("http://", adapter)
- to_add = []
- redirects_file = []
- with open(args.file, "r", encoding="utf-8") as f:
- redirects_file = list(csv.DictReader(f))
- if len(redirects_file) > 0:
- assert redirects_file[0].keys() == {
- "source",
- "destination",
- }, "CSV file must have a header and two columns: source, destination."
- for row in redirects_file:
- to_add.append([row["source"], row["destination"]])
- print("Loaded", len(redirects_file), "redirects from", args.file + ".")
- existing = []
- if not is_dry_run(args):
- existing = get_existing_redirects(args.delete)
- print("Loaded", len(existing), "existing redirects from RTD.")
- print("Total redirects:", str(len(to_add)) +
- " new + " + str(len(existing)), "existing =", to_add+existing, "total")
- redirects = []
- added = {}
- sources = {}
- for redirect in to_add:
- if len(redirect) != 2:
- print("Invalid redirect:", redirect, "- expected 2 elements, got:", len(redirect))
- continue
- if redirect[0] == redirect[1]:
- print("Invalid redirect:", redirect, "- redirects to itself!")
- continue
- if not is_valid_redirect_url(redirect[0]) or not is_valid_redirect_url(redirect[1]):
- print("Invalid redirect:", redirect, "- invalid URL!")
- continue
- if not redirect[0].startswith("/") or not redirect[1].startswith("/"):
- print("Invalid redirect:", redirect, "- invalid URL: should start with slash!")
- continue
- if redirect[0] in sources:
- print("Invalid redirect:", redirect,
- "- collision, source", redirect[0], "already has redirect:",
- sources[redirect[0]])
- continue
- redirect_id = id(redirect[0], redirect[1])
- if redirect_id in added:
- # Duplicate; skip.
- continue
- added[redirect_id] = True
- sources[redirect[0]] = redirect
- redirects.append(redirect)
- redirects.sort(key=redirect_to_str)
- with open(args.file, "w", encoding="utf-8", newline="") as f:
- writer = csv.writer(f)
- writer.writerows([["source", "destination"]])
- writer.writerows(redirects)
- existing_ids = {}
- for e in existing:
- existing_ids[id(e[0], e[1])] = True
- if not args.dump:
- print("Creating redirects.")
- for redirect in redirects:
- if not id(redirect[0], redirect[1]) in existing_ids:
- make_redirect(redirect[0], redirect[1], args)
- if not is_dry_run(args):
- sleep()
- print("Finished creating", len(redirects), "redirects.")
- if is_dry_run(args):
- print("THIS WAS A DRY RUN, NOTHING WAS SUBMITTED TO READTHEDOCS!")
- if __name__ == "__main__":
- main()
|