create_redirects.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. #!/usr/bin/env python3
  2. """Manages page redirects for the Godot documentation on ReadTheDocs. (https://docs.godotengine.org)
  3. Note that RTD redirects only apply in case of 404 errors, and to all branches and languages:
  4. https://docs.readthedocs.io/en/stable/user-defined-redirects.html.
  5. If this ever changes, we need to rework how we manage these (likely adding per-branch logic).
  6. How to use:
  7. - Install requirements: pip3 install -r requirements.txt
  8. - Store your API token in the RTD_AUTH_TOKEN environment variable or
  9. a .env file (the latter requires the package dotenv)
  10. - Generate new redirects from two git revisions using convert_git_renames_to_csv.py
  11. - Run this script
  12. Example:
  13. python convert_git_renames_to_csv.py stable latest >> redirects.csv
  14. python create_redirects.py
  15. This would add all files that were renamed in latest from stable to redirects.csv,
  16. and then create the redirects on RTD accordingly.
  17. Make sure to use the old branch first, then the more recent branch (i.e., stable > master).
  18. You need to have both branches or revisions available and up to date locally.
  19. Care is taken to not add redirects that already exist on RTD.
  20. """
  21. import argparse
  22. import csv
  23. import os
  24. import time
  25. import requests
  26. from requests.models import default_hooks
  27. from requests.adapters import HTTPAdapter
  28. from requests.packages.urllib3.util.retry import Retry
  # Authentication state. Both names start empty and are overwritten by
  # set_auth() once a token has been loaded.
  RTD_AUTH_TOKEN = ""
  REQUEST_HEADERS = ""

  # ReadTheDocs API v3 endpoint for the redirects of the "godot" project.
  REDIRECT_URL = "https://readthedocs.org/api/v3/projects/godot/redirects/"

  # User-Agent header value sent with every API request.
  USER_AGENT = (
      "Godot RTD Redirects on Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
      "AppleWebKit/537.36 (KHTML, like Gecko) "
      "Chrome/97.0.4692.71 Safari/537.36"
  )

  # Value of the "limit" query parameter used by get_paginated() by default.
  DEFAULT_PAGINATED_SIZE = 1024

  # Pause applied by sleep() between consecutive API calls.
  API_SLEEP_TIME = 0.2  # Seconds.

  # A redirect URL is only accepted when it ends with one of these suffixes.
  REDIRECT_SUFFIXES = [".html", "/"]

  # Prefix prepended to a destination by validate() to locate the built page.
  BUILD_PATH = "../../_build/html"

  # Timeout passed to every HTTP request.
  TIMEOUT_SECONDS = 5

  # Shared requests session; assigned in main().
  HTTP = None
  def parse_command_line_args():
      """Parse the process command line and return the argparse namespace.

      The module docstring is used verbatim as the help description. The
      resulting namespace exposes ``file``, ``delete``, ``dry_run``, ``dump``,
      ``verbose`` and ``validate``.
      """
      cli = argparse.ArgumentParser(
          formatter_class=argparse.RawDescriptionHelpFormatter,
          description=__doc__,
      )

      # The only option that takes a value: the CSV file to read and rewrite.
      cli.add_argument(
          "-f",
          "--file",
          help="Path to a CSV file used to keep a list of redirects, containing two columns: source and destination.",
          type=str,
          default="redirects.csv",
          metavar="file",
      )

      # All remaining options are plain on/off switches, registered in the
      # order they should appear in the --help output.
      switches = (
          (
              ("--delete",),
              "Deletes all currently setup 'page' and 'exact' redirects on ReadTheDocs.",
          ),
          (
              ("--dry-run",),
              "Safe mode: Run the program and output information without any calls to the ReadTheDocs API.",
          ),
          (
              ("--dump",),
              "Only dumps or deletes (if --delete) existing RTD redirects, skips submission.",
          ),
          (
              ("-v", "--verbose"),
              "Enables verbose output.",
          ),
          (
              ("--validate",),
              "Validates each redirect by checking the target page exists. Implies --dry-run.",
          ),
      )
      for flag_names, help_text in switches:
          cli.add_argument(*flag_names, action="store_true", help=help_text)

      return cli.parse_args()
  def is_dry_run(args):
      """Tell whether the ReadTheDocs API must not be modified for this run.

      True-ish when either ``--dry-run`` or ``--validate`` was given.
      """
      if args.dry_run:
          return args.dry_run
      return args.validate
  def validate(destination):
      """Report a redirect destination that has no matching file on disk.

      The destination is appended to BUILD_PATH; if nothing exists at the
      resulting path a message is printed. Nothing is returned or raised for
      a missing page.
      """
      candidate = BUILD_PATH + destination
      if os.path.exists(candidate):
          return
      print(f"Invalid destination: {destination} ({candidate})")
  def make_redirect(source, destination, args, retry=0):
      """Create one 'page' redirect on ReadTheDocs.

      source / destination: the from and to URLs of the redirect.
      args: parsed command-line arguments (validate, verbose, dry_run are read).
      retry: number of retries already spent on HTTP 429 responses.

      In validate mode the destination is checked on disk; in any dry-run mode
      no request is sent. On HTTP 429 the request is repeated (at most until
      the retry counter reaches 5), sleeping retry**2 seconds before each new
      attempt. Any other non-201 status prints an error and exits with code 1.
      """
      attempt = retry
      while True:
          if args.validate:
              validate(destination)

          payload = {"from_url": source, "to_url": destination, "type": "page"}
          request_headers = REQUEST_HEADERS

          if args.verbose:
              print("POST " + REDIRECT_URL, request_headers, payload)

          if is_dry_run(args):
              # Validation prints its own diagnostics, so stay quiet there.
              if not args.validate:
                  print(f"Created redirect {source} -> {destination} (DRY RUN)")
              return

          response = HTTP.post(
              REDIRECT_URL,
              json=payload,
              headers=request_headers,
              timeout=TIMEOUT_SECONDS,
          )

          if response.status_code == 201:
              print(f"Created redirect {source} -> {destination}")
              return

          if response.status_code == 429 and attempt < 5:
              # Rate limited: back off quadratically, then try again.
              attempt += 1
              time.sleep(attempt * attempt)
              continue

          print(
              f"Failed to create redirect {source} -> {destination}. "
              f"Status code: {response.status_code}"
          )
          exit(1)
  def sleep():
      """Pause for API_SLEEP_TIME seconds; used between consecutive API calls."""
      pause_seconds = API_SLEEP_TIME
      time.sleep(pause_seconds)
  def id(from_url, to_url):
      """Return the string key "<from_url> -> <to_url>" identifying a redirect."""
      return " -> ".join((from_url, to_url))
  def get_paginated(url, parameters={"limit": DEFAULT_PAGINATED_SIZE}):
      """Fetch every entry of a paginated API listing.

      url: address of the first page.
      parameters: query parameters sent with each request (the default dict is
          shared between calls but is never modified here).

      Follows the "next" link of each response until it is empty or equal to
      the URL just fetched, pausing between pages. Exits with code 1 on any
      non-200 response, or when the number of collected entries differs from
      the "count" reported by the API. Returns the list of collected entries.
      """
      collected = []
      expected_total = -1

      while True:
          response = HTTP.get(
              url,
              headers=REQUEST_HEADERS,
              params=parameters,
              timeout=TIMEOUT_SECONDS,
          )

          if response.status_code != 200:
              if response.status_code == 401:
                  print("Access denied, check RTD API key in RTD_AUTH_TOKEN!")
              print("Error accessing RTD API: " + url + ": " + str(response.status_code))
              exit(1)

          page = response.json()

          # Remember the total reported by the first page that has one.
          if page["count"] and expected_total < 0:
              expected_total = page["count"]

          collected.extend(page["results"])

          following = page["next"]
          if not following or len(following) == 0 or following == url:
              break

          url = following
          sleep()

      if expected_total > 0 and len(collected) != expected_total:
          print(
              "Mismatch getting paginated content from " + url + ": " +
              "expected " + str(expected_total) + " items, got " + str(len(collected)))
          exit(1)

      return collected
  def delete_redirect(id):
      """Delete the redirect with the given ID on ReadTheDocs.

      Prints a confirmation when the API answers 204; any other status prints
      an error and exits with code 1.
      """
      target = REDIRECT_URL + str(id)
      response = HTTP.delete(target, timeout=TIMEOUT_SECONDS, headers=REQUEST_HEADERS)

      if response.status_code == 204:
          print("Deleted redirect", id, "on RTD.")
          return

      print("Error deleting redirect with ID", id, "- code:", response.status_code)
      exit(1)
  def get_existing_redirects(delete=False):
      """Load the 'page' redirects currently configured on ReadTheDocs.

      delete: when true, every 'page' redirect found is deleted instead of
          being collected.

      Redirects of any other type are reported and ignored. Returns a list of
      [from_url, to_url] pairs (empty when deleting).
      """
      kept = []

      for entry in get_paginated(REDIRECT_URL):
          kind = entry["type"]

          if kind != "page":
              label = id(entry["from_url"], entry["to_url"])
              print(
                  "Ignoring redirect (only type 'page' is handled): #" +
                  str(entry["pk"]) + " " + label +
                  " on ReadTheDocs is '" + kind + "'. "
              )
              continue

          if not delete:
              kept.append([entry["from_url"], entry["to_url"]])
              continue

          delete_redirect(entry["pk"])
          sleep()

      return kept
  def set_auth(token):
      """Store the API token and rebuild the headers sent with every request."""
      global RTD_AUTH_TOKEN, REQUEST_HEADERS

      RTD_AUTH_TOKEN = token
      REQUEST_HEADERS = {
          "Authorization": f"token {token}",
          "User-Agent": USER_AGENT,
      }
  def load_auth():
      """Read the API token from the environment and register it via set_auth().

      If the optional ``dotenv`` module can be imported, a ``.env`` file is
      loaded first so it can provide RTD_AUTH_TOKEN. Loading it is best-effort:
      a missing module or a failure while reading the file is reported and the
      environment is used as-is. KeyboardInterrupt and SystemExit are no longer
      swallowed.

      Exits with code 1 when RTD_AUTH_TOKEN is missing or empty.
      """
      try:
          import dotenv
      except ImportError:
          print("Failed to load dotenv. If you want to use .env files, install the dotenv.")
      else:
          try:
              dotenv.load_dotenv()
          except Exception as error:
              # Keep going: the token may still be set in the environment.
              print(f"Failed to load dotenv: {error}")

      token = os.environ.get("RTD_AUTH_TOKEN", "")
      if not token:
          print("Missing auth token in RTD_AUTH_TOKEN env var or .env file not found. Aborting.")
          raise SystemExit(1)

      set_auth(token)
  def has_suffix(s, suffixes):
      """Return True when the string ``s`` ends with any entry of ``suffixes``."""
      return any(s.endswith(candidate) for candidate in suffixes)
  def is_valid_redirect_url(url):
      """Check that ``url`` is usable as a redirect source or destination.

      It must be at least as long as "/a" and, compared case-insensitively,
      end with one of REDIRECT_SUFFIXES.
      """
      long_enough = len(url) >= len("/a")
      return long_enough and has_suffix(url.lower(), REDIRECT_SUFFIXES)
  def redirect_to_str(item):
      """Return the identifying string of a [source, destination] pair (sort key)."""
      source, destination = item[0], item[1]
      return id(source, destination)
  def main():
      """Synchronise the redirects CSV file with ReadTheDocs.

      Steps:
      1. Parse arguments and, unless this is a dry run, load the API token.
      2. Set up the shared HTTP session with automatic retries.
      3. Read the CSV file, then (unless dry run) fetch or delete the
         redirects that already exist on ReadTheDocs.
      4. Drop invalid, duplicate and colliding entries, sort the remainder and
         write it back to the CSV file.
      5. Unless --dump was given, create every redirect that does not exist
         on ReadTheDocs yet.

      Exits with an error message when the CSV header is not exactly
      "source,destination".
      """
      global HTTP

      args = parse_command_line_args()
      dry_run = is_dry_run(args)

      if not dry_run:
          load_auth()

      adapter = HTTPAdapter(max_retries=_make_retry_strategy())
      HTTP = requests.Session()
      HTTP.mount("https://", adapter)
      HTTP.mount("http://", adapter)

      to_add = _read_redirects_csv(args.file)
      print("Loaded", len(to_add), "redirects from", args.file + ".")

      existing = []
      if not dry_run:
          existing = get_existing_redirects(args.delete)
          print("Loaded", len(existing), "existing redirects from RTD.")

      # Report the combined count (the list itself used to be printed here).
      print(
          "Total redirects:",
          str(len(to_add)) + " new + " + str(len(existing)),
          "existing =",
          len(to_add) + len(existing),
          "total",
      )

      redirects = _select_valid_redirects(to_add)
      redirects.sort(key=redirect_to_str)

      # Write the cleaned, sorted list back so the CSV stays canonical.
      with open(args.file, "w", encoding="utf-8", newline="") as f:
          writer = csv.writer(f)
          writer.writerow(["source", "destination"])
          writer.writerows(redirects)

      existing_ids = {id(entry[0], entry[1]) for entry in existing}

      if not args.dump:
          print("Creating redirects.")
          created = 0
          for source, destination in redirects:
              if id(source, destination) in existing_ids:
                  continue
              make_redirect(source, destination, args)
              created += 1
              if not dry_run:
                  sleep()

          # Count only the redirects that were actually submitted.
          print("Finished creating", created, "redirects.")
          if dry_run:
              print("THIS WAS A DRY RUN, NOTHING WAS SUBMITTED TO READTHEDOCS!")


  def _make_retry_strategy():
      """Build the urllib3 Retry policy used for idempotent HTTP methods."""
      options = {
          "total": 3,
          "status_forcelist": [429, 500, 502, 503, 504],
          "backoff_factor": 2,
      }
      methods = ["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"]
      try:
          # Keyword used by urllib3 >= 1.26 (the only one accepted by 2.x).
          return Retry(allowed_methods=methods, **options)
      except TypeError:
          # Older urllib3 releases only know the previous keyword name.
          return Retry(method_whitelist=methods, **options)


  def _read_redirects_csv(path):
      """Read the redirects CSV and return a list of [source, destination] rows."""
      with open(path, "r", encoding="utf-8", newline="") as f:
          rows = list(csv.DictReader(f))

      # Explicit check instead of assert, which is stripped under "python -O".
      if rows and rows[0].keys() != {"source", "destination"}:
          raise SystemExit("CSV file must have a header and two columns: source, destination.")

      return [[row["source"], row["destination"]] for row in rows]


  def _select_valid_redirects(candidates):
      """Return the usable redirects from ``candidates``, reporting the rest."""
      accepted = []
      seen_ids = set()
      by_source = {}

      for redirect in candidates:
          if len(redirect) != 2:
              print("Invalid redirect:", redirect, "- expected 2 elements, got:", len(redirect))
              continue

          source, destination = redirect

          # A CSV row with a missing column yields None for that field.
          if not isinstance(source, str) or not isinstance(destination, str):
              print("Invalid redirect:", redirect, "- missing source or destination!")
              continue
          if source == destination:
              print("Invalid redirect:", redirect, "- redirects to itself!")
              continue
          if not is_valid_redirect_url(source) or not is_valid_redirect_url(destination):
              print("Invalid redirect:", redirect, "- invalid URL!")
              continue
          if not source.startswith("/") or not destination.startswith("/"):
              print("Invalid redirect:", redirect, "- invalid URL: should start with slash!")
              continue

          redirect_id = id(source, destination)
          if redirect_id in seen_ids:
              # Exact duplicate; skip silently. This must run before the
              # collision check, which would otherwise report it as an error.
              continue
          if source in by_source:
              print("Invalid redirect:", redirect,
                    "- collision, source", source, "already has redirect:",
                    by_source[source])
              continue

          seen_ids.add(redirect_id)
          by_source[source] = redirect
          accepted.append(redirect)

      return accepted
  # Run the tool only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()