# vercel.py (3.1 KB)
  1. import json
  2. import re
  3. from typing import Any
  4. import quickjs
  5. from curl_cffi import requests
  6. session = requests.Session(impersonate="chrome107")
  7. def get_model_info() -> dict[str, Any]:
  8. url = "https://sdk.vercel.ai"
  9. response = session.get(url)
  10. html = response.text
  11. paths_regex = r"static\/chunks.+?\.js"
  12. separator_regex = r'"\]\)<\/script><script>self\.__next_f\.push\(\[.,"'
  13. paths = re.findall(paths_regex, html)
  14. paths = [re.sub(separator_regex, "", path) for path in paths]
  15. paths = list(set(paths))
  16. urls = [f"{url}/_next/{path}" for path in paths]
  17. scripts = [session.get(url).text for url in urls]
  18. models_regex = r'let .="\\n\\nHuman:\",r=(.+?),.='
  19. for script in scripts:
  20. matches = re.findall(models_regex, script)
  21. if matches:
  22. models_str = matches[0]
  23. stop_sequences_regex = r"(?<=stopSequences:{value:\[)\D(?<!\])"
  24. models_str = re.sub(
  25. stop_sequences_regex, re.escape('"\\n\\nHuman:"'), models_str
  26. )
  27. context = quickjs.Context() # type: ignore
  28. json_str: str = context.eval(f"({models_str})").json() # type: ignore
  29. return json.loads(json_str) # type: ignore
  30. return {}
  31. def convert_model_info(models: dict[str, Any]) -> dict[str, Any]:
  32. model_info: dict[str, Any] = {}
  33. for model_name, params in models.items():
  34. default_params = params_to_default_params(params["parameters"])
  35. model_info[model_name] = {"id": params["id"], "default_params": default_params}
  36. return model_info
  37. def params_to_default_params(parameters: dict[str, Any]):
  38. defaults: dict[str, Any] = {}
  39. for key, parameter in parameters.items():
  40. if key == "maximumLength":
  41. key = "maxTokens"
  42. defaults[key] = parameter["value"]
  43. return defaults
  44. def get_model_names(model_info: dict[str, Any]):
  45. model_names = model_info.keys()
  46. model_names = [
  47. name
  48. for name in model_names
  49. if name not in ["openai:gpt-4", "openai:gpt-3.5-turbo"]
  50. ]
  51. model_names.sort()
  52. return model_names
  53. def print_providers(model_names: list[str]):
  54. for name in model_names:
  55. split_name = re.split(r":|/", name)
  56. base_provider = split_name[0]
  57. variable_name = split_name[-1].replace("-", "_").replace(".", "")
  58. line = f'{variable_name} = Model(name="{name}", base_provider="{base_provider}", best_provider=Vercel,)\n'
  59. print(line)
  60. def print_convert(model_names: list[str]):
  61. for name in model_names:
  62. split_name = re.split(r":|/", name)
  63. key = split_name[-1]
  64. variable_name = split_name[-1].replace("-", "_").replace(".", "")
  65. # "claude-instant-v1": claude_instant_v1,
  66. line = f' "{key}": {variable_name},'
  67. print(line)
  68. def main():
  69. model_info = get_model_info()
  70. model_info = convert_model_info(model_info)
  71. print(json.dumps(model_info, indent=2))
  72. model_names = get_model_names(model_info)
  73. print("-------" * 40)
  74. print_providers(model_names)
  75. print("-------" * 40)
  76. print_convert(model_names)
  77. if __name__ == "__main__":
  78. main()