DeepInfra.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. from __future__ import annotations
  2. import json
  3. import requests
  4. from ..typing import AsyncResult, Messages
  5. from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
  6. from ..requests import StreamSession
  7. class DeepInfra(AsyncGeneratorProvider, ProviderModelMixin):
  8. url = "https://deepinfra.com"
  9. working = True
  10. supports_stream = True
  11. supports_message_history = True
  12. default_model = 'meta-llama/Llama-2-70b-chat-hf'
  13. @classmethod
  14. def get_models(cls):
  15. if not cls.models:
  16. url = 'https://api.deepinfra.com/models/featured'
  17. models = requests.get(url).json()
  18. cls.models = [model['model_name'] for model in models]
  19. return cls.models
  20. @classmethod
  21. async def create_async_generator(
  22. cls,
  23. model: str,
  24. messages: Messages,
  25. stream: bool,
  26. proxy: str = None,
  27. timeout: int = 120,
  28. auth: str = None,
  29. **kwargs
  30. ) -> AsyncResult:
  31. headers = {
  32. 'Accept-Encoding': 'gzip, deflate, br',
  33. 'Accept-Language': 'en-US',
  34. 'Connection': 'keep-alive',
  35. 'Content-Type': 'application/json',
  36. 'Origin': 'https://deepinfra.com',
  37. 'Referer': 'https://deepinfra.com/',
  38. 'Sec-Fetch-Dest': 'empty',
  39. 'Sec-Fetch-Mode': 'cors',
  40. 'Sec-Fetch-Site': 'same-site',
  41. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
  42. 'X-Deepinfra-Source': 'web-embed',
  43. 'accept': 'text/event-stream',
  44. 'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
  45. 'sec-ch-ua-mobile': '?0',
  46. 'sec-ch-ua-platform': '"macOS"',
  47. }
  48. if auth:
  49. headers['Authorization'] = f"bearer {auth}"
  50. async with StreamSession(headers=headers,
  51. timeout=timeout,
  52. proxies={"https": proxy},
  53. impersonate="chrome110"
  54. ) as session:
  55. json_data = {
  56. 'model' : cls.get_model(model),
  57. 'messages': messages,
  58. 'temperature': kwargs.get("temperature", 0.7),
  59. 'max_tokens': kwargs.get("max_tokens", 512),
  60. 'stop': kwargs.get("stop", []),
  61. 'stream' : True
  62. }
  63. async with session.post('https://api.deepinfra.com/v1/openai/chat/completions',
  64. json=json_data) as response:
  65. response.raise_for_status()
  66. first = True
  67. async for line in response.iter_lines():
  68. if not line.startswith(b"data: "):
  69. continue
  70. try:
  71. json_line = json.loads(line[6:])
  72. choices = json_line.get("choices", [{}])
  73. finish_reason = choices[0].get("finish_reason")
  74. if finish_reason:
  75. break
  76. token = choices[0].get("delta", {}).get("content")
  77. if token:
  78. if first:
  79. token = token.lstrip()
  80. if token:
  81. first = False
  82. yield token
  83. except Exception:
  84. raise RuntimeError(f"Response: {line}")