Phind.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. from __future__ import annotations
  2. import re
  3. import json
  4. from urllib import parse
  5. from datetime import datetime
  6. from ...typing import AsyncResult, Messages
  7. from ..base_provider import AsyncGeneratorProvider
  8. from ...requests import StreamSession
  9. class Phind(AsyncGeneratorProvider):
  10. url = "https://www.phind.com"
  11. working = False
  12. lockdown = True
  13. supports_stream = True
  14. supports_message_history = True
  15. @classmethod
  16. async def create_async_generator(
  17. cls,
  18. model: str,
  19. messages: Messages,
  20. proxy: str = None,
  21. timeout: int = 120,
  22. creative_mode: bool = False,
  23. **kwargs
  24. ) -> AsyncResult:
  25. headers = {
  26. "Accept": "*/*",
  27. "Origin": cls.url,
  28. "Referer": f"{cls.url}/search",
  29. "Sec-Fetch-Dest": "empty",
  30. "Sec-Fetch-Mode": "cors",
  31. "Sec-Fetch-Site": "same-origin",
  32. }
  33. async with StreamSession(
  34. headers=headers,
  35. impersonate="chrome",
  36. proxies={"https": proxy},
  37. timeout=timeout
  38. ) as session:
  39. url = "https://www.phind.com/search?home=true"
  40. async with session.get(url) as response:
  41. text = await response.text()
  42. match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(?P<json>[\S\s]+?)</script>', text)
  43. data = json.loads(match.group("json"))
  44. challenge_seeds = data["props"]["pageProps"]["challengeSeeds"]
  45. prompt = messages[-1]["content"]
  46. data = {
  47. "question": prompt,
  48. "question_history": [
  49. message["content"] for message in messages[:-1] if message["role"] == "user"
  50. ],
  51. "answer_history": [
  52. message["content"] for message in messages if message["role"] == "assistant"
  53. ],
  54. "webResults": [],
  55. "options": {
  56. "date": datetime.now().strftime("%d.%m.%Y"),
  57. "language": "en-US",
  58. "detailed": True,
  59. "anonUserId": "",
  60. "answerModel": "GPT-4" if model.startswith("gpt-4") else "Phind-34B",
  61. "creativeMode": creative_mode,
  62. "customLinks": []
  63. },
  64. "context": "\n".join([message["content"] for message in messages if message["role"] == "system"]),
  65. }
  66. data["challenge"] = generate_challenge(data, **challenge_seeds)
  67. async with session.post(f"https://https.api.phind.com/infer/", headers=headers, json=data) as response:
  68. new_line = False
  69. async for line in response.iter_lines():
  70. if line.startswith(b"data: "):
  71. chunk = line[6:]
  72. if chunk.startswith(b'<PHIND_DONE/>'):
  73. break
  74. if chunk.startswith(b'<PHIND_BACKEND_ERROR>'):
  75. raise RuntimeError(f"Response: {chunk.decode()}")
  76. if chunk.startswith(b'<PHIND_WEBRESULTS>') or chunk.startswith(b'<PHIND_FOLLOWUP>'):
  77. pass
  78. elif chunk.startswith(b"<PHIND_METADATA>") or chunk.startswith(b"<PHIND_INDICATOR>"):
  79. pass
  80. elif chunk.startswith(b"<PHIND_SPAN_BEGIN>") or chunk.startswith(b"<PHIND_SPAN_END>"):
  81. pass
  82. elif chunk:
  83. yield chunk.decode()
  84. elif new_line:
  85. yield "\n"
  86. new_line = False
  87. else:
  88. new_line = True
  89. def deterministic_stringify(obj):
  90. def handle_value(value):
  91. if isinstance(value, (dict, list)):
  92. if isinstance(value, list):
  93. return '[' + ','.join(sorted(map(handle_value, value))) + ']'
  94. else: # It's a dict
  95. return '{' + deterministic_stringify(value) + '}'
  96. elif isinstance(value, bool):
  97. return 'true' if value else 'false'
  98. elif isinstance(value, (int, float)):
  99. return format(value, '.8f').rstrip('0').rstrip('.')
  100. elif isinstance(value, str):
  101. return f'"{value}"'
  102. else:
  103. return 'null'
  104. items = sorted(obj.items(), key=lambda x: x[0])
  105. return ','.join([f'{k}:{handle_value(v)}' for k, v in items if handle_value(v) is not None])
  106. def prng_general(seed, multiplier, addend, modulus):
  107. a = seed * multiplier + addend
  108. if a < 0:
  109. return ((a%modulus)-modulus)/modulus
  110. else:
  111. return a%modulus/modulus
  112. def generate_challenge_seed(l):
  113. I = deterministic_stringify(l)
  114. d = parse.quote(I, safe='')
  115. return simple_hash(d)
  116. def simple_hash(s):
  117. d = 0
  118. for char in s:
  119. if len(char) > 1 or ord(char) >= 256:
  120. continue
  121. d = ((d << 5) - d + ord(char[0])) & 0xFFFFFFFF
  122. if d > 0x7FFFFFFF: # 2147483647
  123. d -= 0x100000000 # Subtract 2**32
  124. return d
  125. def generate_challenge(obj, **kwargs):
  126. return prng_general(
  127. seed=generate_challenge_seed(obj),
  128. **kwargs
  129. )