requests.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. from __future__ import annotations
  2. import json
  3. from functools import partialmethod
  4. from typing import AsyncGenerator
  5. from urllib.parse import urlparse
  6. from curl_cffi.requests import AsyncSession, Session, Response
  7. from .webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies
  8. class StreamResponse:
  9. """
  10. A wrapper class for handling asynchronous streaming responses.
  11. Attributes:
  12. inner (Response): The original Response object.
  13. """
  14. def __init__(self, inner: Response) -> None:
  15. """Initialize the StreamResponse with the provided Response object."""
  16. self.inner: Response = inner
  17. async def text(self) -> str:
  18. """Asynchronously get the response text."""
  19. return await self.inner.atext()
  20. def raise_for_status(self) -> None:
  21. """Raise an HTTPError if one occurred."""
  22. self.inner.raise_for_status()
  23. async def json(self, **kwargs) -> dict:
  24. """Asynchronously parse the JSON response content."""
  25. return json.loads(await self.inner.acontent(), **kwargs)
  26. async def iter_lines(self) -> AsyncGenerator[bytes, None]:
  27. """Asynchronously iterate over the lines of the response."""
  28. async for line in self.inner.aiter_lines():
  29. yield line
  30. async def iter_content(self) -> AsyncGenerator[bytes, None]:
  31. """Asynchronously iterate over the response content."""
  32. async for chunk in self.inner.aiter_content():
  33. yield chunk
  34. async def __aenter__(self):
  35. """Asynchronously enter the runtime context for the response object."""
  36. inner: Response = await self.inner
  37. self.inner = inner
  38. self.request = inner.request
  39. self.status_code: int = inner.status_code
  40. self.reason: str = inner.reason
  41. self.ok: bool = inner.ok
  42. self.headers = inner.headers
  43. self.cookies = inner.cookies
  44. return self
  45. async def __aexit__(self, *args):
  46. """Asynchronously exit the runtime context for the response object."""
  47. await self.inner.aclose()
  48. class StreamSession(AsyncSession):
  49. """
  50. An asynchronous session class for handling HTTP requests with streaming.
  51. Inherits from AsyncSession.
  52. """
  53. def request(
  54. self, method: str, url: str, **kwargs
  55. ) -> StreamResponse:
  56. """Create and return a StreamResponse object for the given HTTP request."""
  57. return StreamResponse(super().request(method, url, stream=True, **kwargs))
  58. # Defining HTTP methods as partial methods of the request method.
  59. head = partialmethod(request, "HEAD")
  60. get = partialmethod(request, "GET")
  61. post = partialmethod(request, "POST")
  62. put = partialmethod(request, "PUT")
  63. patch = partialmethod(request, "PATCH")
  64. delete = partialmethod(request, "DELETE")
  65. def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
  66. """
  67. Create a Session object using a WebDriver to handle cookies and headers.
  68. Args:
  69. url (str): The URL to navigate to using the WebDriver.
  70. webdriver (WebDriver, optional): The WebDriver instance to use.
  71. proxy (str, optional): Proxy server to use for the Session.
  72. timeout (int, optional): Timeout in seconds for the WebDriver.
  73. Returns:
  74. Session: A Session object configured with cookies and headers from the WebDriver.
  75. """
  76. with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver:
  77. bypass_cloudflare(driver, url, timeout)
  78. cookies = get_driver_cookies(driver)
  79. user_agent = driver.execute_script("return navigator.userAgent")
  80. parse = urlparse(url)
  81. return Session(
  82. cookies=cookies,
  83. headers={
  84. 'accept': '*/*',
  85. 'authority': parse.netloc,
  86. 'origin': f'{parse.scheme}://{parse.netloc}',
  87. 'referer': url,
  88. 'sec-fetch-dest': 'empty',
  89. 'sec-fetch-mode': 'cors',
  90. 'sec-fetch-site': 'same-origin',
  91. 'user-agent': user_agent
  92. },
  93. proxies={"https": proxy, "http": proxy},
  94. timeout=timeout,
  95. impersonate="chrome110"
  96. )