__init__.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. from __future__ import annotations
  2. from urllib.parse import urlparse
  3. try:
  4. from curl_cffi.requests import Session
  5. from .curl_cffi import StreamResponse, StreamSession
  6. has_curl_cffi = True
  7. except ImportError:
  8. from typing import Type as Session
  9. from .aiohttp import StreamResponse, StreamSession
  10. has_curl_cffi = False
  11. from ..webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies
  12. from ..errors import MissingRequirementsError
  13. from .defaults import DEFAULT_HEADERS
  14. def get_args_from_browser(
  15. url: str,
  16. webdriver: WebDriver = None,
  17. proxy: str = None,
  18. timeout: int = 120,
  19. do_bypass_cloudflare: bool = True
  20. ) -> dict:
  21. """
  22. Create a Session object using a WebDriver to handle cookies and headers.
  23. Args:
  24. url (str): The URL to navigate to using the WebDriver.
  25. webdriver (WebDriver, optional): The WebDriver instance to use.
  26. proxy (str, optional): Proxy server to use for the Session.
  27. timeout (int, optional): Timeout in seconds for the WebDriver.
  28. Returns:
  29. Session: A Session object configured with cookies and headers from the WebDriver.
  30. """
  31. with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=False) as driver:
  32. if do_bypass_cloudflare:
  33. bypass_cloudflare(driver, url, timeout)
  34. cookies = get_driver_cookies(driver)
  35. user_agent = driver.execute_script("return navigator.userAgent")
  36. parse = urlparse(url)
  37. return {
  38. 'cookies': cookies,
  39. 'headers': {
  40. **DEFAULT_HEADERS,
  41. 'Authority': parse.netloc,
  42. 'Origin': f'{parse.scheme}://{parse.netloc}',
  43. 'Referer': url,
  44. 'User-Agent': user_agent,
  45. },
  46. }
  47. def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
  48. if not has_curl_cffi:
  49. raise MissingRequirementsError('Install "curl_cffi" package')
  50. args = get_args_from_browser(url, webdriver, proxy, timeout)
  51. return Session(
  52. **args,
  53. proxies={"https": proxy, "http": proxy},
  54. timeout=timeout,
  55. impersonate="chrome110"
  56. )