requests.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. from __future__ import annotations
  2. from urllib.parse import urlparse
  3. try:
  4. from curl_cffi.requests import Session
  5. from .requests_curl_cffi import StreamResponse, StreamSession
  6. has_curl_cffi = True
  7. except ImportError:
  8. from typing import Type as Session
  9. from .requests_aiohttp import StreamResponse, StreamSession
  10. has_curl_cffi = False
  11. from .webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies
  12. from .errors import MissingRequirementsError
  13. from .defaults import DEFAULT_HEADERS
  14. def get_args_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> dict:
  15. """
  16. Create a Session object using a WebDriver to handle cookies and headers.
  17. Args:
  18. url (str): The URL to navigate to using the WebDriver.
  19. webdriver (WebDriver, optional): The WebDriver instance to use.
  20. proxy (str, optional): Proxy server to use for the Session.
  21. timeout (int, optional): Timeout in seconds for the WebDriver.
  22. Returns:
  23. Session: A Session object configured with cookies and headers from the WebDriver.
  24. """
  25. with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=False) as driver:
  26. bypass_cloudflare(driver, url, timeout)
  27. cookies = get_driver_cookies(driver)
  28. user_agent = driver.execute_script("return navigator.userAgent")
  29. parse = urlparse(url)
  30. return {
  31. 'cookies': cookies,
  32. 'headers': {
  33. **DEFAULT_HEADERS,
  34. 'Authority': parse.netloc,
  35. 'Origin': f'{parse.scheme}://{parse.netloc}',
  36. 'Referer': url,
  37. 'User-Agent': user_agent,
  38. },
  39. }
  40. def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
  41. if not has_curl_cffi:
  42. raise MissingRequirementsError('Install "curl_cffi" package')
  43. args = get_args_from_browser(url, webdriver, proxy, timeout)
  44. return Session(
  45. **args,
  46. proxies={"https": proxy, "http": proxy},
  47. timeout=timeout,
  48. impersonate="chrome110"
  49. )