webdriver.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. from __future__ import annotations
  2. from platformdirs import user_config_dir
  3. from selenium.webdriver.remote.webdriver import WebDriver
  4. from undetected_chromedriver import Chrome, ChromeOptions
  5. from selenium.webdriver.common.by import By
  6. from selenium.webdriver.support.ui import WebDriverWait
  7. from selenium.webdriver.support import expected_conditions as EC
  8. from os import path
  9. from . import debug
  10. try:
  11. from pyvirtualdisplay import Display
  12. has_pyvirtualdisplay = True
  13. except ImportError:
  14. has_pyvirtualdisplay = False
  15. def get_browser(
  16. user_data_dir: str = None,
  17. headless: bool = False,
  18. proxy: str = None,
  19. options: ChromeOptions = None
  20. ) -> WebDriver:
  21. """
  22. Creates and returns a Chrome WebDriver with specified options.
  23. Args:
  24. user_data_dir (str, optional): Directory for user data. If None, uses default directory.
  25. headless (bool, optional): Whether to run the browser in headless mode. Defaults to False.
  26. proxy (str, optional): Proxy settings for the browser. Defaults to None.
  27. options (ChromeOptions, optional): ChromeOptions object with specific browser options. Defaults to None.
  28. Returns:
  29. WebDriver: An instance of WebDriver configured with the specified options.
  30. """
  31. if user_data_dir is None:
  32. user_data_dir = user_config_dir("g4f")
  33. if user_data_dir and debug.logging:
  34. print("Open browser with config dir:", user_data_dir)
  35. if not options:
  36. options = ChromeOptions()
  37. if proxy:
  38. options.add_argument(f'--proxy-server={proxy}')
  39. driver = '/usr/bin/chromedriver'
  40. if not path.isfile(driver):
  41. driver = None
  42. return Chrome(
  43. options=options,
  44. user_data_dir=user_data_dir,
  45. driver_executable_path=driver,
  46. headless=headless
  47. )
  48. def get_driver_cookies(driver: WebDriver) -> dict:
  49. """
  50. Retrieves cookies from the specified WebDriver.
  51. Args:
  52. driver (WebDriver): The WebDriver instance from which to retrieve cookies.
  53. Returns:
  54. dict: A dictionary containing cookies with their names as keys and values as cookie values.
  55. """
  56. return {cookie["name"]: cookie["value"] for cookie in driver.get_cookies()}
  57. def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None:
  58. """
  59. Attempts to bypass Cloudflare protection when accessing a URL using the provided WebDriver.
  60. Args:
  61. driver (WebDriver): The WebDriver to use for accessing the URL.
  62. url (str): The URL to access.
  63. timeout (int): Time in seconds to wait for the page to load.
  64. Raises:
  65. Exception: If there is an error while bypassing Cloudflare or loading the page.
  66. """
  67. driver.get(url)
  68. if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js":
  69. if debug.logging:
  70. print("Cloudflare protection detected:", url)
  71. try:
  72. driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
  73. WebDriverWait(driver, 5).until(
  74. EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input"))
  75. ).click()
  76. except Exception as e:
  77. if debug.logging:
  78. print(f"Error bypassing Cloudflare: {e}")
  79. finally:
  80. driver.switch_to.default_content()
  81. WebDriverWait(driver, timeout).until(
  82. EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)"))
  83. )
  84. class WebDriverSession:
  85. """
  86. Manages a Selenium WebDriver session, including handling of virtual displays and proxies.
  87. """
  88. def __init__(
  89. self,
  90. webdriver: WebDriver = None,
  91. user_data_dir: str = None,
  92. headless: bool = False,
  93. virtual_display: bool = False,
  94. proxy: str = None,
  95. options: ChromeOptions = None
  96. ):
  97. """
  98. Initializes a new instance of the WebDriverSession.
  99. Args:
  100. webdriver (WebDriver, optional): A WebDriver instance for the session. Defaults to None.
  101. user_data_dir (str, optional): Directory for user data. Defaults to None.
  102. headless (bool, optional): Whether to run the browser in headless mode. Defaults to False.
  103. virtual_display (bool, optional): Whether to use a virtual display. Defaults to False.
  104. proxy (str, optional): Proxy settings for the browser. Defaults to None.
  105. options (ChromeOptions, optional): ChromeOptions for the browser. Defaults to None.
  106. """
  107. self.webdriver = webdriver
  108. self.user_data_dir = user_data_dir
  109. self.headless = headless
  110. self.virtual_display = Display(size=(1920, 1080)) if has_pyvirtualdisplay and virtual_display else None
  111. self.proxy = proxy
  112. self.options = options
  113. self.default_driver = None
  114. def reopen(
  115. self,
  116. user_data_dir: str = None,
  117. headless: bool = False,
  118. virtual_display: bool = False
  119. ) -> WebDriver:
  120. """
  121. Reopens the WebDriver session with new settings.
  122. Args:
  123. user_data_dir (str, optional): Directory for user data. Defaults to current value.
  124. headless (bool, optional): Whether to run the browser in headless mode. Defaults to current value.
  125. virtual_display (bool, optional): Whether to use a virtual display. Defaults to current value.
  126. Returns:
  127. WebDriver: The reopened WebDriver instance.
  128. """
  129. user_data_dir = user_data_data_dir or self.user_data_dir
  130. if self.default_driver:
  131. self.default_driver.quit()
  132. if not virtual_display and self.virtual_display:
  133. self.virtual_display.stop()
  134. self.virtual_display = None
  135. self.default_driver = get_browser(user_data_dir, headless, self.proxy)
  136. return self.default_driver
  137. def __enter__(self) -> WebDriver:
  138. """
  139. Context management method for entering a session. Initializes and returns a WebDriver instance.
  140. Returns:
  141. WebDriver: An instance of WebDriver for this session.
  142. """
  143. if self.webdriver:
  144. return self.webdriver
  145. if self.virtual_display:
  146. self.virtual_display.start()
  147. self.default_driver = get_browser(self.user_data_dir, self.headless, self.proxy, self.options)
  148. return self.default_driver
  149. def __exit__(self, exc_type, exc_val, exc_tb):
  150. """
  151. Context management method for exiting a session. Closes and quits the WebDriver.
  152. Args:
  153. exc_type: Exception type.
  154. exc_val: Exception value.
  155. exc_tb: Exception traceback.
  156. Note:
  157. Closes the WebDriver and stops the virtual display if used.
  158. """
  159. if self.default_driver:
  160. try:
  161. self.default_driver.close()
  162. except Exception as e:
  163. if debug.logging:
  164. print(f"Error closing WebDriver: {e}")
  165. self.default_driver.quit()
  166. if self.virtual_display:
  167. self.virtual_display.stop()