webdriver.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. from __future__ import annotations
  2. try:
  3. from platformdirs import user_config_dir
  4. from selenium.webdriver.remote.webdriver import WebDriver
  5. from selenium.webdriver.remote.webelement import WebElement
  6. from undetected_chromedriver import Chrome, ChromeOptions
  7. from selenium.webdriver.common.by import By
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support import expected_conditions as EC
  10. from selenium.webdriver.common.keys import Keys
  11. from selenium.common.exceptions import NoSuchElementException
  12. has_requirements = True
  13. except ImportError:
  14. from typing import Type as WebDriver
  15. has_requirements = False
  16. import time
  17. from shutil import which
  18. from os import path
  19. from os import access, R_OK
  20. from .typing import Cookies
  21. from .errors import MissingRequirementsError
  22. from . import debug
  23. try:
  24. from pyvirtualdisplay import Display
  25. has_pyvirtualdisplay = True
  26. except ImportError:
  27. has_pyvirtualdisplay = False
  28. def get_browser(
  29. user_data_dir: str = None,
  30. headless: bool = False,
  31. proxy: str = None,
  32. options: ChromeOptions = None
  33. ) -> WebDriver:
  34. """
  35. Creates and returns a Chrome WebDriver with specified options.
  36. Args:
  37. user_data_dir (str, optional): Directory for user data. If None, uses default directory.
  38. headless (bool, optional): Whether to run the browser in headless mode. Defaults to False.
  39. proxy (str, optional): Proxy settings for the browser. Defaults to None.
  40. options (ChromeOptions, optional): ChromeOptions object with specific browser options. Defaults to None.
  41. Returns:
  42. WebDriver: An instance of WebDriver configured with the specified options.
  43. """
  44. if not has_requirements:
  45. raise MissingRequirementsError('Install "undetected_chromedriver" and "platformdirs" package')
  46. if user_data_dir is None:
  47. user_data_dir = user_config_dir("g4f")
  48. if user_data_dir and debug.logging:
  49. print("Open browser with config dir:", user_data_dir)
  50. if not options:
  51. options = ChromeOptions()
  52. if proxy:
  53. options.add_argument(f'--proxy-server={proxy}')
  54. # Check for system driver in docker
  55. driver = which('chromedriver') or '/usr/bin/chromedriver'
  56. if not path.isfile(driver) or not access(driver, R_OK):
  57. driver = None
  58. return Chrome(
  59. options=options,
  60. user_data_dir=user_data_dir,
  61. driver_executable_path=driver,
  62. headless=headless,
  63. patcher_force_close=True
  64. )
  65. def get_driver_cookies(driver: WebDriver) -> Cookies:
  66. """
  67. Retrieves cookies from the specified WebDriver.
  68. Args:
  69. driver (WebDriver): The WebDriver instance from which to retrieve cookies.
  70. Returns:
  71. dict: A dictionary containing cookies with their names as keys and values as cookie values.
  72. """
  73. return {cookie["name"]: cookie["value"] for cookie in driver.get_cookies()}
  74. def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None:
  75. """
  76. Attempts to bypass Cloudflare protection when accessing a URL using the provided WebDriver.
  77. Args:
  78. driver (WebDriver): The WebDriver to use for accessing the URL.
  79. url (str): The URL to access.
  80. timeout (int): Time in seconds to wait for the page to load.
  81. Raises:
  82. Exception: If there is an error while bypassing Cloudflare or loading the page.
  83. """
  84. driver.get(url)
  85. if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js":
  86. if debug.logging:
  87. print("Cloudflare protection detected:", url)
  88. # Open website in a new tab
  89. element = driver.find_element(By.ID, "challenge-body-text")
  90. driver.execute_script(f"""
  91. arguments[0].addEventListener('click', () => {{
  92. window.open(arguments[1]);
  93. }});
  94. """, element, url)
  95. element.click()
  96. time.sleep(3)
  97. # Switch to the new tab and close the old tab
  98. original_window = driver.current_window_handle
  99. for window_handle in driver.window_handles:
  100. if window_handle != original_window:
  101. driver.close()
  102. driver.switch_to.window(window_handle)
  103. break
  104. # Click on the challenge button in the iframe
  105. try:
  106. driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
  107. WebDriverWait(driver, 5).until(
  108. EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input"))
  109. ).click()
  110. except NoSuchElementException:
  111. ...
  112. except Exception as e:
  113. if debug.logging:
  114. print(f"Error bypassing Cloudflare: {e}")
  115. finally:
  116. driver.switch_to.default_content()
  117. WebDriverWait(driver, timeout).until(
  118. EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)"))
  119. )
  120. class WebDriverSession:
  121. """
  122. Manages a Selenium WebDriver session, including handling of virtual displays and proxies.
  123. """
  124. def __init__(
  125. self,
  126. webdriver: WebDriver = None,
  127. user_data_dir: str = None,
  128. headless: bool = False,
  129. virtual_display: bool = False,
  130. proxy: str = None,
  131. options: ChromeOptions = None
  132. ):
  133. """
  134. Initializes a new instance of the WebDriverSession.
  135. Args:
  136. webdriver (WebDriver, optional): A WebDriver instance for the session. Defaults to None.
  137. user_data_dir (str, optional): Directory for user data. Defaults to None.
  138. headless (bool, optional): Whether to run the browser in headless mode. Defaults to False.
  139. virtual_display (bool, optional): Whether to use a virtual display. Defaults to False.
  140. proxy (str, optional): Proxy settings for the browser. Defaults to None.
  141. options (ChromeOptions, optional): ChromeOptions for the browser. Defaults to None.
  142. """
  143. self.webdriver = webdriver
  144. self.user_data_dir = user_data_dir
  145. self.headless = headless
  146. self.virtual_display = Display(size=(1920, 1080)) if has_pyvirtualdisplay and virtual_display else None
  147. self.proxy = proxy
  148. self.options = options
  149. self.default_driver = None
  150. def reopen(
  151. self,
  152. user_data_dir: str = None,
  153. headless: bool = False,
  154. virtual_display: bool = False
  155. ) -> WebDriver:
  156. """
  157. Reopens the WebDriver session with new settings.
  158. Args:
  159. user_data_dir (str, optional): Directory for user data. Defaults to current value.
  160. headless (bool, optional): Whether to run the browser in headless mode. Defaults to current value.
  161. virtual_display (bool, optional): Whether to use a virtual display. Defaults to current value.
  162. Returns:
  163. WebDriver: The reopened WebDriver instance.
  164. """
  165. user_data_dir = user_data_dir or self.user_data_dir
  166. if self.default_driver:
  167. self.default_driver.quit()
  168. if not virtual_display and self.virtual_display:
  169. self.virtual_display.stop()
  170. self.virtual_display = None
  171. self.default_driver = get_browser(user_data_dir, headless, self.proxy)
  172. return self.default_driver
  173. def __enter__(self) -> WebDriver:
  174. """
  175. Context management method for entering a session. Initializes and returns a WebDriver instance.
  176. Returns:
  177. WebDriver: An instance of WebDriver for this session.
  178. """
  179. if self.webdriver:
  180. return self.webdriver
  181. if self.virtual_display:
  182. self.virtual_display.start()
  183. self.default_driver = get_browser(self.user_data_dir, self.headless, self.proxy, self.options)
  184. return self.default_driver
  185. def __exit__(self, exc_type, exc_val, exc_tb):
  186. """
  187. Context management method for exiting a session. Closes and quits the WebDriver.
  188. Args:
  189. exc_type: Exception type.
  190. exc_val: Exception value.
  191. exc_tb: Exception traceback.
  192. Note:
  193. Closes the WebDriver and stops the virtual display if used.
  194. """
  195. if self.default_driver:
  196. try:
  197. self.default_driver.close()
  198. except Exception as e:
  199. if debug.logging:
  200. print(f"Error closing WebDriver: {e}")
  201. finally:
  202. self.default_driver.quit()
  203. if self.virtual_display:
  204. self.virtual_display.stop()
  205. def element_send_text(element: WebElement, text: str) -> None:
  206. script = "arguments[0].innerText = arguments[1]"
  207. element.parent.execute_script(script, element, text)
  208. element.send_keys(Keys.ENTER)