create_images.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. from __future__ import annotations
  2. import asyncio
  3. import time
  4. import json
  5. from aiohttp import ClientSession, BaseConnector
  6. from urllib.parse import quote
  7. from typing import List, Dict
  8. try:
  9. from bs4 import BeautifulSoup
  10. has_requirements = True
  11. except ImportError:
  12. has_requirements = False
  13. from ..helper import get_connector
  14. from ...errors import MissingRequirementsError, RateLimitError
  15. from ...webdriver import WebDriver, get_driver_cookies, get_browser
  16. BING_URL = "https://www.bing.com"
  17. TIMEOUT_LOGIN = 1200
  18. TIMEOUT_IMAGE_CREATION = 300
  19. ERRORS = [
  20. "this prompt is being reviewed",
  21. "this prompt has been blocked",
  22. "we're working hard to offer image creator in more languages",
  23. "we can't create your images right now"
  24. ]
  25. BAD_IMAGES = [
  26. "https://r.bing.com/rp/in-2zU3AJUdkgFe7ZKv19yPBHVs.png",
  27. "https://r.bing.com/rp/TX9QuO3WzcCJz1uaaSwQAz39Kb0.jpg",
  28. ]
  29. def wait_for_login(driver: WebDriver, timeout: int = TIMEOUT_LOGIN) -> None:
  30. """
  31. Waits for the user to log in within a given timeout period.
  32. Args:
  33. driver (WebDriver): Webdriver for browser automation.
  34. timeout (int): Maximum waiting time in seconds.
  35. Raises:
  36. RuntimeError: If the login process exceeds the timeout.
  37. """
  38. driver.get(f"{BING_URL}/")
  39. start_time = time.time()
  40. while not driver.get_cookie("_U"):
  41. if time.time() - start_time > timeout:
  42. raise RuntimeError("Timeout error")
  43. time.sleep(0.5)
  44. def get_cookies_from_browser(proxy: str = None) -> dict[str, str]:
  45. """
  46. Retrieves cookies from the browser using webdriver.
  47. Args:
  48. proxy (str, optional): Proxy configuration.
  49. Returns:
  50. dict[str, str]: Retrieved cookies.
  51. """
  52. with get_browser(proxy=proxy) as driver:
  53. wait_for_login(driver)
  54. time.sleep(1)
  55. return get_driver_cookies(driver)
  56. def create_session(cookies: Dict[str, str], proxy: str = None, connector: BaseConnector = None) -> ClientSession:
  57. """
  58. Creates a new client session with specified cookies and headers.
  59. Args:
  60. cookies (Dict[str, str]): Cookies to be used for the session.
  61. Returns:
  62. ClientSession: The created client session.
  63. """
  64. headers = {
  65. "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  66. "accept-encoding": "gzip, deflate, br",
  67. "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
  68. "content-type": "application/x-www-form-urlencoded",
  69. "referrer-policy": "origin-when-cross-origin",
  70. "referrer": "https://www.bing.com/images/create/",
  71. "origin": "https://www.bing.com",
  72. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54",
  73. "sec-ch-ua": "\"Microsoft Edge\";v=\"111\", \"Not(A:Brand\";v=\"8\", \"Chromium\";v=\"111\"",
  74. "sec-ch-ua-mobile": "?0",
  75. "sec-fetch-dest": "document",
  76. "sec-fetch-mode": "navigate",
  77. "sec-fetch-site": "same-origin",
  78. "sec-fetch-user": "?1",
  79. "upgrade-insecure-requests": "1",
  80. }
  81. if cookies:
  82. headers["Cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
  83. return ClientSession(headers=headers, connector=get_connector(connector, proxy))
  84. async def create_images(session: ClientSession, prompt: str, timeout: int = TIMEOUT_IMAGE_CREATION) -> List[str]:
  85. """
  86. Creates images based on a given prompt using Bing's service.
  87. Args:
  88. session (ClientSession): Active client session.
  89. prompt (str): Prompt to generate images.
  90. proxy (str, optional): Proxy configuration.
  91. timeout (int): Timeout for the request.
  92. Returns:
  93. List[str]: A list of URLs to the created images.
  94. Raises:
  95. RuntimeError: If image creation fails or times out.
  96. """
  97. if not has_requirements:
  98. raise MissingRequirementsError('Install "beautifulsoup4" package')
  99. url_encoded_prompt = quote(prompt)
  100. payload = f"q={url_encoded_prompt}&rt=4&FORM=GENCRE"
  101. url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=4&FORM=GENCRE"
  102. async with session.post(url, allow_redirects=False, data=payload, timeout=timeout) as response:
  103. response.raise_for_status()
  104. text = (await response.text()).lower()
  105. if "0 coins available" in text:
  106. raise RateLimitError("No coins left. Log in with a different account or wait a while")
  107. for error in ERRORS:
  108. if error in text:
  109. raise RuntimeError(f"Create images failed: {error}")
  110. if response.status != 302:
  111. url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=3&FORM=GENCRE"
  112. async with session.post(url, allow_redirects=False, timeout=timeout) as response:
  113. if response.status != 302:
  114. raise RuntimeError(f"Create images failed. Code: {response.status}")
  115. redirect_url = response.headers["Location"].replace("&nfy=1", "")
  116. redirect_url = f"{BING_URL}{redirect_url}"
  117. request_id = redirect_url.split("id=")[1]
  118. async with session.get(redirect_url) as response:
  119. response.raise_for_status()
  120. polling_url = f"{BING_URL}/images/create/async/results/{request_id}?q={url_encoded_prompt}"
  121. start_time = time.time()
  122. while True:
  123. if time.time() - start_time > timeout:
  124. raise RuntimeError(f"Timeout error after {timeout} sec")
  125. async with session.get(polling_url) as response:
  126. if response.status != 200:
  127. raise RuntimeError(f"Polling images faild. Code: {response.status}")
  128. text = await response.text()
  129. if not text or "GenerativeImagesStatusPage" in text:
  130. await asyncio.sleep(1)
  131. else:
  132. break
  133. error = None
  134. try:
  135. error = json.loads(text).get("errorMessage")
  136. except:
  137. pass
  138. if error == "Pending":
  139. raise RuntimeError("Prompt is been blocked")
  140. elif error:
  141. raise RuntimeError(error)
  142. return read_images(text)
  143. def read_images(html_content: str) -> List[str]:
  144. """
  145. Extracts image URLs from the HTML content.
  146. Args:
  147. html_content (str): HTML content containing image URLs.
  148. Returns:
  149. List[str]: A list of image URLs.
  150. """
  151. soup = BeautifulSoup(html_content, "html.parser")
  152. tags = soup.find_all("img", class_="mimg")
  153. if not tags:
  154. tags = soup.find_all("img", class_="gir_mmimg")
  155. images = [img["src"].split("?w=")[0] for img in tags]
  156. if any(im in BAD_IMAGES for im in images):
  157. raise RuntimeError("Bad images found")
  158. if not images:
  159. raise RuntimeError("No images found")
  160. return images