# request.py

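# Multicast channel discovery: searches the Foodie hotel page (via requests or Selenium),
# optionally merges FOFA search results and locally cached entries, and returns a mapping
# of formatted channel names to candidate multicast sources.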
import pickle
import urllib.parse as urlparse
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from time import time
from urllib.parse import parse_qs

from tqdm.asyncio import tqdm_asyncio

import utils.constants as constants
from updates.fofa import get_channels_by_fofa
from updates.proxy import get_proxy, get_proxy_next
from utils.channel import (
    get_results_from_multicast_soup,
    get_results_from_multicast_soup_requests,
    get_channel_multicast_name_region_type_result,
    get_channel_multicast_region_type_list,
    get_channel_multicast_result,
    get_multicast_fofa_search_urls,
    format_channel_name,
)
from utils.config import config
from utils.driver.setup import setup_driver
from utils.driver.tools import search_submit
from utils.requests.tools import get_soup_requests, close_session
from utils.retry import (
    retry_func,
    find_clickable_element_with_retry,
)
from utils.tools import get_pbar_remaining, get_soup, merge_objects, resource_path
from .update_tmp import get_multicast_region_result_by_rtp_txt
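
# Selenium's By locator is only imported when browser-driven scraping is enabled.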
if config.open_driver:
    try:
        from selenium.webdriver.common.by import By
    except ImportError:
        pass


async def get_channels_by_multicast(names, callback=None):
    """
    Get the channels by multicast
    """
    channels = {}
    format_names = [format_channel_name(name) for name in names]
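    # Seed results from the pickled cache when caching is enabled.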
    if config.open_use_cache:
        try:
            with open(
                resource_path("updates/multicast/cache.pkl"),
                "rb",
            ) as file:
                cache = pickle.load(file) or {}
                for name in format_names:
                    channels[name] = cache.get(name, [])
        except Exception:
            pass
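    # Online lookup: search the Foodie hotel page for every (region, type) combination.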
    if config.open_request:
        pageUrl = constants.foodie_hotel_url
        proxy = None
        open_proxy = config.open_proxy
        open_driver = config.open_driver
        page_num = config.multicast_page_num
        if open_proxy:
            proxy = await get_proxy(pageUrl, best=True, with_test=True)
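        # Map formatted channel names to candidate (region, type) pairs derived from the local rtp txt files.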
        multicast_region_result = get_multicast_region_result_by_rtp_txt(callback=callback)
        name_region_type_result = get_channel_multicast_name_region_type_result(
            multicast_region_result, format_names
        )
        region_type_list = get_channel_multicast_region_type_list(name_region_type_result)
        search_region_type_result = defaultdict(lambda: defaultdict(list))
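        # Optionally pre-fill search results from FOFA before scraping the page itself.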
        if config.open_multicast_fofa:
            fofa_search_urls = get_multicast_fofa_search_urls()
            fofa_result = await get_channels_by_fofa(
                fofa_search_urls, multicast=True, callback=callback
            )
            search_region_type_result = merge_objects(search_region_type_result, fofa_result)
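
        # Worker run in a thread pool: scrape one (region, type) search and return its hits.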
        def process_channel_by_multicast(region, type):
            nonlocal proxy
            name = f"{region}{type}"
            info_list = []
            driver = None
            try:
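                # Browser mode: drive the search form with Selenium, rotating the proxy (if enabled) on failure.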
                if open_driver:
                    driver = setup_driver(proxy)
                    try:
                        retry_func(
                            lambda: driver.get(pageUrl), name=f"multicast search:{name}"
                        )
                    except Exception as e:
                        if open_proxy:
                            proxy = get_proxy_next()
                        driver.close()
                        driver.quit()
                        driver = setup_driver(proxy)
                        driver.get(pageUrl)
                    search_submit(driver, name)
                else:
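                    # Request mode: POST the search form (the "saerch" field name appears to be
                    # what the target page expects) and recover the pagination "code" token
                    # from the result links.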
                    page_soup = None
                    post_form = {"saerch": name}
                    code = None
                    try:
                        page_soup = retry_func(
                            lambda: get_soup_requests(pageUrl, data=post_form, proxy=proxy),
                            name=f"multicast search:{name}",
                        )
                    except Exception as e:
                        if open_proxy:
                            proxy = get_proxy_next()
                        page_soup = get_soup_requests(pageUrl, data=post_form, proxy=proxy)
                    if not page_soup:
                        print(f"{name}:Request fail.")
                        return {"region": region, "type": type, "data": info_list}
                    else:
                        a_tags = page_soup.find_all("a", href=True)
                        for a_tag in a_tags:
                            href_value = a_tag["href"]
                            parsed_url = urlparse.urlparse(href_value)
                            code = parse_qs(parsed_url.query).get("code", [None])[0]
                            if code:
                                break
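                # Walk up to page_num result pages, collecting multicast entries from each one.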
                for page in range(1, page_num + 1):
                    try:
                        if page > 1:
                            if open_driver:
                                page_link = find_clickable_element_with_retry(
                                    driver,
                                    (
                                        By.XPATH,
                                        f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
                                    ),
                                )
                                if not page_link:
                                    break
                                driver.execute_script("arguments[0].click();", page_link)
                            else:
                                request_url = (
                                    f"{pageUrl}?net={name}&page={page}&code={code}"
                                )
                                page_soup = retry_func(
                                    lambda: get_soup_requests(request_url, proxy=proxy),
                                    name=f"multicast search:{name}, page:{page}",
                                )
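                        # Parse the current page and extract the search results.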
                        soup = get_soup(driver.page_source) if open_driver else page_soup
                        if soup:
                            if "About 0 results" in soup.text:
                                break
                            results = (
                                get_results_from_multicast_soup(soup)
                                if open_driver
                                else get_results_from_multicast_soup_requests(soup)
                            )
                            print(name, "page:", page, "results num:", len(results))
                            if len(results) == 0:
                                print(f"{name}:No results found")
                            info_list = info_list + results
                        else:
                            print(f"{name}:No page soup found")
                        if page != page_num and open_driver:
                            driver.refresh()
                    except Exception as e:
                        print(f"{name}:Error on page {page}: {e}")
                        continue
            except Exception as e:
                print(f"{name}:Error on search: {e}")
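            # Always release the driver and report progress, even when the search failed.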
            finally:
                if driver:
                    driver.close()
                    driver.quit()
                pbar.update()
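                # Progress message (Chinese): "Foodie multicast update in progress,
                # <N> regions left to query, estimated time remaining: <...>".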
                if callback:
                    callback(
                        f"正在进行Foodie组播更新, 剩余{region_type_list_len - pbar.n}个地区待查询, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}",
                        int((pbar.n / region_type_list_len) * 100),
                    )
                return {"region": region, "type": type, "data": info_list}
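
        # Foodie page scraping: fan the (region, type) searches out over a small thread pool.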
        if config.open_multicast_foodie:
            region_type_list_len = len(region_type_list)
            pbar = tqdm_asyncio(total=region_type_list_len, desc="Multicast search")
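            # Progress message (Chinese): "Foodie multicast update in progress,
            # <len(names)> channels across <region_type_list_len> regions".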
            if callback:
                callback(
                    f"正在进行Foodie组播更新, {len(names)}个频道, 共{region_type_list_len}个地区",
                    0,
                )
            start_time = time()
            with ThreadPoolExecutor(max_workers=3) as executor:
                futures = {
                    executor.submit(process_channel_by_multicast, region, type): (
                        region,
                        type,
                    )
                    for region, type in region_type_list
                }
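                # Collect results as workers finish; keep only entries that carry a URL.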
                for future in as_completed(futures):
                    region, type = futures[future]
                    result = future.result()
                    data = result.get("data")
                    if data:
                        for item in data:
                            url = item.get("url")
                            date = item.get("date")
                            if url:
                                search_region_type_result[region][type].append(
                                    (url, date, None)
                                )
            pbar.close()
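        # Fold the scraped (region, type) hits back into channel-keyed results and merge with the cache.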
        request_channels = get_channel_multicast_result(
            name_region_type_result, search_region_type_result
        )
        channels = merge_objects(channels, request_channels)
        if not open_driver:
            close_session()
    return channels
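
# Usage sketch (illustrative, not part of the module): the coroutine is awaited with a list
# of channel names and an optional progress callback, e.g.
#   channels = await get_channels_by_multicast(["CCTV1", "CCTV5"], callback=my_callback)
# where "my_callback" is a hypothetical callable taking (message: str, progress: int).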