# itv.py
  1. import time
  2. import os
  3. import concurrent.futures
  4. from selenium import webdriver
  5. from selenium.webdriver.chrome.options import Options
  6. import requests
  7. import re
# FOFA search-result pages to scrape. Each ``qbase64`` value is the
# base64-encoded query ``"iptv/live/zh_cn.js" && country="CN" && <filter>``,
# where <filter> is the ``region="..."`` or ``city="..."`` clause named in the
# trailing comment of each entry.
urls = [
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0iSGViZWki",  # region: Hebei
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0iYmVpamluZyI%3D",  # region: Beijing
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0iZ3Vhbmdkb25nIg%3D%3D",  # region: Guangdong
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0ic2hhbmdoYWki",  # region: Shanghai
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0idGlhbmppbiI%3D",  # region: Tianjin
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0i5rKz5Y2XIg%3D%3D",  # region: Henan (query uses the Chinese name)
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIHJlZ2lvbj0iRnVqaWFuIg%3D%3D",  # region: Fujian
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIGNpdHk9Inh1Y2hhbmci",  # city: Xuchang (Henan)
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIGNpdHk9InpoZW5nemhvdSI%3D",  # city: Zhengzhou (Henan)
    "https://fofa.info/result?qbase64=ImlwdHYvbGl2ZS96aF9jbi5qcyIgJiYgY291bnRyeT0iQ04iICYmIGNpdHk9ImthaWZlbmci",  # city: Kaifeng (Henan)
]
  20. def modify_urls(url):
  21. modified_urls = []
  22. ip_start_index = url.find("//") + 2
  23. ip_end_index = url.find(":", ip_start_index)
  24. base_url = url[:ip_start_index] # http:// or https://
  25. ip_address = url[ip_start_index:ip_end_index]
  26. port = url[ip_end_index:]
  27. ip_end = "/iptv/live/1000.json?key=txiptv"
  28. for i in range(1, 256):
  29. modified_ip = f"{ip_address[:-1]}{i}"
  30. modified_url = f"{base_url}{modified_ip}{port}{ip_end}"
  31. modified_urls.append(modified_url)
  32. return modified_urls
  33. def is_url_accessible(url):
  34. try:
  35. response = requests.get(url, timeout=0.5)
  36. if response.status_code == 200:
  37. return url
  38. except requests.exceptions.RequestException:
  39. pass
  40. return None
  41. results = []
  42. for url in urls:
  43. try:
  44. # 创建一个Chrome WebDriver实例
  45. chrome_options = Options()
  46. chrome_options.add_argument('--headless')
  47. chrome_options.add_argument('--no-sandbox')
  48. chrome_options.add_argument('--disable-dev-shm-usage')
  49. driver = webdriver.Chrome(options=chrome_options)
  50. # 使用WebDriver访问网页
  51. driver.get(url) # 将网址替换为你要访问的网页地址
  52. time.sleep(10)
  53. # 获取网页内容
  54. page_content = driver.page_source
  55. # 关闭WebDriver
  56. driver.quit()
  57. # 查找所有符合指定格式的网址
  58. pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://8.8.8.8:8888
  59. urls_all = re.findall(pattern, page_content)
  60. # urls = list(set(urls_all)) # 去重得到唯一的URL列表
  61. urls = set(urls_all) # 去重得到唯一的URL列表
  62. x_urls = []
  63. for url in urls: # 对urls进行处理,ip第四位修改为1,并去重
  64. url = url.strip()
  65. ip_start_index = url.find("//") + 2
  66. ip_end_index = url.find(":", ip_start_index)
  67. ip_dot_start = url.find(".") + 1
  68. ip_dot_second = url.find(".", ip_dot_start) + 1
  69. ip_dot_three = url.find(".", ip_dot_second) + 1
  70. base_url = url[:ip_start_index] # http:// or https://
  71. ip_address = url[ip_start_index:ip_dot_three]
  72. port = url[ip_end_index:]
  73. ip_end = "1"
  74. modified_ip = f"{ip_address}{ip_end}"
  75. x_url = f"{base_url}{modified_ip}{port}"
  76. x_urls.append(x_url)
  77. urls = set(x_urls) # 去重得到唯一的URL列表
  78. valid_urls = []
  79. # 多线程获取可用url
  80. with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
  81. futures = []
  82. for url in urls:
  83. url = url.strip()
  84. modified_urls = modify_urls(url)
  85. for modified_url in modified_urls:
  86. futures.append(executor.submit(is_url_accessible, modified_url))
  87. for future in concurrent.futures.as_completed(futures):
  88. result = future.result()
  89. if result:
  90. valid_urls.append(result)
  91. for url in valid_urls:
  92. print(url)
  93. # 遍历网址列表,获取JSON文件并解析
  94. for url in valid_urls:
  95. try:
  96. # 发送GET请求获取JSON文件,设置超时时间为0.5秒
  97. ip_start_index = url.find("//") + 2
  98. ip_dot_start = url.find(".") + 1
  99. ip_index_second = url.find("/", ip_dot_start)
  100. base_url = url[:ip_start_index] # http:// or https://
  101. ip_address = url[ip_start_index:ip_index_second]
  102. url_x = f"{base_url}{ip_address}"
  103. json_url = f"{url}"
  104. response = requests.get(json_url, timeout=0.5)
  105. json_data = response.json()
  106. try:
  107. # 解析JSON文件,获取name和url字段
  108. for item in json_data['data']:
  109. if isinstance(item, dict):
  110. name = item.get('name')
  111. urlx = item.get('url')
  112. if ',' in urlx:
  113. urlx=f"aaaaaaaa"
  114. #if 'http' in urlx or 'udp' in urlx or 'rtp' in urlx:
  115. if 'http' in urlx:
  116. urld = f"{urlx}"
  117. else:
  118. urld = f"{url_x}{urlx}"
  119. if name and urld:
  120. # 删除特定文字
  121. name = name.replace("cctv", "CCTV")
  122. name = name.replace("中央", "CCTV")
  123. name = name.replace("央视", "CCTV")
  124. name = name.replace("高清", "")
  125. name = name.replace("超高", "")
  126. name = name.replace("HD", "")
  127. name = name.replace("标清", "")
  128. name = name.replace("频道", "")
  129. name = name.replace("-", "")
  130. name = name.replace(" ", "")
  131. name = name.replace("PLUS", "+")
  132. name = name.replace("+", "+")
  133. name = name.replace("(", "")
  134. name = name.replace(")", "")
  135. name = re.sub(r"CCTV(\d+)台", r"CCTV\1", name)
  136. name = name.replace("CCTV1综合", "CCTV1")
  137. name = name.replace("CCTV2财经", "CCTV2")
  138. name = name.replace("CCTV3综艺", "CCTV3")
  139. name = name.replace("CCTV4国际", "CCTV4")
  140. name = name.replace("CCTV4中文国际", "CCTV4")
  141. name = name.replace("CCTV4欧洲", "CCTV4")
  142. name = name.replace("CCTV5体育", "CCTV5")
  143. name = name.replace("CCTV6电影", "CCTV6")
  144. name = name.replace("CCTV7军事", "CCTV7")
  145. name = name.replace("CCTV7军农", "CCTV7")
  146. name = name.replace("CCTV7农业", "CCTV7")
  147. name = name.replace("CCTV7国防军事", "CCTV7")
  148. name = name.replace("CCTV8电视剧", "CCTV8")
  149. name = name.replace("CCTV8记录", "CCTV9")
  150. name = name.replace("CCTV8纪录", "CCTV9")
  151. name = name.replace("CCTV9纪录", "CCTV9")
  152. name = name.replace("CCTV纪录", "CCTV9")
  153. name = name.replace("CCTV记录", "CCTV9")
  154. name = name.replace("CCTV10科教", "CCTV10")
  155. name = name.replace("CCTV11戏曲", "CCTV11")
  156. name = name.replace("CCTV12社会与法", "CCTV12")
  157. name = name.replace("CCTV13新闻", "CCTV13")
  158. name = name.replace("CCTV新闻", "CCTV13")
  159. name = name.replace("CCTV14少儿", "CCTV14")
  160. name = name.replace("CCTV14超", "CCTV14")
  161. name = name.replace("CCTV少儿", "CCTV14")
  162. name = name.replace("CCTV15音乐", "CCTV15")
  163. name = name.replace("CCTV音乐", "CCTV15")
  164. name = name.replace("CCTV16奥林匹克", "CCTV16")
  165. name = name.replace("CCTV17农业农村", "CCTV17")
  166. name = name.replace("CCTV17农业", "CCTV17")
  167. name = name.replace("CCTV5+体育赛视", "CCTV5+")
  168. name = name.replace("CCTV5+体育赛事", "CCTV5+")
  169. name = name.replace("CCTV5+体育", "CCTV5+")
  170. name = name.replace("CCTV教育", "CETV1")
  171. name = name.replace("BTV北京卫视", "北京卫视")
  172. name = name.replace("北京卫视CMIPTV", "北京卫视")
  173. name = name.replace("山西卫视CMIPTV", "山西卫视")
  174. name = name.replace("广西卫视CMIPTV", "广西卫视")
  175. name = name.replace("上海卫视", "东方卫视")
  176. name = name.replace("内蒙卫视", "内蒙古卫视")
  177. name = name.replace("天津卫视台", "天津卫视")
  178. name = name.replace("CCTV英语新闻", "CGTN")
  179. name = name.replace("广东南方卫视", "大湾区卫视")
  180. if 'udp' not in urld or 'rtp' not in urld:
  181. results.append(f"{name},{urld}")
  182. except:
  183. continue
  184. except:
  185. continue
  186. except:
  187. continue
  188. results = set(results) # 去重得到唯一的URL列表
  189. results = sorted(results)
  190. with open("itv.txt", 'w', encoding='utf-8') as file:
  191. for result in results:
  192. file.write(result + "\n")
  193. print(result)