py_黑料.py

# coding=utf-8
# !/usr/bin/python
import sys
import base64
import re

import requests
from bs4 import BeautifulSoup

from base.spider import Spider

sys.path.append('..')

# Site base URL and the User-Agent sent with every request.
xurl = "https://heiliaowang-44.buzz"

headerx = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36',
}
class Spider(Spider):
    global xurl
    global headerx

    def getName(self):
        return "首页"

    def init(self, extend):
        pass

    def destroy(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass
    def homeContent(self, filter):
        res = requests.get(xurl, headers=headerx)
        res.encoding = "utf-8"
        doc = BeautifulSoup(res.text, "html.parser")
        sourcediv = doc.find('div', class_='nav')
        vod = sourcediv.find_all('dd')
        # Nav entries that are not video categories and should be skipped.
        string_list = ["首页", "激情图漫", "激情小说",
                       "情色小说", "随机推荐", "顶级资源"]
        result = {'class': []}
        # Fixed categories keyed by the site's /type/<n> paths.
        fixed_classes = [
            ("/type/328", "国产视频"), ("/type/329", "中文字幕"),
            ("/type/331", "日本有码"), ("/type/332", "日本无码"),
            ("/type/333", "欧美无码"), ("/type/334", "强奸乱轮"),
            ("/type/335", "制服诱惑"), ("/type/336", "直播主播"),
            ("/type/338", "明星换脸"), ("/type/339", "抖阴视频"),
            ("/type/340", "女优明星"), ("/type/343", "网爆门"),
            ("/type/345", "伦理三级"), ("/type/346", "AV解说"),
            ("/type/347", "SM调教"), ("/type/348", "萝莉少女"),
            ("/type/349", "极品媚黑"), ("/type/350", "女同性恋"),
            ("/type/351", "玩偶姐姐"), ("/type/353", "人妖系列"),
            ("/type/373", "韩国主播"), ("/type/378", "VR视角"),
        ]
        for type_id, type_name in fixed_classes:
            result['class'].append({'type_id': type_id, 'type_name': type_name})
        # Append any extra categories discovered in the nav bar.
        for item in vod:
            name = item.find('a').text
            if name in string_list:
                continue
            id = item.find('a')['href']
            id = id.replace(".html", "")
            result['class'].append({'type_id': id, 'type_name': name})
        return result
    def homeVideoContent(self):
        videos = []
        try:
            res = requests.get(xurl, headers=headerx)
            res.encoding = "utf-8"
            doc = BeautifulSoup(res.text, "html.parser")
            sourcediv = doc.find_all('div', class_='pic')
            for vod in sourcediv:
                ul_elements = vod.find_all('ul')
                for item in ul_elements:
                    name = item.select_one("li a")['title']
                    # The home page lazy-loads covers, so the image URL
                    # lives in data-src rather than src.
                    pic = item.select_one("li a img")["data-src"]
                    remark = item.select_one("li a span").text
                    id = item.select_one("li a")['href']
                    videos.append({
                        "vod_id": id,
                        "vod_name": name,
                        "vod_pic": pic,
                        "vod_remarks": remark
                    })
        except Exception:
            # Ignore parse/network errors and return whatever was collected.
            pass
        return {'list': videos}
    def categoryContent(self, cid, pg, filter, ext):
        result = {}
        videos = []
        if not pg:
            pg = 1
        url = xurl + cid + "/" + str(pg) + ".html"
        detail = requests.get(url=url, headers=headerx)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "html.parser")
        sourcediv = doc.find_all('div', class_='pic')
        for vod in sourcediv:
            ul_elements = vod.find_all('ul')
            for item in ul_elements:
                name = item.select_one("li a")['title']
                pic = item.select_one("li a img")["src"]
                remark = item.select_one("li a span").text
                id = item.select_one("li a")['href']
                videos.append({
                    "vod_id": id,
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remark
                })
        result['list'] = videos
        result['page'] = pg
        # The site does not expose totals, so report generous fixed values
        # and let the client page forward until a list comes back empty.
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result
    def detailContent(self, ids):
        did = ids[0]
        videos = []
        result = {}
        res = requests.get(url=xurl + did, headers=headerx)
        res.encoding = "utf-8"
        doc = BeautifulSoup(res.text, "html.parser")
        sourcediv = doc.find('div', style='padding-bottom: 10px;')
        vod = sourcediv.find_all('a')
        play_from = ""
        play_url = ""
        # Build "$$$"-separated, parallel route-name and href lists.
        for item in vod:
            play_from = play_from + item.text + "$$$"
            play_url = play_url + item['href'] + "$$$"
        # Trim trailing separator characters, guarding against empty strings.
        while play_url and play_url[-1] in "#$":
            play_url = play_url[:-1]
        while play_from and play_from[-1] in "#$":
            play_from = play_from[:-1]
        tx = ""
        source_match = re.search(r"<li>播放地址:<strong>(.*?)</strong></li>", res.text)
        if source_match:
            tx = source_match.group(1)
        videos.append({
            "vod_id": did,
            "vod_name": tx,
            "vod_pic": "",
            "type_name": "ぃぅおか🍬 คิดถึง",
            "vod_year": "",
            "vod_area": "",
            "vod_remarks": "",
            "vod_actor": "",
            "vod_director": "",
            "vod_content": "",
            "vod_play_from": play_from,
            "vod_play_url": play_url
        })
        result['list'] = videos
        return result
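    # Sketch of the payload shape built above, under the "$$$" convention
    # these TVBox-style spiders use: vod_play_from and vod_play_url are
    # parallel "$$$"-joined lists, e.g. "线路1$$$线路2" paired with
    # "/play/1.html$$$/play/2.html" (route names here are illustrative, not
    # taken from the site); playerContent() later receives one of those
    # hrefs as its `id` argument.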
    def playerContent(self, flag, id, vipFlags):
        result = {}
        res = requests.get(url=xurl + id, headers=headerx)
        res.encoding = "utf-8"
        decoded_str = ''
        if '"rid"' in res.text:
            # The player page only carries a resource id; the real stream URL
            # has to be fetched from the site's fetchPlayUrl3 endpoint.
            source_match3 = re.search(r'"rid" : "(.*?)"', res.text)
            if source_match3:
                rid = source_match3.group(1)
                data = "rid=" + rid
                header = {
                    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36",
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
                }
                # Retry a few times instead of looping forever when the
                # endpoint answers without a returnData field.
                for _ in range(5):
                    res2 = requests.post(url=xurl + "/fetchPlayUrl3", headers=header, data=data)
                    source_match4 = re.search(r'"returnData"\s*:\s*"([^"]+)"', res2.text)
                    if source_match4:
                        decoded_str = source_match4.group(1)
                        break
        else:
            source_match = re.search(r"http:(.*?)\.m3u8", res.text)
            if source_match:
                str3 = source_match.group(1)
                if "aHR0c" in str3:
                    # Restore base64 padding ("aHR0c..." is "http..." encoded)
                    # before decoding the embedded stream URL.
                    padding_needed = len(str3) % 4
                    if padding_needed:
                        str3 += '=' * (4 - padding_needed)
                    decoded_str = base64.b64decode(str3).decode("utf-8")
            if not decoded_str:
                # Fall back to a plain quoted '....m3u8' assignment in the page.
                source_match2 = re.search(r"'(.*?)\.m3u8';", res.text)
                if source_match2:
                    decoded_str = source_match2.group(1) + ".m3u8"
        result["parse"] = 0
        result["playUrl"] = ''
        result["url"] = decoded_str
        result["header"] = headerx
        return result
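    # Worked example for the padding fix in playerContent(): base64 input
    # must be a multiple of 4 characters long, so a stripped 14-character
    # token like "aHR0cHM6Ly9hYg" (14 % 4 == 2) gets 4 - 2 = 2 '=' appended,
    # and base64.b64decode("aHR0cHM6Ly9hYg==") yields b"https://ab".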
    def searchContent(self, key, quick):
        return self.searchContentPage(key, quick, '1')

    def searchContentPage(self, key, quick, page):
        result = {}
        videos = []
        if not page:
            page = 1
        url = xurl + "/search/" + key + "/n/" + str(page) + ".html"
        detail = requests.get(url=url, headers=headerx)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "html.parser")
        sourcediv = doc.find_all('div', class_='pic')
        for vod in sourcediv:
            ul_elements = vod.find_all('ul')
            for item in ul_elements:
                name = item.select_one("li a")['title']
                pic = item.select_one("li a img")["src"]
                remark = item.select_one("li a span").text
                id = item.select_one("li a")['href']
                videos.append({
                    "vod_id": id,
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remark
                })
        result['list'] = videos
        result['page'] = page
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result
    def localProxy(self, params):
        # Dispatch to proxy helpers expected to be provided by the base
        # Spider class (proxyM3u8 / proxyMedia / proxyTs).
        if params['type'] == "m3u8":
            return self.proxyM3u8(params)
        elif params['type'] == "media":
            return self.proxyMedia(params)
        elif params['type'] == "ts":
            return self.proxyTs(params)
        return None
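
# A minimal smoke-test sketch, assuming base.spider.Spider is importable in
# this environment and the site is reachable; TVBox hosts normally load the
# class themselves, so this block is only for poking at the spider locally.
if __name__ == "__main__":
    spider = Spider()
    spider.init(None)
    home = spider.homeContent(False)
    print(home['class'][:3])  # first few category dicts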