py_bdys.py

#!/usr/bin/python
# coding=utf-8
import sys
sys.path.append('..')
from base.spider import Spider
import json
import time
from urllib.parse import quote_plus
import requests
from bs4 import BeautifulSoup
import ddddocr
import urllib3
import re
import hashlib
from Crypto.Cipher import AES
from binascii import b2a_hex
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import algorithms
import zlib
import base64

# Monkeypatch urllib3's private Timeout validator so every connect/read
# timeout is pinned to 5 seconds (the 'total' timeout stays unset).
urllib3.util.timeout.Timeout._validate_timeout = lambda *args: 5 if args[2] != 'total' else None
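
# A less invasive alternative to the private-API monkeypatch above would be a
# per-request timeout (a sketch, assuming per-call control is acceptable):
#   requests.get(url, headers=getHeaders(url), timeout=5)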

Tag = "bdys01"
Tag_name = "哔滴影视"
siteUrl = "https://www.bdys01.com"


def getHeaders(url):
    headers = {}
    if url:
        headers.setdefault("Referer", url)
    headers.setdefault("Accept-Encoding", "gzip, deflate, br")
    headers.setdefault("DNT", "1")
    headers.setdefault("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0")
    headers.setdefault("Accept", "*/*")
    headers.setdefault("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2")
    return headers


def cacu(code):
    # Evaluate the OCR'd arithmetic captcha. Trailing "2"/"7"/"4"/"-" are
    # treated as likely misreads of "?" or "=" and stripped first.
    if "=" in code:
        code = code[:code.find("=")]
    elif code[-1] == "2" or code[-1] == "7":
        code = code[:-1]
    if code[-1] == "4" or code[-1] == "-":
        code = code[:-1]
    # "1" is often misread as "I" or "l".
    code = code.replace("I", "1")
    code = code.replace("l", "1")
    if code.isdigit():
        # No operator survived OCR: treat "AABB" as AA - BB.
        if len(code) > 4:
            code = code[:4]
        return int(code[:2]) - int(code[2:])
    elif "+" in code:
        code = code.split("+")
        return int(code[0]) + int(code[1])
    elif "-" in code:
        code = code.split("-")
        return int(code[0]) - int(code[1])
    elif "x" in code:
        code = code.split("x")
        return int(code[0]) * int(code[1])
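
# A few worked inputs for cacu (hypothetical OCR strings, traced through the
# heuristics above):
#   cacu("12+7=?") -> 19   (everything after "=" is dropped)
#   cacu("1l+3")   -> 14   ("l" repaired to "1")
#   cacu("20x3")   -> 60
#   cacu("1203")   -> 9    (no operator: 12 - 03)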


def verifyCode(key):
    # Fetch the search captcha image, OCR it with ddddocr, and retry the
    # search up to 5 times until the response stops asking for a code.
    retry = 5
    while retry:
        try:
            session = requests.session()
            ocr = ddddocr.DdddOcr()
            img = session.get(
                url=f"{siteUrl}/search/verifyCode?t={int(round(time.time() * 1000))}",
                headers=getHeaders(siteUrl)
            ).content
            # with open("verifyCode.jpg", 'wb') as f:
            #     f.write(img)
            code = cacu(ocr.classification(img))
            url = f"{siteUrl}/search/{quote_plus(key)}?code={code}"
            res = session.get(
                url=url,
                headers=getHeaders(url.split("?")[0])
            ).text
            if "/search/verifyCode?t=" not in res:
                return res
            # time.sleep(1)
        except Exception as e:
            print(e)
            if e.__class__.__name__ == 'ConnectTimeout':
                break
        finally:
            retry -= 1


def pkcs7_padding(data):
    padder = padding.PKCS7(algorithms.AES.block_size).padder()
    return padder.update(data) + padder.finalize()


def encrypt(text, key):
    # AES-ECB with PKCS7 padding, returned as uppercase hex.
    cryptor = AES.new(key.encode('utf-8'), AES.MODE_ECB)
    ciphertext = cryptor.encrypt(pkcs7_padding(text.encode('utf-8')))
    return b2a_hex(ciphertext).decode().upper()
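
# How the sg request signature is built (hypothetical pid and timestamp;
# get_lines below does exactly this): the AES key is the first 16 hex chars
# of MD5("{pid}-{t}"), and sg is the same "{pid}-{t}" string encrypted with it.
#   t = "1700000000000"
#   pid = "22321"
#   key = hashlib.md5(f"{pid}-{t}".encode()).hexdigest()[:16]
#   sg = encrypt(f"{pid}-{t}", key)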


def get_lines(path):
    try:
        lines = []
        # The play page embeds a numeric pid that signs the /lines request.
        pid = re.search(r"pid = (\d*)", requests.get(url=f'{siteUrl}{path}', headers=getHeaders(siteUrl)).text).group(1)
        t = str(int(round(time.time() * 1000)))
        key = hashlib.md5(f"{pid}-{t}".encode('utf-8')).hexdigest()[0:16]
        sg = encrypt(f"{pid}-{t}", key)
        play_url = f"{siteUrl}/lines?t={t}&sg={sg}&pid={pid}"
        data = requests.get(url=play_url, headers=getHeaders(play_url)).json()["data"]
        if len(data) == 1:
            # Single line: resolve a direct URL via the /god endpoint, falling
            # back to type=1 with a different hard-coded verifyCode.
            play_line = requests.post(
                url=f"{siteUrl}/god/{pid}",
                data={"t": t, "sg": sg, "verifyCode": 666},
                headers=getHeaders(siteUrl)
            ).json().get("url", "")
            if not play_line:
                play_line = requests.post(
                    url=f"{siteUrl}/god/{pid}?type=1",
                    data={"t": t, "sg": sg, "verifyCode": 888},
                    headers=getHeaders(siteUrl)
                ).json().get("url", "")
            if "rkey" in play_line:
                realurl = play_line.replace("?rkey", str(int(round(time.time() * 1000))) + ".mp4?ver=6010&rkey")
            elif "ixigua" in play_line:
                realurl = play_line
            else:
                realurl = play_line.replace("http:", "https:") + "/" + str(int(round(time.time() * 1000))) + ".mp4"
            lines.append(realurl)
        else:
            for item in data:
                if item == "m3u8_2" or item == "m3u8":
                    for line in data[item].split(","):
                        if "mp4" in line:
                            lines.append(line)
                        else:
                            lines.append(line.replace("www.bde4.cc", "www.bdys01.com"))
                elif item == "url3":
                    lines.append(data[item])
        return lines
    except Exception as e:
        print(e)
        return []
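
# Illustrative return shape (not captured from the site):
#   get_lines("/play/22321-0.htm")
#   -> ["https://www.bdys01.com/.../index.m3u8", "https://.../1700000000000.mp4"]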


def add_domain(matched):
    # re.sub callback: prefix a matched .ts segment path with the CDN host.
    return "https://vod.bdys.me/" + matched.group(0)
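
# Illustrative rewrite (segment path is hypothetical):
#   re.sub(r".*?\.ts", add_domain, "20220101/hls/0001.ts")
#   -> "https://vod.bdys.me/20220101/hls/0001.ts"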


def searchContent(key, token):
    try:
        res = verifyCode(key)
        searchResult = BeautifulSoup(res, "html.parser")
        videos = []
        lists = searchResult.select("div.row.row-0")
        for vod in lists:
            vod_name = vod.select_one("div.card-body.py-0.pe-1").a["title"]
            if key in vod_name:
                videos.append({
                    "vod_id": f'{Tag}${vod.a["href"].split(".")[0]}',
                    "vod_name": vod_name,
                    "vod_pic": vod.img["src"],
                    "vod_remarks": Tag_name + " " + vod.select_one("div.card-body.py-0.pe-1").a.get_text()
                })
        return videos
    except Exception as e:
        print(e)
        return []
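
# Shape of one search hit (values hypothetical; the vod_id format matches the
# detailContent call commented out in __main__):
#   {"vod_id": "bdys01$/dongzuo/22321", "vod_name": "...",
#    "vod_pic": "https://...jpg", "vod_remarks": "哔滴影视 ..."}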


def detailContent(ids, token):
    try:
        id = ids.split("$")[-1]
        url = f"{siteUrl}/{id}.htm"
        doc = BeautifulSoup(requests.get(url=url, headers=getHeaders(siteUrl)).text, "html.parser").select_one(
            "div.container-xl.clear-padding-sm.my-3.py-1")
        # Basic metadata
        sourcediv = doc.select_one("div.card-body")
        module_info_items = sourcediv.select("p")
        director = ""
        actor = ""
        vod_remarks = ""
        type_name = ""
        vod_year = ""
        vod_area = ""
        for item in module_info_items:
            if item.strong:
                if "导演" in item.strong.get_text():  # director
                    director = ",".join(i.get_text() for i in item.select("a"))
                elif "主演" in item.strong.get_text():  # cast
                    actor = ",".join(i.get_text() for i in item.select("a"))
                elif "摘要" in item.strong.get_text():  # synopsis
                    vod_remarks = item.span.get_text()
                elif "类型" in item.strong.get_text():  # genre
                    type_name = ",".join(i.get_text() for i in item.select("a"))
                elif "上映日期" in item.strong.get_text():  # release date
                    vod_year = ",".join(i.get_text() for i in item.select("a"))
                elif "制片国家/地区" in item.strong.get_text():  # country/region
                    vod_area = item.get_text().replace("制片国家/地区", "").replace("[", "").replace("]", "")
        vodList = {
            "vod_id": f'{Tag}${id}',
            "vod_name": sourcediv.h2.get_text(),
            "vod_pic": sourcediv.img["src"],
            "type_name": type_name,
            "vod_year": vod_year,
            "vod_area": vod_area,
            "vod_remarks": vod_remarks,
            "vod_actor": actor,
            "vod_director": director,
            "vod_content": doc.select_one("div.card.collapse").select_one("div.card-body").get_text().strip(),
        }
        vod_play = {}
        # Play lists: probe the sources until one returns a non-zero line count.
        sources = doc.select("a.btn.btn-square")
        lines_count = 0
        for source in sources:
            lines_count = len(get_lines(source["href"]))
            if lines_count:
                break
        for i in range(lines_count):
            sourceName = f"线路{i + 1}"  # "线路" = playback line
            vodItems = []
            playList = ""
            for source in sources:
                vodItems.append(
                    source.get_text() + "$" + f"{Tag}___" + source["href"].split(".")[0] + f"__{(i + 1) % lines_count}")
            if len(vodItems):
                playList = "#".join(vodItems)
            vod_play.setdefault(sourceName, playList)
        if len(vod_play):
            vod_play_from = "$$$".join(vod_play.keys())
            vod_play_url = "$$$".join(vod_play.values())
            vodList.setdefault("vod_play_from", vod_play_from)
            vodList.setdefault("vod_play_url", vod_play_url)
        return [vodList]
    except Exception as e:
        print(e)
        return []
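
# Resulting play-list encoding (hypothetical values): episode entries are
# joined with "#", playback lines with "$$$", mirroring vod_play above:
#   vod_play_from = "线路1$$$线路2"
#   vod_play_url  = "HD$bdys01___/play/22321-0__1$$$HD$bdys01___/play/22321-0__0"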


def playerContent(ids, flag, token):
    try:
        ids = ids.split("___")
        url = ids[-1].split("__")[0]
        play_from = int(ids[-1].split("__")[-1])
        lines = get_lines(f"{url}.htm")
        m3u8_url = lines[play_from]
        if m3u8_url.endswith("m3u8"):
            # The playlist response hides a gzip stream behind a fixed-size
            # junk prefix; bytes from offset 3354 on decompress to the real
            # m3u8 text.
            raw = requests.get(url=m3u8_url, headers=getHeaders("")).content[3354:]
            data = zlib.decompress(raw, 16 + zlib.MAX_WBITS).decode()
            # Rewrite the .ts segment paths onto the CDN and hand the player
            # the playlist inline as a base64 data: URL.
            m3u8_raw_data = re.sub(r".*?\.ts", add_domain, data)
            m3u8_url = f"data:application/vnd.apple.mpegurl;base64,{base64.b64encode(m3u8_raw_data.encode('utf-8')).decode()}"
        return {
            "header": "",
            "parse": "0",
            "playUrl": "",
            "url": m3u8_url
        }
    except Exception as e:
        print(e)
        return {}


if __name__ == '__main__':
    # res = searchContent("灰影人", "")
    # res = detailContent('bdys01$/dongzuo/22321', "")
    # func = "playerContent"
    res = playerContent("bdys01___/play/22321-0__0", "", "")
    # res = eval(func)("68614-1-1")
    # res = get_lines("/play/22321-0.htm")
    print(res)