123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291 |
- #coding=utf-8
- #!/usr/bin/python
- import sys
- sys.path.append('..')
- from base.spider import Spider
- import json
- import time
- from urllib.parse import quote_plus
- import requests
- from bs4 import BeautifulSoup
- import ddddocr
- import urllib3
- import re
- import hashlib
- from Crypto.Cipher import AES
- from binascii import b2a_hex
- from cryptography.hazmat.primitives import padding
- from cryptography.hazmat.primitives.ciphers import algorithms
- import zlib
- import base64
# Replace urllib3's Timeout validation hook with a stub that ignores the value it is
# given: it returns 5 unless its third positional argument is 'total', in which case
# it returns None.
# NOTE(review): presumably this forces a 5-second connect/read timeout on every
# request issued through urllib3 — confirm against the installed urllib3 version.
urllib3.util.timeout.Timeout._validate_timeout = lambda *args: 5 if args[2] != 'total' else None
# Short site tag; used as the prefix of the vod ids and play ids built in this module.
Tag = "bdys01"
# Display name of the site; prepended to the remarks of search results.
Tag_name = "哔滴影视"
# Base URL from which the site request URLs in this module are built.
siteUrl = "https://www.bdys01.com"
def getHeaders(url):
    """Build the browser-like HTTP headers sent with every request.

    Args:
        url: Value for the ``Referer`` header. When falsy (``""`` or ``None``)
            no ``Referer`` header is emitted.

    Returns:
        A freshly created dict mapping header names to values.
    """
    # Referer, when present, stays the first key, as in the incremental build.
    referer = {"Referer": url} if url else {}
    return {
        **referer,
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    }
def cacu(code):
    """Evaluate the arithmetic captcha text produced by the OCR engine.

    The text is cleaned up first: everything from the first ``=`` onwards is
    dropped (or, when there is no ``=``, one trailing ``2``/``7`` is dropped),
    then one trailing ``4``/``-`` is dropped, and ``I``/``l`` are read as ``1``.
    NOTE(review): the trailing characters are presumably OCR misreads of the
    ``=?`` tail of the captcha — confirm against real captcha samples.

    Args:
        code: Raw OCR string, e.g. ``"12+3="``. May be empty.

    Returns:
        The integer result, or ``None`` when the cleaned text is empty or
        contains none of the supported operators (``+``, ``-``, ``x``).

    Raises:
        ValueError: If an operand next to a recognised operator is empty or
            not numeric (e.g. ``"5+"``).
    """
    if "=" in code:
        code = code[:code.find("=")]
    elif code.endswith(("2", "7")):
        code = code[:-1]
    # endswith() is safe on an empty string, whereas indexing code[-1] raised
    # IndexError when the OCR returned "" or a string starting with "=".
    if code.endswith(("4", "-")):
        code = code[:-1]
    code = code.replace("I", "1").replace("l", "1")

    if code.isdigit():
        # No operator was recognised: treat the first two digits and the
        # following (up to two) digits as the operands of a subtraction.
        code = code[:4]
        return int(code[:2]) - int(code[2:])
    if "+" in code:
        operands = code.split("+")
        return int(operands[0]) + int(operands[1])
    if "-" in code:
        operands = code.split("-")
        return int(operands[0]) - int(operands[1])
    if "x" in code:
        operands = code.split("x")
        return int(operands[0]) * int(operands[1])
    return None
def verifyCode(key):
    """Run a site search for *key*, solving the arithmetic captcha with OCR.

    Up to five attempts are made. Each attempt uses a fresh HTTP session,
    downloads a captcha image, evaluates it with ``cacu`` and requests the
    search page with the computed answer. An attempt counts as successful when
    the returned page no longer references the captcha endpoint.

    Args:
        key: Search keyword; it is URL-quoted before being placed in the path.

    Returns:
        The HTML text of the search result page, or ``None`` when every
        attempt failed, a connect timeout occurred, or the OCR engine could
        not be created.
    """
    try:
        # Build the OCR engine once instead of once per retry.
        ocr = ddddocr.DdddOcr()
    except Exception as e:
        print(e)
        return None

    for _ in range(5):
        try:
            # The context manager closes the session's connections after each attempt.
            with requests.session() as session:
                img = session.get(
                    url=f"{siteUrl}/search/verifyCode?t={int(round(time.time() * 1000))}",
                    headers=getHeaders(siteUrl)
                ).content
                code = cacu(ocr.classification(img))
                url = f"{siteUrl}/search/{quote_plus(key)}?code={code}"
                res = session.get(
                    url=url,
                    headers=getHeaders(url.split("?")[0])
                ).text
            if "/search/verifyCode?t=" not in res:
                return res
        except Exception as e:
            print(e)
            # An unreachable site will not recover within the retry budget.
            if isinstance(e, requests.exceptions.ConnectTimeout):
                break
    return None
def pkcs7_padding(data):
    """Return *data* padded with PKCS#7 to the AES block size.

    Args:
        data: The bytes to pad.

    Returns:
        The padded bytes.
    """
    pkcs7 = padding.PKCS7(algorithms.AES.block_size)
    padder = pkcs7.padder()
    body = padder.update(data)
    tail = padder.finalize()
    return body + tail
def encrypt(text, key):
    """AES-ECB encrypt *text* and return the ciphertext as upper-case hex.

    Args:
        text: Plaintext string; UTF-8 encoded and PKCS#7 padded before encryption.
        key: AES key as a string; UTF-8 encoded before use.

    Returns:
        The ciphertext as an upper-case hexadecimal string.
    """
    cipher = AES.new(key.encode('utf-8'), AES.MODE_ECB)
    padded_plaintext = pkcs7_padding(text.encode('utf-8'))
    hex_digits = b2a_hex(cipher.encrypt(padded_plaintext))
    return hex_digits.decode().upper()
def _fetch_god_url(pid, t, sg, verify_code, query=""):
    """POST to the site's ``/god/<pid>`` endpoint and return its ``url`` field ("" if absent)."""
    return requests.post(
        url=f"{siteUrl}/god/{pid}{query}",
        data={
            "t": t,
            "sg": sg,
            "verifyCode": verify_code
        },
        headers=getHeaders(siteUrl)
    ).json().get("url", "")


def get_lines(path):
    """Collect the playable stream URLs for one play page.

    The play page is fetched to read its ``pid``; a signature ``sg`` is then
    built by AES-encrypting ``"<pid>-<timestamp>"`` with a key taken from the
    first 16 hex characters of that string's MD5 digest, and the ``/lines``
    endpoint is queried with it.

    Args:
        path: Site-relative path of the play page, e.g. ``"/play/22321-0.htm"``.

    Returns:
        A list of stream URLs; empty when nothing playable was found or any
        step failed (the exception is printed, not raised).
    """
    try:
        page = requests.get(url=f'{siteUrl}{path}', headers=getHeaders(siteUrl)).text
        # Raw string: "\d" in a normal string literal is an invalid escape sequence.
        pid = re.search(r"pid = (\d*)", page).group(1)
        t = str(int(round(time.time() * 1000)))
        plain = f"{pid}-{t}"
        key = hashlib.md5(plain.encode(encoding='UTF-8')).hexdigest()[0:16]
        sg = encrypt(plain, key)
        play_url = f"{siteUrl}/lines?t={t}&sg={sg}&pid={pid}"
        data = requests.get(url=play_url, headers=getHeaders(play_url)).json()["data"]

        lines = []
        if len(data) == 1:
            # Only one entry: the real address must be requested from /god/,
            # falling back to the "type=1" variant when the first reply has no url.
            play_line = _fetch_god_url(pid, t, sg, 666) or _fetch_god_url(pid, t, sg, 888, "?type=1")
            if not play_line:
                # Without a url there is nothing playable; do not fabricate "/<ts>.mp4".
                return lines
            now_ms = str(int(round(time.time() * 1000)))
            if "rkey" in play_line:
                realurl = play_line.replace("?rkey", now_ms + ".mp4?ver=6010&rkey")
            elif "ixigua" in play_line:
                realurl = play_line
            else:
                realurl = play_line.replace("http:", "https:") + "/" + now_ms + ".mp4"
            lines.append(realurl)
        else:
            for name, value in data.items():
                if name in ("m3u8_2", "m3u8"):
                    for line in value.split(","):
                        if "mp4" in line:
                            lines.append(line)
                        else:
                            # Playlist entries are pointed at the current site host.
                            lines.append(line.replace("www.bde4.cc", "www.bdys01.com"))
                elif name == "url3":
                    lines.append(value)
        return lines
    except Exception as e:
        print(e)
        return []
def add_domain(matched):
    """Prefix a regex match with the video host; usable as an ``re.sub`` callback.

    Args:
        matched: A match object; its whole match (group 0) is taken as a
            host-relative path.

    Returns:
        The absolute URL ``"https://vod.bdys.me/" + <matched text>``.
    """
    relative_path = matched.group(0)
    return "".join(("https://vod.bdys.me/", relative_path))
def searchContent(key, token):
    """Search the site for *key* and return the matching entries.

    Args:
        key: Search keyword. Only results whose title contains it are kept.
        token: Unused by this function.

    Returns:
        A list of dicts with the keys ``vod_id``, ``vod_name``, ``vod_pic``
        and ``vod_remarks``; an empty list when the search failed or nothing
        matched (exceptions are printed, not raised).
    """
    try:
        res = verifyCode(key)
        if res is None:
            # verifyCode gave up; stop here instead of letting the HTML parser
            # fail on a None document.
            print(f"{Tag}: search for {key!r} did not pass captcha verification")
            return []
        search_page = BeautifulSoup(res, "html.parser")
        videos = []
        for vod in search_page.select("div.row.row-0"):
            # Look the title link up once per card; it carries both the title and the remark text.
            title_link = vod.select_one("div.card-body.py-0.pe-1").a
            vod_name = title_link["title"]
            if key not in vod_name:
                continue
            videos.append({
                "vod_id": f'{Tag}${vod.a["href"].split(".")[0]}',
                "vod_name": vod_name,
                "vod_pic": vod.img["src"],
                "vod_remarks": Tag_name + " " + title_link.get_text()
            })
        return videos
    except Exception as e:
        print(e)
        return []
def detailContent(ids, token):
    """Fetch the detail page of one title and build its metadata and play list.

    Args:
        ids: Vod id of the form ``"<tag>$<path>"``; only the part after the
            last ``$`` is used to build the page URL.
        token: Unused by this function.

    Returns:
        A one-element list holding the detail dict, or an empty list when
        anything fails (the exception is printed, not raised).
    """
    try:
        vod_path = ids.split("$")[-1]
        page_url = f"{siteUrl}/{vod_path}.htm"
        html = requests.get(url=page_url, headers=getHeaders(siteUrl)).text
        doc = BeautifulSoup(html, "html.parser").select_one(
            "div.container-xl.clear-padding-sm.my-3.py-1")

        def link_texts(node):
            # Comma-joined text of every <a> below the node.
            return ",".join(a.get_text() for a in node.select("a"))

        # Extract the basic metadata.
        sourcediv = doc.select_one("div.card-body")
        director = actor = vod_remarks = type_name = vod_year = vod_area = ""
        for item in sourcediv.select("p"):
            if not item.strong:
                continue
            label = item.strong.get_text()
            if "导演" in label:
                director = link_texts(item)
            elif "主演" in label:
                actor = link_texts(item)
            elif "摘要" in label:
                vod_remarks = item.span.get_text()
            elif "类型" in label:
                type_name = link_texts(item)
            elif "上映日期" in label:
                vod_year = link_texts(item)
            elif "制片国家/地区" in label:
                vod_area = item.get_text().replace("制片国家/地区", "").replace("[", "").replace("]", "")

        vodList = {
            "vod_id": f'{Tag}${vod_path}',
            "vod_name": sourcediv.h2.get_text(),
            "vod_pic": sourcediv.img["src"],
            "type_name": type_name,
            "vod_year": vod_year,
            "vod_area": vod_area,
            "vod_remarks": vod_remarks,
            "vod_actor": actor,
            "vod_director": director,
            "vod_content": doc.select_one("div.card.collapse").select_one("div.card-body").get_text().strip(),
        }

        # Build the play-list data.
        sources = doc.select("a.btn.btn-square")
        # The number of lines is taken from the first episode that yields any.
        lines_count = 0
        for source in sources:
            lines_count = len(get_lines(source["href"]))
            if lines_count:
                break

        vod_play = {}
        for i in range(lines_count):
            line_index = (i + 1) % lines_count
            episodes = [
                source.get_text() + "$" + f"{Tag}___" + source["href"].split(".")[0] + f"__{line_index}"
                for source in sources
            ]
            vod_play[f"线路{i + 1}"] = "#".join(episodes)

        if vod_play:
            vodList["vod_play_from"] = "$$$".join(vod_play.keys())
            vodList["vod_play_url"] = "$$$".join(vod_play.values())
        return [vodList]
    except Exception as e:
        print(e)
        return []
def playerContent(ids, flag, token):
    """Resolve a play id to the final playable URL.

    Args:
        ids: Play id of the form ``"<tag>___<play path>__<line index>"``.
        flag: Unused by this function.
        token: Unused by this function.

    Returns:
        A dict with the keys ``header``, ``parse``, ``playUrl`` and ``url``,
        or an empty dict when anything fails (the exception is printed).
        For ``.m3u8`` lines, ``url`` is a base64 ``data:`` URI that holds the
        rewritten playlist.
    """
    try:
        play_path, *_, line_index = ids.split("___")[-1].split("__")
        play_from = int(line_index)
        lines = get_lines(f"{play_path}.htm")
        m3u8_url = lines[play_from]
        if m3u8_url.endswith("m3u8"):
            # Slice the response bytes directly; converting them to a list of
            # ints and back only copied the whole payload twice.
            # NOTE(review): the first 3354 bytes are skipped, presumably a
            # fixed-size wrapper in front of the gzip data — confirm.
            payload = requests.get(url=m3u8_url, headers=getHeaders("")).content[3354:]
            # wbits = 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            playlist = zlib.decompress(payload, 16 + zlib.MAX_WBITS).decode()
            # Turn every relative .ts segment reference into an absolute URL.
            m3u8_raw_data = re.sub(r".*?\.ts", add_domain, playlist)
            m3u8_url = f"data:application/vnd.apple.mpegurl;base64,{base64.b64encode(m3u8_raw_data.encode('utf-8')).decode()}"
        return {
            "header": "",
            "parse": "0",
            "playUrl": "",
            "url": m3u8_url
        }
    except Exception as e:
        print(e)
        return {}
if __name__ == '__main__':
    # Manual smoke test: resolve one play id and print the result.
    # Other entry points that can be tried here instead:
    #   searchContent("灰影人", "")
    #   detailContent('bdys01$/dongzuo/22321', "")
    #   get_lines("/play/22321-0.htm")
    print(playerContent("bdys01___/play/22321-0__0", "", ""))
|