  1. """
  2. 作者 老王叔叔 內容均從互聯網收集而來 僅供交流學習使用 版權歸原創者所有 如侵犯了您的權益 請通知作者 將及時刪除侵權內容
  3. ====================kaiyuebinguan====================
  4. """
# Note: several of these imports (unpad, ARC4, urllib, binascii, base64, json,
# time, os) appear unused in this file; they look carried over from the shared
# spider template.
from Crypto.Util.Padding import unpad
from urllib.parse import unquote
from Crypto.Cipher import ARC4
from base.spider import Spider
from bs4 import BeautifulSoup
import urllib.request
import urllib.parse
import binascii
import requests
import base64
import json
import time
import sys
import re
import os

sys.path.append('..')

xurl = "https://www.subaibai.com"

headerx = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'authority': 'www.subaibai.com',
    'Referer': 'https://www.subaibai.com/',
    'Origin': 'https://www.subaibai.com/'
}

pm = ''

class Spider(Spider):
    global xurl
    global headerx

    def getName(self):
        return "首页"

    def init(self, extend):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def extract_middle_text(self, text, start_str, end_str, pl, start_index1: str = '', end_index2: str = ''):
        if pl == 3:
            # Mode 3: repeatedly slice every start_str...end_str block, then run
            # the regex in start_index1 over each block to build play-list strings.
            plx = []
            while True:
                start_index = text.find(start_str)
                if start_index == -1:
                    break
                end_index = text.find(end_str, start_index + len(start_str))
                if end_index == -1:
                    break
                middle_text = text[start_index + len(start_str):end_index]
                plx.append(middle_text)
                text = text.replace(start_str + middle_text + end_str, '')
            if len(plx) > 0:
                purl = ''
                for i in range(len(plx)):
                    matches = re.findall(start_index1, plx[i])
                    output = ""
                    for match in matches:
                        # Pull the first standalone number out of the link text
                        # to use as the episode number.
                        match3 = re.search(r'(?:^|[^0-9])(\d+)(?:[^0-9]|$)', match[1])
                        if match3:
                            number = match3.group(1)
                        else:
                            number = 0
                        if 'http' not in match[0]:
                            output += f"#{match[1]}${number}{xurl}{match[0]}"
                        else:
                            output += f"#{match[1]}${number}{match[0]}"
                    output = output[1:]
                    purl = purl + output + "$$$"
                purl = purl[:-3]
                return purl
            else:
                return ""
        else:
            start_index = text.find(start_str)
            if start_index == -1:
                return ""
            end_index = text.find(end_str, start_index + len(start_str))
            if end_index == -1:
                return ""
            if pl == 0:
                # Mode 0: return the single slice between the two markers.
                middle_text = text[start_index + len(start_str):end_index]
                return middle_text.replace("\\", "")
            if pl == 1:
                # Mode 1: regex over the slice, captures joined with spaces.
                middle_text = text[start_index + len(start_str):end_index]
                matches = re.findall(start_index1, middle_text)
                if matches:
                    jg = ' '.join(matches)
                    return jg
            if pl == 2:
                # Mode 2: regex over the slice, captures joined with '$$$'.
                middle_text = text[start_index + len(start_str):end_index]
                matches = re.findall(start_index1, middle_text)
                if matches:
                    new_list = [item for item in matches]  # bug fix: was `f`, an undefined name
                    jg = '$$$'.join(new_list)
                    return jg
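
    # A minimal sketch of the four modes (made-up marker strings, illustrative only):
    #   extract_middle_text('x<b>7.9</b>y', '<b>', '</b>', 0)  ->  '7.9'
    #   mode 1 joins regex captures from the slice with spaces,
    #   mode 2 joins them with '$$$',
    #   mode 3 loops over every marker pair and builds '#<title>$<num><url>'
    #   play strings, one '$$$'-separated group per pair.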

    def homeContent(self, filter):
        result = {"class": [{"type_id": "new-movie", "type_name": "电影"},
                            {"type_id": "tv-drama", "type_name": "剧集"},
                            {"type_id": "hot-month", "type_name": "热门电影"},
                            {"type_id": "high-movie", "type_name": "高分电影"},
                            {"type_id": "cartoon-movie", "type_name": "动漫电影"},
                            {"type_id": "hongkong-movie", "type_name": "香港经典"},
                            {"type_id": "domestic-drama", "type_name": "国产剧"},
                            {"type_id": "american-drama", "type_name": "欧美剧"},
                            {"type_id": "korean-drama", "type_name": "韩剧"},
                            {"type_id": "anime-drama", "type_name": "动漫剧"}]
                  }
        return result

    def homeVideoContent(self):
        videos = []
        try:
            detail = requests.get(url=xurl, headers=headerx)
            detail.encoding = "utf-8"
            res = detail.text
            doc = BeautifulSoup(res, "lxml")
            soups = doc.find_all('div', class_="bt_img")
            for soup in soups:
                vods = soup.find_all('li')
                for vod in vods:
                    name = vod.find('img')['alt']
                    ids = vod.find('h3', class_="dytit")
                    id = ids.find('a')['href']
                    id = id.replace('www.subaibaiys.com', 'www.subaibai.com')
                    pic = vod.find('img')['data-original']
                    if 'http' not in pic:
                        pic = xurl + pic
                    remark = self.extract_middle_text(str(vod), 'class="rating">', '</div>', 0)
                    video = {
                        "vod_id": id,
                        "vod_name": name,
                        "vod_pic": pic,
                        "vod_remarks": remark
                    }
                    videos.append(video)
            result = {'list': videos}
            return result
        except:
            pass
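
    # homeVideoContent, categoryContent and searchContentPage all walk the same
    # list markup. Judging from the selectors, the site's pages look roughly
    # like the sketch below (reconstructed from the code, not verified against
    # the live site):
    #
    #   <div class="bt_img">
    #     <ul>
    #       <li>
    #         <h3 class="dytit"><a href="/movie/56949.html">...</a></h3>
    #         <img alt="片名" data-original="/path/poster.jpg">
    #         <div class="rating">7.9</div>
    #       </li>
    #     </ul>
    #   </div>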

    def categoryContent(self, cid, pg, filter, ext):
        result = {}
        videos = []
        if pg:
            page = int(pg)
        else:
            page = 1
        if page == 1:  # bug fix: was `page == '1'`; an int never equals the string '1'
            url = f'{xurl}/{cid}'
        else:
            url = f'{xurl}/{cid}/page/{str(page)}'
        try:
            detail = requests.get(url=url, headers=headerx)
            detail.encoding = "utf-8"
            res = detail.text
            doc = BeautifulSoup(res, "lxml")
            soups = doc.find_all('div', class_="bt_img")
            for soup in soups:
                vods = soup.find_all('li')
                for vod in vods:
                    name = vod.find('img')['alt']
                    ids = vod.find('h3', class_="dytit")
                    id = ids.find('a')['href']
                    id = id.replace('www.subaibaiys.com', 'www.subaibai.com')
                    pic = vod.find('img')['data-original']
                    if 'http' not in pic:
                        pic = xurl + pic
                    remark = self.extract_middle_text(str(vod), 'class="rating">', '</div>', 0)
                    video = {
                        "vod_id": id,
                        "vod_name": name,
                        "vod_pic": pic,
                        "vod_remarks": remark
                    }
                    videos.append(video)
        except:
            pass
        result = {'list': videos}
        result['page'] = pg
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result
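
    # Illustrative call (mirrors the test at the bottom of this file):
    #   categoryContent('new-movie', 2, 'filter', {})
    # fetches https://www.subaibai.com/new-movie/page/2 and returns the parsed
    # list plus the fixed paging fields above.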

    def detailContent(self, ids):
        global pm
        did = ids[0]
        result = {}
        videos = []
        if 'http' not in did:
            did = xurl + did
        res1 = requests.get(url=did, headers=headerx)
        res1.encoding = "utf-8"
        res = res1.text
        # The homepage publishes two switch strings, s1 and s2: s1 seems to act
        # as a marker that must appear in the synopsis, otherwise the s2 jump
        # link is served instead of the real play list.
        url = 'https://www.subaibai.com'
        response = requests.get(url)
        response.encoding = 'utf-8'
        code = response.text
        name = self.extract_middle_text(code, "s1='", "'", 0)
        Jumps = self.extract_middle_text(code, "s2='", "'", 0)
        content = self.extract_middle_text(res, '<div class="yp_context">', '</p>', 0)
        content = content.replace('\t', '').replace('<p>', '').replace(' ', '').replace('\n', '')
        if name not in content:
            bofang = Jumps
        else:
            bofang = self.extract_middle_text(res, '<div class="paly_list_btn">', '</div>', 3, 'href="(.*?)">(.*?)</a>')
            bofang = bofang.replace('www.subaibaiys.com', 'www.subaibai.com').replace('立即播放&nbsp;&nbsp;', '')
        videos.append({
            "vod_id": did,
            "vod_actor": '',
            "vod_director": '',
            "vod_content": content,
            "vod_play_from": '专线',
            "vod_play_url": bofang
        })
        result['list'] = videos
        return result
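
    # vod_play_url above follows the shape built by extract_middle_text mode 3:
    # episodes joined with '#', lines joined with '$$$', each entry being
    # '<title>$<episode-number><page-url>', e.g. (made-up values):
    #   '第1集$1https://www.subaibai.com/play/1-1.html#第2集$2https://...'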

    def playerContent(self, flag, id, vipFlags):
        # `id` arrives as '<episode-number><page-url>' (see detailContent), so
        # splitting on "http" separates the number from the link.
        parts = id.split("http")
        xiutan = 1
        if xiutan == 0:
            # Dead branch while xiutan == 1; kept as the sniffer-free variant.
            if len(parts) > 1:
                before_https, after_https = parts[0], 'http' + parts[1]
                if '' in after_https:  # note: always True as written, so the page URL is passed through directly
                    url = after_https
                else:
                    res = requests.get(url=after_https, headers=headerx)
                    res = res.text
                    url = self.extract_middle_text(res, '},"url":"', '"', 0).replace('\\', '')
                    # =======================================
                    # url = base64.b64decode(url).decode('utf-8')
                    # url = unquote(url)
                    # import base64
                    # base64_decoded_bytes = base64.b64decode(url)
                    # base64_decoded_string = base64_decoded_bytes.decode('utf-8')
                    # url = unquote(base64_decoded_string)
                    # url = "https://" + self.extract_middle_text(url, 'https://', '.m3u8', 0) + '.m3u8'
                    # =======================================
                result = {}
                result["parse"] = xiutan
                result["playUrl"] = ''
                result["url"] = url
                result["header"] = headerx
                return result
        # =======================================
        if xiutan == 1:
            if len(parts) > 1:
                before_https, after_https = parts[0], 'http' + parts[1]
                result = {}
                result["parse"] = xiutan
                result["playUrl"] = ''
                result["url"] = after_https
                result["header"] = headerx
                return result
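
    # "parse" here appears to follow the common TVBox convention: 1 asks the
    # player to sniff the real stream from the returned page URL, while 0 would
    # mean `url` is already a direct stream (which is what the commented decode
    # experiments above were working toward). This reading is inferred from the
    # code, not confirmed by the source.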

    def searchContentPage(self, key, quick, page):
        result = {}
        videos = []
        if not page:
            page = '1'
        if page == '1':
            url = f'{xurl}/?s={key}'
        else:
            url = f'{xurl}/page/{str(page)}?s={key}'  # was '{xurl}//page/...' with a doubled slash
        detail = requests.get(url=url, headers=headerx)
        detail.encoding = "utf-8"
        res = detail.text
        doc = BeautifulSoup(res, "lxml")
        soups = doc.find_all('div', class_="bt_img")
        for soup in soups:
            vods = soup.find_all('li')
            for vod in vods:
                name = vod.find('img')['alt']
                ids = vod.find('h3', class_="dytit")
                id = ids.find('a')['href']
                id = id.replace('www.subaibaiys.com', 'www.subaibai.com')
                pic = vod.find('img')['data-original']
                if 'http' not in pic:
                    pic = xurl + pic
                remark = self.extract_middle_text(str(vod), 'class="rating">', '</div>', 0)
                video = {
                    "vod_id": id,
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remark
                }
                videos.append(video)
        result['list'] = videos
        result['page'] = page
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def searchContent(self, key, quick):
        return self.searchContentPage(key, quick, '1')

    def localProxy(self, params):
        # proxyM3u8 / proxyMedia / proxyTs are expected to come from the base
        # Spider template; they are not defined in this file.
        if params['type'] == "m3u8":
            return self.proxyM3u8(params)
        elif params['type'] == "media":
            return self.proxyMedia(params)
        elif params['type'] == "ts":
            return self.proxyTs(params)
        return None
  302. """
  303. =======================================
  304. 换行 \n 零个或者多个空格 \s+ 数字型 int 文本型 str 分页{} '年代':'2021'
  305. 性能要求高"lxml" 处理不规范的HTML"html5lib" 简单应用"html.parser" 解析XML"xml"
  306. =======================================
  307. /rss/index.xml?wd=爱情&page=1 搜索有验证
  308. /index.php/ajax/suggest?mid=1&wd=爱情&page=1&limit=30 搜索有验证
  309. /index.php/ajax/data?mid=1&tid={cateId}&class={class}&area={area}&page={catePg}&limit=30 分类有验证
  310. /index.php/vod/type/class/{cid}/id/41/page/{str(page)}/year/{NdType}.html 隐藏分类
  311. /{cateId}-{area}-{by}-{class}-{lang}-{letter}---{catePg}---{year}.html
  312. 短剧 穿越 古装 仙侠 女频 恋爱 反转 现代 都市 剧情 玄幻 脑洞 悬疑
  313. =======================================
  314. aaa = self.extract_middle_text(res, 'bbb', 'ccc', 0)
  315. aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
  316. 取头 取尾 (不循环) 截取项 (不循环) 长用于直链 二次截取 0号子程序
  317. aaa =self.extract_middle_text(res, 'bbb', 'ccc',1,'html">(.*?)<')
  318. aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
  319. 取头 取尾 (不循环) 截取项 (循环) 长用于详情 和2号区别没有$$$ 1号子程序
  320. aaa = self.extract_middle_text(res, 'bbb','ccc', 2,'<span class=".*?" id=".*?">(.*?)</span>')
  321. aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
  322. 取头 取尾 (不循环) 截取项 (循环) 只能用于线路数组 里面包含$$$ 2号子程序
  323. aaa = self.extract_middle_text(res, 'bbb', 'ccc', 3,'href="(.*?)" class=".*?">(.*?)</a>')
  324. aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
  325. 取头 取尾 (循环) 截取项 (循环) 长用于播放数组 3号子程序
  326. =======================================
  327. """

if __name__ == '__main__':
    spider_instance = Spider()
    # res = spider_instance.homeContent('filter')                                          # categories 🚨
    # res = spider_instance.homeVideoContent()                                             # home page 🚨
    # res = spider_instance.categoryContent('new-movie', 2, 'filter', {})                  # category paging 🚨
    res = spider_instance.detailContent(['https://www.subaibai.com/movie/56949.html'])     # detail page 🚨
    # res = spider_instance.playerContent('1', '0https://www.mjzj.me/74354-1-1.html', 'vipFlags')  # play page 🚨
    # res = spider_instance.searchContentPage('爱情', 'quick', '2')                         # search page 🚨
    print(res)