123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414 |
- # coding=utf-8
- # !/usr/bin/python
- import sys
- sys.path.append('..')
- try:
- # from base.spider import Spider as BaseSpider
- from base.spider import BaseSpider
- except ImportError:
- from t4.base.spider import BaseSpider
- import time
- import re
- from urllib import request, parse
- import urllib
- import urllib.request
- from xml.etree.ElementTree import fromstring, ElementTree as et
- """
- 配置示例:
- t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式,比如./开头或者.json结尾
- api里会自动含有ext参数是base64编码后的选中的筛选条件
- {
- "key":"hipy_t4_新浪资源",
- "name":"新浪资源(hipy_t4)",
- "type":4,
- "api":"http://192.168.31.49:5707/api/v1/vod/新浪资源",
- "searchable":1,
- "quickSearch":0,
- "filterable":1,
- "ext":""
- },
- {
- "key": "hipy_t3_新浪资源",
- "name": "新浪资源(hipy_t3)",
- "type": 3,
- "api": "{{host}}/txt/hipy/新浪资源.py",
- "searchable": 1,
- "quickSearch": 0,
- "filterable": 1,
- "ext": ""
- },
- """
- class Spider(BaseSpider): # 元类 默认的元类 type
def getName(self):
    """Return the display name of this source."""
    # Original author's note on this line: content unsuitable for minors is excluded.
    source_name = "新浪资源"
    return source_name
# When truthy, custom_list and detailContent drop entries whose type name
# matches 伦理 / 倫理 / 福利. Disabled by default.
filterate = False
def init(self, extend=""):
    """Initialisation hook; it only echoes ``extend`` to stdout.

    Args:
        extend: Extension value supplied by the host. Per the configuration
            note at the top of the file this is the source's ``ext`` node.
    """
    banner = "============{0}============".format(extend)
    print(banner)
def isVideoFormat(self, url):
    """Placeholder hook: performs no check on ``url`` and returns ``None``."""
    return None
def manualVideoCheck(self):
    """Placeholder hook: does nothing and returns ``None``."""
    return None
def homeContent(self, filter):
    """Build the home-page payload: the fixed category list plus optional filters.

    Args:
        filter: Truthy when the caller also wants the filter definitions.

    Returns:
        dict with ``class`` (a list of ``{'type_name', 'type_id'}`` dicts in
        table order) and, only when ``filter`` is truthy, ``filters`` taken
        from ``self.config['filter']``.
    """
    # Display name -> category id (the id is what categoryContent sends as ``t=``).
    # A time-gated extra category existed only as commented-out code and is
    # intentionally not enabled here.
    cateManual = {
        '动漫': '3',
        '动漫电影': '17',
        '综艺': '4',
        '纪录片': '5',
        '动作片': '6',
        '爱情片': '7',
        '科幻片': '8',
        '战争片': '9',
        '剧情片': '10',
        '恐怖片': '11',
        '喜剧片': '12',
        '大陆剧': '13',
        '港澳剧': '14',
        '台湾剧': '15',
        '欧美剧': '16',
        '韩剧': '18',
        '日剧': '20',
        '泰剧': '21',
        '体育': '23',
    }
    result = {
        'class': [
            {'type_name': name, 'type_id': type_id}
            for name, type_id in cateManual.items()
        ]
    }
    if filter:
        result['filters'] = self.config['filter']
    return result
def homeVideoContent(self):
    """Return the home-page recommendation list.

    Downloads the listing endpoint with ``h=24`` (presumably "updated within
    the last 24 hours" - confirm against the API) and hands every ``<list>``
    element of the response to ``custom_list``.

    Returns:
        dict with a single ``list`` key holding the entries from ``custom_list``.
    """
    feed = self.custom_webReadFile(
        urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&h=24')
    # fromstring() already yields the root element, so it can be iterated directly.
    list_nodes = fromstring(feed).iter('list')
    return {'list': self.custom_list(html=list_nodes)}
def categoryContent(self, tid, pg, filter, extend):
    """Fetch one page of a category.

    Args:
        tid: Category id, sent as the ``t`` query parameter.
        pg: Page number, sent as ``pg`` and echoed back unchanged as ``page``.
        filter: Unused by this implementation.
        extend: Unused by this implementation.

    Returns:
        dict with ``list`` (entries from ``custom_list``), ``page``,
        ``pagecount``, ``limit`` and ``total``. The three paging values are
        always ints: they come from the ``pagecount`` / ``pagesize`` /
        ``recordcount`` attributes of the ``<list>`` element and fall back to
        1 / 20 / 9999 when an attribute is missing or not numeric.
    """
    def _as_int(raw, fallback):
        # XML attributes are strings (or None when absent); keep the fallback
        # instead of raising so a sparse response still yields a usable page.
        try:
            return int(raw)
        except (TypeError, ValueError):
            return fallback

    pagecount, limit, total = 1, 20, 9999
    Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&t={0}&pg={1}'.format(
        tid, pg)
    root = fromstring(self.custom_webReadFile(urlStr=Url))
    for list_node in root.iter('list'):
        pagecount = _as_int(list_node.attrib.get('pagecount'), pagecount)
        limit = _as_int(list_node.attrib.get('pagesize'), limit)
        total = _as_int(list_node.attrib.get('recordcount'), total)
    # iter() returns a one-shot iterator, so a fresh one is created for custom_list.
    videos = self.custom_list(html=root.iter('list'))
    return {
        'list': videos,
        'page': pg,
        'pagecount': pagecount,
        'limit': limit,
        'total': total,
    }
def detailContent(self, array):
    """Return the detail record (metadata plus episode list) for one entry.

    Args:
        array: List whose first item is a vod_id in the form
            ``"<title>###<id>###<poster url>"`` as produced by ``custom_list``.
            Missing trailing fields are treated as empty strings.

    Returns:
        ``{'list': [vod]}``, or ``{'list': []}`` when ``self.filterate`` is set
        and the type name matches 伦理 / 倫理 / 福利. If the detail request or
        its parsing fails, the error is printed and ``vod`` keeps the
        title/poster from the id plus whatever fields were read before the
        failure, with an empty ``vod_play_url``.
    """
    parts = array[0].split('###')
    parts += [''] * (3 - len(parts))  # tolerate ids that lack id/poster fields
    title, vod_key, logo = parts[0], parts[1], parts[2]

    # XML tag -> key of the vod dict that receives the tag's text.
    tag_to_field = {
        'type': 'type_name',
        'year': 'vod_year',
        'area': 'vod_area',
        'lang': 'vod_remarks',
        'actor': 'vod_actor',
        'director': 'vod_director',
        'des': 'vod_content',
    }
    vod = {
        "vod_id": array[0],
        "vod_name": title,
        "vod_pic": logo,
        "type_name": '',
        "vod_year": '',
        "vod_area": '',
        "vod_remarks": '',
        "vod_actor": '',
        "vod_director": '',
        "vod_content": '',
    }
    vod_play_from = ['播放线路']
    vod_play_url = []
    try:
        url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + vod_key
        xmlTxt = self.custom_webReadFile(urlStr=url)
        for list_node in fromstring(xmlTxt).iter('list'):
            for video in list_node:
                for field in video:
                    key = tag_to_field.get(field.tag)
                    if key is not None:
                        # An empty tag has text None; keep the field a string.
                        vod[key] = field.text or ''
        # Episodes are taken from the raw text of the <dd flag="xlyun"> node.
        temporary = self.custom_RegexGetText(Text=xmlTxt, RegexText=r'<dd flag="xlyun">(.+?)</dd>', Index=1)
        temporary = temporary.replace('<![CDATA[', '').replace(']]>', '')
        vod_play_url.append("#".join(self.custom_EpisodesList(temporary)))
    except Exception as exc:
        # Best effort: still return the basic record, but report the failure
        # instead of swallowing it silently.
        print('detailContent: failed to load details for {0!r}: {1!r}'.format(vod_key, exc))
    vod['vod_play_from'] = "$$$".join(vod_play_from)
    vod['vod_play_url'] = "$$$".join(vod_play_url)
    if self.filterate and self.custom_RegexGetText(Text=vod['type_name'], RegexText=r'(伦理|倫理|福利)',
                                                   Index=1) != '':
        return {'list': []}
    return {'list': [vod]}
def searchContent(self, key, quick, pg=1):
    """Search the source by keyword.

    Args:
        key: Search keyword; URL-quoted before being sent as ``wd``.
        quick: Unused by this implementation.
        pg: Result page to request (defaults to the first page). Previously
            this argument was ignored and page 1 was always requested.

    Returns:
        dict with a single ``list`` key holding the entries from ``custom_list``.
    """
    page = pg or 1  # treat None / '' / 0 as "first page"
    Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&wd={0}&pg={1}'.format(
        urllib.parse.quote(key), page)
    xmlTxt = self.custom_webReadFile(urlStr=Url)
    listXml = fromstring(xmlTxt).iter('list')
    return {'list': self.custom_list(html=listXml)}
def playerContent(self, flag, id, vipFlags):
    """Resolve the playable address for one episode.

    Downloads ``id`` (using ``self.header``) and looks for the first
    ``http(s)://....m3u8`` address in the response. When one is found it is
    returned with ``parse=1``; otherwise ``id`` itself is returned with
    ``parse=0``.

    Args:
        flag: Unused by this implementation.
        id: Episode URL taken from the play list.
        vipFlags: Unused by this implementation.

    Returns:
        dict with ``parse``, ``playUrl``, ``url``, ``jx`` and ``header``.
    """
    page = self.custom_webReadFile(urlStr=id, header=self.header)
    found = self.custom_RegexGetText(Text=page, RegexText=r'(https{0,1}://.+?\.m3u8)', Index=1)
    if found.find('.m3u8') < 1:
        play_url, parse_mode = id, 0
    else:
        play_url, parse_mode = found, 1
    return {
        "parse": parse_mode,  # 0 = play directly, 1 = sniff
        "playUrl": '',
        "url": play_url,
        'jx': 0,  # VIP parsing: 0 = off, 1 = on
        "header": '',
    }
# Static configuration. homeContent returns config['filter'] as its ``filters``
# payload; ``player`` is not read anywhere in the visible part of this file.
config = {"player": {}, "filter": {}}
# Request headers that playerContent passes along when it downloads the play page.
header = {}
def localProxy(self, params):
    """Local proxy hook; ignores ``params`` and returns a fixed triple.

    Returns:
        ``[200, "video/MP2T", ""]`` - presumably status code, content type
        and body (confirm against the host's proxy contract).
    """
    status, content_type, body = 200, "video/MP2T", ""
    return [status, content_type, body]
- # -----------------------------------------------自定义函数-----------------------------------------------
- # 正则取文本
def custom_RegexGetText(self, Text, RegexText, Index):
    """Return one capture group of the first regex match, or ``""``.

    The pattern is searched with ``re.M | re.S``.

    Args:
        Text: Text to search. ``None`` is treated as "no match".
        RegexText: Regular expression pattern.
        Index: Group number to return (0 is the whole match).

    Returns:
        The captured text, or an empty string when nothing matched or the
        requested group did not take part in the match. Callers in this file
        compare the result with ``''`` or call string methods on it, so it
        is always a ``str``.
    """
    if Text is None:
        return ""
    found = re.search(RegexText, Text, re.M | re.S)
    if found is None:
        return ""
    captured = found.group(Index)
    return "" if captured is None else captured
- # 分类取结果
def custom_list(self, html):
    """Turn listing ``<list>`` elements into entries for the host UI.

    The listing is only used to collect ids. One detail request for all ids
    (``ac=detail&ids=a,b,c``) then supplies name, poster, note and year.

    Args:
        html: Iterable of ``<list>`` elements whose children are the
            per-video elements.

    Returns:
        List of dicts with ``vod_id`` (``"<name>###<id>###<pic>"``),
        ``vod_name``, ``vod_pic``, ``vod_year`` and ``vod_remarks``. Empty when
        the listing holds no ids (no request is made in that case). Videos
        without an id are skipped, and missing or empty tags yield ``''``
        rather than a value left over from the previous video. When
        ``self.filterate`` is set, videos whose type matches
        伦理 / 倫理 / 福利 are dropped.
    """
    ids = []
    for list_node in html:
        for video in list_node:
            vod_key = video.findtext('id')
            if vod_key:
                ids.append(vod_key)
    if not ids:
        return []

    url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + ','.join(ids)
    xmlTxt = self.custom_webReadFile(urlStr=url)
    videos = []
    for list_node in fromstring(xmlTxt).iter('list'):
        for video in list_node:
            # Fresh mapping per video so nothing leaks between entries.
            fields = {child.tag: child.text or '' for child in video}
            if not fields.get('id'):
                continue
            if self.filterate and self.custom_RegexGetText(Text=fields.get('type', ''),
                                                           RegexText=r'(伦理|倫理|福利)',
                                                           Index=1) != '':
                continue
            title = fields.get('name', '')
            img = fields.get('pic', '')
            videos.append({
                "vod_id": '{0}###{1}###{2}'.format(title, fields['id'], img),
                "vod_name": title,
                "vod_pic": img,
                "vod_year": fields.get('year', ''),
                "vod_remarks": fields.get('note', ''),
            })
    return videos
- # 访问网页
def custom_webReadFile(self, urlStr, header=None, codeName='utf-8', timeout=30):
    """Download ``urlStr`` and return the decoded response body.

    Args:
        urlStr: Address to request.
        header: Request headers. When ``None``, a default set is built with
            the URL as ``Referer``, a desktop Chrome ``User-Agent`` and
            ``Host`` taken from the URL's network location.
        codeName: Encoding used to decode the body.
        timeout: Seconds to wait on blocking network operations, so a stalled
            server cannot hang the caller indefinitely.

    Returns:
        The response body as ``str``.

    Raises:
        Whatever ``urllib.request.urlopen`` or the decoding raises (for example
        ``urllib.error.URLError`` or ``UnicodeDecodeError``); nothing is caught here.
    """
    if header is None:
        header = {
            "Referer": urlStr,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
            # netloc is exactly the host[:port] part, even when the URL has a
            # query string but no path.
            "Host": urllib.parse.urlsplit(urlStr).netloc,
        }
    req = urllib.request.Request(url=urlStr, headers=header)
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.read().decode(codeName)
- # 取剧集区
def custom_lineList(self, Txt, mark, after):
    """Cut ``Txt`` into segments that start at ``mark`` and stop before ``after``.

    Args:
        Txt: Text to scan.
        mark: Start marker; each returned segment begins with it.
        after: Terminator; it is not included in the segment.

    Returns:
        List of segments in order of appearance. If the terminator is missing
        for the last segment, the rest of ``Txt`` is returned as that segment
        and scanning stops.
    """
    circuit = []
    origin = Txt.find(mark)
    # NOTE(review): a marker found at index 0-8 is ignored (``> 8`` rather than
    # ``>= 0``). Kept as-is for compatibility; confirm whether it is intended.
    while origin > 8:
        end = Txt.find(after, origin)
        if end == -1:
            # No terminator left: take the remainder and stop. Slicing with -1
            # would drop the last character and could loop forever.
            circuit.append(Txt[origin:])
            break
        circuit.append(Txt[origin:end])
        # Always move forward, even when the terminator matched at ``origin``.
        origin = Txt.find(mark, max(end, origin + 1))
    return circuit
- # 正则取文本,返回数组
def custom_RegexGetTextLine(self, Text, RegexText, Index):
    """Return every match of ``RegexText`` in ``Text`` as a list.

    The pattern is applied with ``re.M | re.S``. Items follow ``findall``
    semantics: whole matches without groups, group text with one group,
    tuples with several groups.

    Args:
        Text: Text to search.
        RegexText: Regular expression pattern.
        Index: Accepted for symmetry with ``custom_RegexGetText`` but not used.

    Returns:
        A new list of matches; empty when nothing matches.
    """
    matches = re.findall(RegexText, Text, re.M | re.S)
    return list(matches)
- # 取集数
def custom_EpisodesList(self, html):
    """Parse a ``title$url#title$url...`` play list into ``"title$url"`` items.

    Args:
        html: Raw episode string; episodes are separated by ``#`` and each
            one is ``title$url``. Anything after a second ``$`` is ignored.

    Returns:
        List of ``"title$url"`` strings in source order. Segments without a
        ``$`` (empty input, a trailing ``#``) or with an empty URL are skipped
        instead of raising ``IndexError``.
    """
    videos = []
    for entry in html.split('#'):
        pieces = entry.split('$')
        if len(pieces) < 2 or not pieces[1]:
            continue
        videos.append(pieces[0] + "$" + pieces[1])
    return videos
- # 取分类
def custom_classification(self):
    """Download the category table from the API and print it as dict entries.

    Every child of each ``<class>`` element is a category. Those whose text
    matches 福利 / 倫理片 / 伦理片 are skipped. Each kept category is printed
    as ``'name':'id',``.

    Returns:
        dict mapping category name to its ``id`` attribute.
    """
    feed = self.custom_webReadFile(
        urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/')
    categories = {}
    for class_node in fromstring(feed).iter('class'):
        for entry in class_node:
            label = entry.text
            blocked = self.custom_RegexGetText(Text=label, RegexText=r'(福利|倫理片|伦理片)', Index=1)
            if blocked == '':
                type_id = entry.attrib['id']
                categories[label] = type_id
                print("'{0}':'{1}',".format(label, type_id))
    return categories
if __name__ == '__main__':
    # Manual smoke run: build the spider, let the t4 loader initialise it,
    # then print the home categories followed by the home recommendations.
    from t4.core.loader import t4_spider_init

    spider = Spider()
    t4_spider_init(spider)
    home_payload = spider.homeContent(True)
    print(home_payload)
    recommended = spider.homeVideoContent()
    print(recommended)
- # T=Spider()
- # T. homeContent(filter=False)
- # T.custom_classification()
- # l=T.homeVideoContent()
- # l=T.searchContent(key='柯南',quick='')
- # l=T.categoryContent(tid='22',pg='1',filter=False,extend={})
- # for x in l['list']:
- # print(x['vod_name'])
- # mubiao= l['list'][2]['vod_id']
- # # print(mubiao)
- # playTabulation=T.detailContent(array=[mubiao,])
- # # print(playTabulation)
- # vod_play_from=playTabulation['list'][0]['vod_play_from']
- # vod_play_url=playTabulation['list'][0]['vod_play_url']
- # url=vod_play_url.split('$$$')
- # vod_play_from=vod_play_from.split('$$$')[0]
- # url=url[0].split('$')
- # url=url[1].split('#')[0]
- # # print(url)
- # m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
- # print(m3u8)
|