123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- #coding=utf-8
- #!/usr/bin/python
- import sys
- sys.path.append('..')
- from base.spider import Spider
- import time
- import re
- from urllib import request, parse
- import urllib
- import urllib.request
- from xml.etree.ElementTree import fromstring, ElementTree as et
- class Spider(Spider): # 元类 默认的元类 type
def getName(self=None):
    """Return the display name of this source.

    The original signature took no parameters, so calling it on an instance
    raised ``TypeError``.  ``self`` now defaults to ``None`` so both the
    instance call and the original zero-argument call work.

    Returns:
        The source name as a string.
    """
    return "新浪资源"


# When True, entries whose type matches the adult-content pattern
# (伦理 / 倫理 / 福利) are dropped by custom_list and detailContent.
filterate = True
def init(self, extend=""):
    """Initialise the spider.

    No state is set up; the ``extend`` value is only echoed to stdout
    between two rows of ``=`` characters.

    Args:
        extend: Extension value to echo.
    """
    banner_template = "============{0}============"
    print(banner_template.format(extend))
def isVideoFormat(self, url):
    """Placeholder hook: performs no check and returns ``None``.

    Args:
        url: Accepted but not inspected.
    """
    return None
def manualVideoCheck(self):
    """Placeholder hook: does nothing and returns ``None``."""
    return None
def homeContent(self, filter):
    """Build the home-page category list.

    During hour 23 (local time) an extra adult category is added and the
    ``filterate`` flag is switched off; at any other hour the flag is
    switched back on.  The original assigned ``filterate=False`` to a
    throw-away local, so the instance flag never changed and the extra
    category was always filtered down to nothing.

    Args:
        filter: When truthy, ``self.config['filter']`` is included in the
            result under ``'filters'``.

    Returns:
        A dict with ``'class'`` (a list of ``{'type_name', 'type_id'}``
        dicts, in declaration order) and, when requested, ``'filters'``.
    """
    categories = {
        '动漫': '3',
        '动漫电影': '17',
        '综艺': '4',
        '纪录片': '5',
        '动作片': '6',
        '爱情片': '7',
        '科幻片': '8',
        '战争片': '9',
        '剧情片': '10',
        '恐怖片': '11',
        '喜剧片': '12',
        '大陆剧': '13',
        '港澳剧': '14',
        '台湾剧': '15',
        '欧美剧': '16',
        '韩剧': '18',
        '日剧': '20',
        '泰剧': '21',
        '体育': '23',
    }

    late_night = time.localtime().tm_hour > 22
    if late_night:
        categories['伦理片'] = '22'
    # Re-evaluated on every call so the flag does not stay off once the
    # late-night window has passed in a long-running process.
    self.filterate = not late_night

    result = {
        'class': [
            {'type_name': name, 'type_id': type_id}
            for name, type_id in categories.items()
        ]
    }
    if filter:
        result['filters'] = self.config['filter']
    return result
def homeVideoContent(self):
    """Fetch the video cards shown on the home page.

    Requests the ``ac=list&h=24`` feed (presumably titles updated in the
    last 24 hours -- confirm against the API) and passes its ``<list>``
    elements to ``custom_list``.

    Returns:
        ``{'list': videos}`` where ``videos`` is what ``custom_list`` returns.
    """
    feed_url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&h=24'
    document = et(fromstring(self.custom_webReadFile(urlStr=feed_url)))
    list_nodes = document.getroot().iter('list')
    return {'list': self.custom_list(html=list_nodes)}
def categoryContent(self, tid, pg, filter, extend):
    """Return one page of videos for a category.

    Paging values are read from the ``pagecount`` / ``pagesize`` /
    ``recordcount`` attributes of the response's ``<list>`` element and
    always returned as ints.  The original returned ints when the
    attributes were absent but strings when present, and raised
    ``KeyError`` when only some were missing.

    Args:
        tid: Category id placed in the ``t`` query parameter.
        pg: Page number placed in the ``pg`` query parameter and echoed
            back unchanged as ``'page'``.
        filter: Accepted but not used.
        extend: Accepted but not used.

    Returns:
        A dict with ``'list'``, ``'page'``, ``'pagecount'``, ``'limit'``
        and ``'total'``.
    """
    endpoint = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&t={0}&pg={1}'
    xml_text = self.custom_webReadFile(urlStr=endpoint.format(tid, pg))
    # Materialised once so the same nodes serve both the paging scan and
    # the card builder.
    list_nodes = list(fromstring(xml_text).iter('list'))

    # Defaults used when the response carries no usable paging attribute.
    paging = {'pagecount': 1, 'pagesize': 20, 'recordcount': 9999}
    for node in list_nodes:
        for attr in paging:
            raw = (node.get(attr) or '').strip()
            if raw.isdigit():
                paging[attr] = int(raw)

    return {
        'list': self.custom_list(html=iter(list_nodes)),
        'page': pg,
        'pagecount': paging['pagecount'],
        'limit': paging['pagesize'],
        'total': paging['recordcount'],
    }
def detailContent(self, array):
    """Build the detail record (metadata and episode list) for one video.

    Args:
        array: Sequence whose first item is a composite id of the form
            ``"title###id###pic"`` (the format ``custom_list`` produces).

    Returns:
        ``{'list': [vod]}`` on success.  ``{'list': []}`` when ``array`` is
        empty, the composite id is malformed, or the video's type matches
        the adult-content pattern while ``self.filterate`` is truthy.
        If the detail request or its parsing fails, the record is still
        returned with whatever fields were collected and an empty
        ``vod_play_url``; the error is printed instead of being silently
        discarded.
    """
    if not array:
        return {'list': []}
    parts = array[0].split('###')
    if len(parts) < 3:
        # Without all three components the id cannot be resolved.
        return {'list': []}
    title, vod_key, logo = parts[0], parts[1], parts[2]

    # XML tag -> key of the returned record.
    tag_to_field = {
        'actor': 'vod_actor',
        'director': 'vod_director',
        'des': 'vod_content',
        'area': 'vod_area',
        'year': 'vod_year',
        'type': 'type_name',
        'lang': 'vod_remarks',
    }
    vod = {
        "vod_id": array[0],
        "vod_name": title,
        "vod_pic": logo,
        "type_name": '',
        "vod_year": '',
        "vod_area": '',
        "vod_remarks": '',
        "vod_actor": '',
        "vod_director": '',
        "vod_content": '',
    }
    play_from = ['1080zyk']
    play_urls = []

    try:
        url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + vod_key
        xml_text = self.custom_webReadFile(urlStr=url)
        for list_node in fromstring(xml_text).iter('list'):
            for video in list_node:
                for field in video:
                    key = tag_to_field.get(field.tag)
                    if key is not None:
                        # Empty elements have text None; normalise to ''
                        # so the filter regex below never receives None.
                        vod[key] = field.text or ''
        episodes_raw = self.custom_RegexGetText(
            Text=xml_text,
            RegexText=r'<dd flag="xlyun">(.+?)</dd>',
            Index=1,
        )
        episodes_raw = episodes_raw.replace('<![CDATA[', '').replace(']]>', '')
        play_urls.append("#".join(self.custom_EpisodesList(episodes_raw)))
    except Exception as err:
        # Best effort, as in the original: keep what was collected, but
        # report the failure.
        print('detailContent: failed to load details for {0}: {1!r}'.format(vod_key, err))

    vod['vod_play_from'] = "$$$".join(play_from)
    vod['vod_play_url'] = "$$$".join(play_urls)

    blocked = self.custom_RegexGetText(
        Text=vod['type_name'],
        RegexText=r'(伦理|倫理|福利)',
        Index=1,
    )
    if self.filterate and blocked != '':
        return {'list': []}
    return {'list': [vod]}
def searchContent(self, key, quick):
    """Search the source by keyword.

    Only page ``1`` of the results is requested.

    Args:
        key: Search keyword; percent-encoded before being placed in the URL.
        quick: Accepted but not used.

    Returns:
        ``{'list': videos}`` where ``videos`` is what ``custom_list`` returns.
    """
    encoded_key = urllib.parse.quote(key)
    search_url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&wd={0}&pg={1}'.format(encoded_key, '1')
    document = et(fromstring(self.custom_webReadFile(urlStr=search_url)))
    list_nodes = document.getroot().iter('list')
    return {'list': self.custom_list(html=list_nodes)}
def playerContent(self, flag, id, vipFlags):
    """Resolve an episode URL into the URL handed to the player.

    The page at ``id`` is downloaded and searched for the first
    ``http(s)://....m3u8`` address.  If none is found, or the download
    fails, ``id`` itself is returned as the play URL.  The original
    propagated download errors even though this fall-back already existed
    for the no-match case.

    Args:
        flag: Accepted but not used.
        id: Episode URL taken from the play list.
        vipFlags: Accepted but not used.

    Returns:
        A dict with ``'parse'`` (always 0), ``'playUrl'`` (``''``),
        ``'url'``, ``'jx'`` (always 0) and ``'header'`` (``''``).
    """
    stream_url = ''
    try:
        page = self.custom_webReadFile(urlStr=id, header=self.header)
    except Exception as err:
        # Boundary with the host player: degrade to the raw URL rather
        # than fail the whole playback request.
        print('playerContent: could not fetch {0}: {1!r}'.format(id, err))
    else:
        stream_url = self.custom_RegexGetText(
            Text=page,
            RegexText=r'(https{0,1}://.+?\.m3u8)',
            Index=1,
        )

    if stream_url.find('.m3u8') < 1:
        stream_url = id

    return {
        "parse": 0,      # 0 = play directly, 1 = sniff
        "playUrl": '',
        "url": stream_url,
        "jx": 0,         # VIP resolving: 0 = off, 1 = on
        "header": '',
    }
# Static configuration; homeContent returns config['filter'] when the
# caller asks for filters.
config = dict(player={}, filter={})
# Headers playerContent passes to custom_webReadFile when downloading the
# episode page.
header = dict()
def localProxy(self, param):
    """Return the local-proxy response descriptor.

    The original referenced an undefined name ``action`` and therefore
    raised ``NameError`` on every call.  An empty placeholder descriptor is
    returned instead.
    NOTE(review): the placeholder keys follow the shape commonly seen in
    spider templates for this kind of host -- confirm against the host
    application.

    Args:
        param: Accepted but not used.

    Returns:
        ``[200, "video/MP2T", action, ""]``.
    """
    action = {
        'url': '',
        'header': '',
        'param': '',
        'type': 'string',
        'after': '',
    }
    return [200, "video/MP2T", action, ""]
- #-----------------------------------------------自定义函数-----------------------------------------------
- #正则取文本
def custom_RegexGetText(self, Text, RegexText, Index):
    """Return one capture group of the first regex match, or ``''``.

    The search uses ``re.M | re.S``.  ``''`` is returned when ``Text`` is
    ``None`` (for example the text of an empty XML element), when nothing
    matches, or when the requested group did not take part in the match.
    The original raised ``TypeError`` in the first case and returned
    ``None`` in the last.

    Args:
        Text: String to search, or ``None``.
        RegexText: Regular-expression pattern.
        Index: Group number (or name) to return.

    Returns:
        The matched group text, or ``''``.

    Raises:
        IndexError: If the pattern has no group ``Index``.
    """
    if Text is None:
        return ""
    match = re.search(RegexText, Text, re.M | re.S)
    if match is None:
        return ""
    return match.group(Index) or ""
- #分类取结果
def custom_list(self, html):
    """Turn the ``<list>`` elements of a listing response into video cards.

    The ids found in the listing are collected, one ``ac=detail`` request
    is made for all of them, and a card is built from each video in that
    response.  Videos whose type matches the adult-content pattern are
    skipped while ``self.filterate`` is truthy.

    Missing or empty XML fields become ``''``.  The original left the
    corresponding locals unbound (``UnboundLocalError``) or carried over
    the previous video's value, and crashed on ``None`` text.

    Args:
        html: Iterable of ``<list>`` elements; each child is treated as
            one video and each grandchild as one field.

    Returns:
        A list of dicts with ``vod_id`` (``"title###id###pic"``),
        ``vod_name``, ``vod_pic``, ``vod_year`` and ``vod_remarks``.
        Empty when the listing contains no ids.
    """
    def fields_of(video):
        # tag -> text for one video; the last duplicate tag wins.
        return {child.tag: (child.text or '') for child in video}

    ids = []
    for list_node in html:
        for video in list_node:
            video_id = fields_of(video).get('id', '').strip()
            if video_id:
                ids.append(video_id)
    if not ids:
        return []

    detail_url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + ','.join(ids)
    xml_text = self.custom_webReadFile(urlStr=detail_url)

    videos = []
    for list_node in fromstring(xml_text).iter('list'):
        for video in list_node:
            fields = fields_of(video)
            blocked = self.custom_RegexGetText(
                Text=fields.get('type', ''),
                RegexText=r'(伦理|倫理|福利)',
                Index=1,
            )
            if self.filterate and blocked != '':
                continue
            title = fields.get('name', '')
            img = fields.get('pic', '')
            videos.append({
                # Composite id that detailContent later splits on '###'.
                "vod_id": '{0}###{1}###{2}'.format(title, fields.get('id', ''), img),
                "vod_name": title,
                "vod_pic": img,
                "vod_year": fields.get('year', ''),
                "vod_remarks": fields.get('note', ''),
            })
    return videos
- #访问网页
def custom_webReadFile(self, urlStr, header=None, codeName='utf-8', timeout=30):
    """Download a URL and return its body decoded as text.

    Certificate verification is disabled, as in the original, but only for
    this request.  The original replaced
    ``ssl._create_default_https_context`` on every call, which turned
    verification off for the whole process.
    NOTE(security): responses are therefore not authenticated; enable
    verification if the endpoint serves a valid certificate.

    Args:
        urlStr: URL to fetch.
        header: Request headers.  When ``None``, a ``Referer``, a browser
            ``User-Agent`` and a ``Host`` taken from the URL are sent.
        codeName: Encoding used to decode the response body.
        timeout: Seconds before the request is abandoned (the original had
            no timeout and could block indefinitely).

    Returns:
        The decoded response body.

    Raises:
        urllib.error.URLError: On connection or HTTP failure.
        UnicodeDecodeError: If the body is not valid ``codeName``.
    """
    import ssl
    import urllib.parse
    import urllib.request

    if header is None:
        header = {
            "Referer": urlStr,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
            "Host": urllib.parse.urlsplit(urlStr).netloc,
        }

    # check_hostname must be cleared before verify_mode can be CERT_NONE.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    req = urllib.request.Request(url=urlStr, headers=header)
    with urllib.request.urlopen(req, timeout=timeout, context=context) as response:
        return response.read().decode(codeName)
-
- #取剧集区
def custom_lineList(self, Txt, mark, after):
    """Cut ``Txt`` into the sections that start at ``mark`` and end before ``after``.

    Each returned piece begins with ``mark`` and stops just before the
    next ``after``.  Two defects of the original are fixed:

    * when ``after`` matched at the very position of ``mark`` (or was
      empty) the scan never advanced and looped forever;
    * when ``after`` was missing, the last section lost its final
      character; it now runs to the end of ``Txt``.

    NOTE(review): as in the original, a ``mark`` whose first occurrence is
    at index 8 or lower yields no sections at all (``origin > 8``).  The
    reason for that threshold is not visible here, so it is preserved.

    Args:
        Txt: Text to scan.
        mark: Start delimiter (kept in each piece).
        after: End delimiter (excluded from each piece).

    Returns:
        List of section strings, in order of appearance.
    """
    circuit = []
    origin = Txt.find(mark)
    while origin > 8:
        end = Txt.find(after, origin)
        if end < 0:
            # Unterminated final section: keep all of it and stop.
            circuit.append(Txt[origin:])
            break
        circuit.append(Txt[origin:end])
        # Always move past the current mark so the scan makes progress.
        origin = Txt.find(mark, max(end, origin + 1))
    return circuit
- #正则取文本,返回数组
def custom_RegexGetTextLine(self, Text, RegexText, Index):
    """Return every match of ``RegexText`` in ``Text`` as a list.

    The search uses ``re.M | re.S`` and returns what ``findall`` yields
    (strings, or tuples when the pattern has several groups).

    Args:
        Text: String to search.
        RegexText: Regular-expression pattern.
        Index: Accepted but not used.

    Returns:
        A new list of matches; empty when nothing matches.
    """
    return list(re.findall(RegexText, Text, re.M | re.S))
- #取集数
def custom_EpisodesList(self, html):
    """Parse an episode string into ``"title$url"`` entries.

    The input is a ``#``-separated list of ``title$url`` segments.
    Segments without a ``$`` (empty input, a trailing ``#``) or with an
    empty URL are skipped; the original raised ``IndexError`` on the
    former, which discarded the whole episode list.

    Args:
        html: Raw episode string, or ``None``.

    Returns:
        List of ``"title$url"`` strings in input order.
    """
    episodes = []
    for segment in (html or '').split('#'):
        title, separator, remainder = segment.partition('$')
        # Only the text up to the next '$' counts as the URL.
        url = remainder.split('$')[0]
        if not separator or not url:
            continue
        episodes.append(title + "$" + url)
    return episodes
- #取分类
def custom_classification(self):
    """Download the category index and return it as ``{name: id}``.

    Every child of each ``<class>`` element is read.  Entries whose name
    matches the adult-content pattern are left out; each kept entry is
    also printed as a ``'name':'id',`` line.

    Returns:
        Dict mapping category name to its ``id`` attribute.
    """
    index_url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/'
    root = et(fromstring(self.custom_webReadFile(urlStr=index_url))).getroot()

    categories = {}
    for class_node in root.iter('class'):
        for entry in class_node:
            blocked = self.custom_RegexGetText(
                Text=entry.text,
                RegexText=r'(福利|倫理片|伦理片)',
                Index=1,
            )
            if blocked == '':
                categories[entry.text] = entry.attrib['id']
                print("'{0}':'{1}',".format(entry.text, entry.attrib['id']))
    return categories
- # T=Spider()
- # T. homeContent(filter=False)
- # T.custom_classification()
- # l=T.homeVideoContent()
- # l=T.searchContent(key='柯南',quick='')
- # l=T.categoryContent(tid='22',pg='1',filter=False,extend={})
- # for x in l['list']:
- # print(x['vod_name'])
- # mubiao= l['list'][2]['vod_id']
- # # print(mubiao)
- # playTabulation=T.detailContent(array=[mubiao,])
- # # print(playTabulation)
- # vod_play_from=playTabulation['list'][0]['vod_play_from']
- # vod_play_url=playTabulation['list'][0]['vod_play_url']
- # url=vod_play_url.split('$$$')
- # vod_play_from=vod_play_from.split('$$$')[0]
- # url=url[0].split('$')
- # url=url[1].split('#')[0]
- # # print(url)
- # m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
- # print(m3u8)
|