py_sina_mm.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import time
  7. import re
  8. from urllib import request, parse
  9. import urllib
  10. import urllib.request
  11. from xml.etree.ElementTree import fromstring, ElementTree as et
  12. class Spider(Spider): # 元类 默认的元类 type
  13. def getName():
  14. return "新浪资源"#除去少儿不宜的内容
  # Content-filter switch: while it is True, custom_list and detailContent
  # drop entries whose category matches the adult keywords.
  filterate = True
  def init(self, extend=""):
      """Initialisation hook: print *extend* framed by a banner of '=' signs.

      Args:
          extend: Extra value supplied by the caller; it is only echoed to
              stdout and not stored.
      """
      print(f"============{extend}============")
  def isVideoFormat(self, url):
      """Placeholder hook: performs no check on *url* and returns ``None``."""
      return None
  def manualVideoCheck(self):
      """Placeholder hook: does nothing and returns ``None``."""
      return None
  23. def homeContent(self,filter):
  24. result = {}
  25. timeClass =time.localtime(time.time())
  26. cateManual ={
  27. '动漫':'3',
  28. '动漫电影':'17',
  29. '综艺':'4',
  30. '纪录片':'5',
  31. '动作片':'6',
  32. '爱情片':'7',
  33. '科幻片':'8',
  34. '战争片':'9',
  35. '剧情片':'10',
  36. '恐怖片':'11',
  37. '喜剧片':'12',
  38. '大陆剧':'13',
  39. '港澳剧':'14',
  40. '台湾剧':'15',
  41. '欧美剧':'16',
  42. '韩剧':'18',
  43. '日剧':'20',
  44. '泰剧':'21',
  45. '体育':'23'
  46. }
  47. if timeClass.tm_hour>22:
  48. cateManual['伦理片']='22'
  49. filterate=False
  50. classes = []
  51. for k in cateManual:
  52. classes.append({
  53. 'type_name':k,
  54. 'type_id':cateManual[k]
  55. })
  56. result['class'] = classes
  57. if(filter):
  58. result['filters'] = self.config['filter']
  59. return result
  60. def homeVideoContent(self):
  61. xmlTxt=self.custom_webReadFile(urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&h=24')
  62. tree = et(fromstring(xmlTxt))
  63. root = tree.getroot()
  64. listXml=root.iter('list')
  65. videos = self.custom_list(html=listXml)
  66. result = {
  67. 'list':videos
  68. }
  69. return result
  70. def categoryContent(self,tid,pg,filter,extend):
  71. result = {}
  72. videos=[]
  73. pagecount=1
  74. limit=20
  75. total=9999
  76. Url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&t={0}&pg={1}'.format(tid,pg)
  77. xmlTxt=self.custom_webReadFile(urlStr=Url)
  78. tree = et(fromstring(xmlTxt))
  79. root = tree.getroot()
  80. listXml=root.iter('list')
  81. for vod in listXml:
  82. pagecount=vod.attrib['pagecount']
  83. limit=vod.attrib['pagesize']
  84. total=vod.attrib['recordcount']
  85. videos = self.custom_list(html=root.iter('list'))
  86. result['list'] = videos
  87. result['page'] = pg
  88. result['pagecount'] = pagecount
  89. result['limit'] = limit
  90. result['total'] = total
  91. return result
  92. def detailContent(self,array):
  93. result = {}
  94. aid = array[0].split('###')
  95. id=aid[1]
  96. logo = aid[2]
  97. title = aid[0]
  98. vod_play_from=['1080zyk',]
  99. vod_year=''
  100. vod_actor=''
  101. vod_content=''
  102. vod_director=''
  103. type_name=''
  104. vod_area=''
  105. vod_lang=''
  106. vodItems=[]
  107. vod_play_url=[]
  108. try:
  109. url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids='+id
  110. xmlTxt=self.custom_webReadFile(urlStr=url)
  111. jRoot = et(fromstring(xmlTxt))
  112. xmlList=jRoot.iter('list')
  113. for vod in xmlList:
  114. for x in vod:
  115. for v in x:
  116. if v.tag=='actor':
  117. vod_actor=v.text
  118. if v.tag=='director':
  119. vod_director=v.text
  120. if v.tag=='des':
  121. vod_content=v.text
  122. if v.tag=='area':
  123. vod_area=v.text
  124. if v.tag=='year':
  125. vod_year=v.text
  126. if v.tag=='type':
  127. type_name=v.text
  128. if v.tag=='lang':
  129. vod_lang=v.text
  130. temporary=self.custom_RegexGetText(Text=xmlTxt,RegexText=r'<dd flag="xlyun">(.+?)</dd>',Index=1)
  131. temporary=temporary.replace('<![CDATA[','').replace(']]>','')
  132. vodItems=self.custom_EpisodesList(temporary)
  133. joinStr = "#".join(vodItems)
  134. vod_play_url.append(joinStr)
  135. except :
  136. pass
  137. vod = {
  138. "vod_id":array[0],
  139. "vod_name":title,
  140. "vod_pic":logo,
  141. "type_name":type_name,
  142. "vod_year":vod_year,
  143. "vod_area":vod_area,
  144. "vod_remarks":vod_lang,
  145. "vod_actor":vod_actor,
  146. "vod_director":vod_director,
  147. "vod_content":vod_content
  148. }
  149. vod['vod_play_from'] = "$$$".join(vod_play_from)
  150. vod['vod_play_url'] = "$$$".join(vod_play_url)
  151. result = {
  152. 'list':[
  153. vod
  154. ]
  155. }
  156. if self.filterate==True and self.custom_RegexGetText(Text=type_name,RegexText=r'(伦理|倫理|福利)',Index=1)!='':
  157. result={'list':[]}
  158. return result
  159. def searchContent(self,key,quick):
  160. Url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&wd={0}&pg={1}'.format(urllib.parse.quote(key),'1')
  161. xmlTxt=self.custom_webReadFile(urlStr=Url)
  162. tree = et(fromstring(xmlTxt))
  163. root = tree.getroot()
  164. listXml=root.iter('list')
  165. videos = self.custom_list(html=listXml)
  166. result = {
  167. 'list':videos
  168. }
  169. return result
  170. def playerContent(self,flag,id,vipFlags):
  171. result = {}
  172. parse=0
  173. url=id
  174. htmlTxt=self.custom_webReadFile(urlStr=url,header=self.header)
  175. url=self.custom_RegexGetText(Text=htmlTxt,RegexText=r'(https{0,1}://.+?\.m3u8)',Index=1)
  176. if url.find('.m3u8')<1:
  177. url=id
  178. parse=0
  179. result["parse"] = parse#0=直接播放、1=嗅探
  180. result["playUrl"] =''
  181. result["url"] = url
  182. result['jx'] = 0#VIP解析,0=不解析、1=解析
  183. result["header"] = ''
  184. return result
  # Static settings; homeContent returns config['filter'] as the 'filters'
  # entry when filtering is requested.
  config = dict(player={}, filter={})
  # Headers that playerContent passes to custom_webReadFile (empty by default).
  header = dict()
  def localProxy(self, param):
      """Return the local-proxy response descriptor.

      The original body referenced an undefined name ``action`` and so
      raised ``NameError`` on every call. The descriptor is now defined
      with empty fields.
      NOTE(review): the field set follows the usual template for this kind
      of plugin - confirm against the host application's expectations.

      Args:
          param: Proxy parameters; unused.

      Returns:
          list: ``[status code, content type, action descriptor, body]``.
      """
      action = {
          'url': '',
          'header': '',
          'param': '',
          'type': 'string',
          'after': '',
      }
      return [200, "video/MP2T", action, ""]
  192. #-----------------------------------------------自定义函数-----------------------------------------------
  193. #正则取文本
  def custom_RegexGetText(self, Text, RegexText, Index):
      """Return one group of the first regex match in *Text*.

      The search uses ``re.M | re.S``. An empty string is returned when
      *Text* is ``None`` or empty, when nothing matches, or when the
      requested group did not take part in the match. Callers pass XML
      element text, which is ``None`` for empty elements and previously
      caused ``TypeError`` here.

      Args:
          Text: Text to search; may be ``None``.
          RegexText: Regular expression pattern.
          Index: Group number (or name) to return.

      Returns:
          str: The matched group, or ``''``.
      """
      if not Text:
          return ""
      match = re.search(RegexText, Text, re.M | re.S)
      if match is None:
          return ""
      return match.group(Index) or ""
  202. #分类取结果
  203. def custom_list(self,html):
  204. ListRe=html
  205. videos = []
  206. temporary=[]
  207. for vod in ListRe:
  208. for value in vod:
  209. for x in value:
  210. if x.tag=='name':
  211. title=x.text
  212. if x.tag=='id':
  213. id=x.text
  214. if x.tag=='type':
  215. tid=x.text
  216. if x.tag=='last':
  217. last=x.text
  218. temporary.append({
  219. "name":title,
  220. "id":id,
  221. "last":last
  222. })
  223. if len(temporary)>0:
  224. idTxt=''
  225. for vod in temporary:
  226. idTxt=idTxt+vod['id']+','
  227. if len(idTxt)>1:
  228. idTxt=idTxt[0:-1]
  229. url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids='+idTxt
  230. xmlTxt=self.custom_webReadFile(urlStr=url)
  231. jRoot = et(fromstring(xmlTxt))
  232. xmlList=jRoot.iter('list')
  233. for vod in xmlList:
  234. for x in vod:
  235. for v in x:
  236. if v.tag=='name':
  237. title=v.text
  238. if v.tag=='id':
  239. vod_id=v.text
  240. if v.tag=='pic':
  241. img=v.text
  242. if v.tag=='note':
  243. remarks=v.text
  244. if v.tag=='year':
  245. vod_year=v.text
  246. if v.tag=='type':
  247. type_name=v.text
  248. if self.filterate==True and self.custom_RegexGetText(Text=type_name,RegexText=r'(伦理|倫理|福利)',Index=1)!='':
  249. continue
  250. vod_id='{0}###{1}###{2}'.format(title,vod_id,img)
  251. # vod_id='{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}###{8}###{9}###{10}'.format(title,vod_id,img,vod_actor,vod_director,'/'.join(type_name),'/'.join(vod_time),'/'.join(vod_area),vod_lang,vod_content,vod_play_url)
  252. # print(vod_id)
  253. videos.append({
  254. "vod_id":vod_id,
  255. "vod_name":title,
  256. "vod_pic":img,
  257. "vod_year":vod_year,
  258. "vod_remarks":remarks
  259. })
  260. return videos
  261. #访问网页
  def custom_webReadFile(self, urlStr, header=None, codeName='utf-8'):
      """Download *urlStr* and return the body decoded as text.

      Args:
          urlStr: Address to request.
          header: Request headers. When ``None``, a Referer, a desktop
              browser User-Agent and a Host header derived from *urlStr*
              are sent.
          codeName: Encoding used to decode the response body.

      Returns:
          str: The decoded response body.

      Raises:
          urllib.error.URLError: The request failed.
          UnicodeDecodeError: The body is not valid *codeName*.
      """
      import ssl

      if header is None:
          header = {
              "Referer": urlStr,
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
              # Host taken from the parsed URL, so a URL without a path
              # (e.g. "https://host?x=1") no longer leaks the query into it.
              "Host": urllib.parse.urlsplit(urlStr).netloc,
          }
      # SECURITY NOTE: certificate verification is disabled, as in the
      # original code. It is now scoped to this request instead of
      # replacing ssl._create_default_https_context for the whole process.
      unverified = ssl._create_unverified_context()
      req = urllib.request.Request(url=urlStr, headers=header)
      with urllib.request.urlopen(req, context=unverified) as response:
          return response.read().decode(codeName)
  276. #取剧集区
  def custom_lineList(self, Txt, mark, after):
      """Collect the slices of *Txt* that run from *mark* up to *after*.

      Each slice starts at an occurrence of *mark* and ends just before the
      next occurrence of *after*; the search for the following *mark*
      resumes from that end position.

      NOTE(review): scanning only continues while the position of *mark*
      is greater than 8, so a *mark* within the first nine characters ends
      the scan. This may have been meant as "> -1" - confirm before
      changing it.

      Args:
          Txt: Text to scan.
          mark: Start marker.
          after: End marker.

      Returns:
          list[str]: The collected slices, in order of appearance.
      """
      pieces = []
      start = Txt.find(mark)
      while True:
          if start <= 8:
              break
          stop = Txt.find(after, start)
          pieces.append(Txt[start:stop])
          start = Txt.find(mark, stop)
      return pieces
  285. #正则取文本,返回数组
  def custom_RegexGetTextLine(self, Text, RegexText, Index):
      """Return every match of *RegexText* in *Text* as a list.

      The search uses ``re.M | re.S``. Items are whatever ``findall``
      yields: strings, or tuples when the pattern has several groups.

      Args:
          Text: Text to search.
          RegexText: Regular expression pattern.
          Index: Accepted but not used.

      Returns:
          list: The matches, empty when there are none.
      """
      return list(re.findall(RegexText, Text, re.M | re.S))
  295. #取集数
  def custom_EpisodesList(self, html):
      """Split an episode string into ``"title$url"`` entries.

      *html* holds entries separated by ``#``, each of the form
      ``title$url``. Entries without a ``$`` or with an empty URL are
      skipped. The original raised ``IndexError`` on any entry without
      ``$``, for example an empty input or a trailing ``#``.

      Args:
          html: Raw episode string; may be empty or ``None``.

      Returns:
          list[str]: One ``"title$url"`` string per usable episode.
      """
      if not html:
          return []
      episodes = []
      for entry in html.split('#'):
          fields = entry.split('$')
          if len(fields) < 2 or not fields[1]:
              continue
          episodes.append(fields[0] + "$" + fields[1])
      return episodes
  307. #取分类
  308. def custom_classification(self):
  309. xmlTxt=self.custom_webReadFile(urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/')
  310. tree = et(fromstring(xmlTxt))
  311. root = tree.getroot()
  312. classXml=root.iter('class')
  313. temporaryClass={}
  314. for vod in classXml:
  315. for value in vod:
  316. if self.custom_RegexGetText(Text=value.text,RegexText=r'(福利|倫理片|伦理片)',Index=1)!='':
  317. continue
  318. temporaryClass[value.text]=value.attrib['id']
  319. print("'{0}':'{1}',".format(value.text,value.attrib['id']))
  320. return temporaryClass
  321. # T=Spider()
  322. # T. homeContent(filter=False)
  323. # T.custom_classification()
  324. # l=T.homeVideoContent()
  325. # l=T.searchContent(key='柯南',quick='')
  326. # l=T.categoryContent(tid='22',pg='1',filter=False,extend={})
  327. # for x in l['list']:
  328. # print(x['vod_name'])
  329. # mubiao= l['list'][2]['vod_id']
  330. # # print(mubiao)
  331. # playTabulation=T.detailContent(array=[mubiao,])
  332. # # print(playTabulation)
  333. # vod_play_from=playTabulation['list'][0]['vod_play_from']
  334. # vod_play_url=playTabulation['list'][0]['vod_play_url']
  335. # url=vod_play_url.split('$$$')
  336. # vod_play_from=vod_play_from.split('$$$')[0]
  337. # url=url[0].split('$')
  338. # url=url[1].split('#')[0]
  339. # # print(url)
  340. # m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
  341. # print(m3u8)