新浪资源.py

# coding=utf-8
# !/usr/bin/python
import sys

sys.path.append('..')
try:
    # from base.spider import Spider as BaseSpider
    from base.spider import BaseSpider
except ImportError:
    from t4.base.spider import BaseSpider
import time
import re
from urllib import request, parse
import urllib
import urllib.request
from xml.etree.ElementTree import fromstring, ElementTree as et
  16. """
  17. 配置示例:
  18. t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式,比如./开头或者.json结尾
  19. api里会自动含有ext参数是base64编码后的选中的筛选条件
  20. {
  21. "key":"hipy_t4_新浪资源",
  22. "name":"新浪资源(hipy_t4)",
  23. "type":4,
  24. "api":"http://192.168.31.49:5707/api/v1/vod/新浪资源",
  25. "searchable":1,
  26. "quickSearch":0,
  27. "filterable":1,
  28. "ext":""
  29. },
  30. {
  31. "key": "hipy_t3_新浪资源",
  32. "name": "新浪资源(hipy_t3)",
  33. "type": 3,
  34. "api": "{{host}}/txt/hipy/新浪资源.py",
  35. "searchable": 1,
  36. "quickSearch": 0,
  37. "filterable": 1,
  38. "ext": ""
  39. },
  40. """


class Spider(BaseSpider):  # plain class; the default metaclass, type, is used
    def getName(self):
        return "新浪资源"  # categories unsuitable for minors are removed

    filterate = False

    def init(self, extend=""):
        print("============{0}============".format(extend))
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def homeContent(self, filter):
        result = {}
        timeClass = time.localtime(time.time())
        cateManual = {
            '动漫': '3',
            '动漫电影': '17',
            '综艺': '4',
            '纪录片': '5',
            '动作片': '6',
            '爱情片': '7',
            '科幻片': '8',
            '战争片': '9',
            '剧情片': '10',
            '恐怖片': '11',
            '喜剧片': '12',
            '大陆剧': '13',
            '港澳剧': '14',
            '台湾剧': '15',
            '欧美剧': '16',
            '韩剧': '18',
            '日剧': '20',
            '泰剧': '21',
            '体育': '23'
        }
        # if timeClass.tm_hour > 22:
        #     cateManual['伦理片'] = '22'
        #     self.filterate = False
        classes = []
        for k in cateManual:
            classes.append({
                'type_name': k,
                'type_id': cateManual[k]
            })
        result['class'] = classes
        if (filter):
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        xmlTxt = self.custom_webReadFile(
            urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&h=24')
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        listXml = root.iter('list')
        videos = self.custom_list(html=listXml)
        result = {
            'list': videos
        }
        return result

    def categoryContent(self, tid, pg, filter, extend):
        result = {}
        videos = []
        pagecount = 1
        limit = 20
        total = 9999
        Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&t={0}&pg={1}'.format(
            tid, pg)
        xmlTxt = self.custom_webReadFile(urlStr=Url)
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        listXml = root.iter('list')
        for vod in listXml:
            pagecount = vod.attrib['pagecount']
            limit = vod.attrib['pagesize']
            total = vod.attrib['recordcount']
        videos = self.custom_list(html=root.iter('list'))
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = pagecount
        result['limit'] = limit
        result['total'] = total
        return result

    def detailContent(self, array):
        result = {}
        aid = array[0].split('###')
        id = aid[1]
        logo = aid[2]
        title = aid[0]
        vod_play_from = ['播放线路', ]
        vod_year = ''
        vod_actor = ''
        vod_content = ''
        vod_director = ''
        type_name = ''
        vod_area = ''
        vod_lang = ''
        vodItems = []
        vod_play_url = []
        try:
            url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + id
            xmlTxt = self.custom_webReadFile(urlStr=url)
            jRoot = et(fromstring(xmlTxt))
            xmlList = jRoot.iter('list')
            for vod in xmlList:
                for x in vod:
                    for v in x:
                        if v.tag == 'actor':
                            vod_actor = v.text
                        if v.tag == 'director':
                            vod_director = v.text
                        if v.tag == 'des':
                            vod_content = v.text
                        if v.tag == 'area':
                            vod_area = v.text
                        if v.tag == 'year':
                            vod_year = v.text
                        if v.tag == 'type':
                            type_name = v.text
                        if v.tag == 'lang':
                            vod_lang = v.text
            temporary = self.custom_RegexGetText(Text=xmlTxt, RegexText=r'<dd flag="xlyun">(.+?)</dd>', Index=1)
            temporary = temporary.replace('<![CDATA[', '').replace(']]>', '')
            vodItems = self.custom_EpisodesList(temporary)
            joinStr = "#".join(vodItems)
            vod_play_url.append(joinStr)
        except:
            pass
        vod = {
            "vod_id": array[0],
            "vod_name": title,
            "vod_pic": logo,
            "type_name": type_name,
            "vod_year": vod_year,
            "vod_area": vod_area,
            "vod_remarks": vod_lang,
            "vod_actor": vod_actor,
            "vod_director": vod_director,
            "vod_content": vod_content
        }
        vod['vod_play_from'] = "$$$".join(vod_play_from)
        vod['vod_play_url'] = "$$$".join(vod_play_url)
        result = {
            'list': [
                vod
            ]
        }
        if self.filterate == True and self.custom_RegexGetText(Text=type_name, RegexText=r'(伦理|倫理|福利)',
                                                               Index=1) != '':
            result = {'list': []}
        return result
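
    # Note on the vod_id convention: custom_list() below packs the id that the framework
    # later hands back to detailContent() as "title###vod_id###pic", for example
    # "某某剧###12345###http://example.com/poster.jpg" (illustrative values only), and
    # detailContent() splits it on '###', so '###' must not occur in any of the parts.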

    def searchContent(self, key, quick, pg=1):
        # use the requested page number instead of always fetching page 1
        Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&wd={0}&pg={1}'.format(
            urllib.parse.quote(key), pg)
        xmlTxt = self.custom_webReadFile(urlStr=Url)
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        listXml = root.iter('list')
        videos = self.custom_list(html=listXml)
        result = {
            'list': videos
        }
        return result

    def playerContent(self, flag, id, vipFlags):
        result = {}
        parse = 1
        url = id
        htmlTxt = self.custom_webReadFile(urlStr=url, header=self.header)
        url = self.custom_RegexGetText(Text=htmlTxt, RegexText=r'(https{0,1}://.+?\.m3u8)', Index=1)
        if url.find('.m3u8') < 1:
            url = id
            parse = 0
        result["parse"] = parse  # 0 = play directly, 1 = sniff the page for the real url
        result["playUrl"] = ''
        result["url"] = url
        result['jx'] = 0  # VIP parsing: 0 = do not parse, 1 = parse
        result["header"] = ''
        return result

    config = {
        "player": {},
        "filter": {}
    }
    header = {}

    def localProxy(self, params):
        return [200, "video/MP2T", ""]

    # ----------------------------------------------- custom helpers -----------------------------------------------
    # extract text with a regex
    def custom_RegexGetText(self, Text, RegexText, Index):
        returnTxt = ""
        Regex = re.search(RegexText, Text, re.M | re.S)
        if Regex is None:
            returnTxt = ""
        else:
            returnTxt = Regex.group(Index)
        return returnTxt

    # build the video list for a category/search result
    def custom_list(self, html):
        ListRe = html
        videos = []
        temporary = []
        for vod in ListRe:
            for value in vod:
                for x in value:
                    if x.tag == 'name':
                        title = x.text
                    if x.tag == 'id':
                        id = x.text
                    if x.tag == 'type':
                        tid = x.text
                    if x.tag == 'last':
                        last = x.text
                temporary.append({
                    "name": title,
                    "id": id,
                    "last": last
                })
        if len(temporary) > 0:
            idTxt = ''
            for vod in temporary:
                idTxt = idTxt + vod['id'] + ','
            if len(idTxt) > 1:
                idTxt = idTxt[0:-1]
            url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + idTxt
            xmlTxt = self.custom_webReadFile(urlStr=url)
            jRoot = et(fromstring(xmlTxt))
            xmlList = jRoot.iter('list')
            for vod in xmlList:
                for x in vod:
                    for v in x:
                        if v.tag == 'name':
                            title = v.text
                        if v.tag == 'id':
                            vod_id = v.text
                        if v.tag == 'pic':
                            img = v.text
                        if v.tag == 'note':
                            remarks = v.text
                        if v.tag == 'year':
                            vod_year = v.text
                        if v.tag == 'type':
                            type_name = v.text
                    if self.filterate == True and self.custom_RegexGetText(Text=type_name,
                                                                           RegexText=r'(伦理|倫理|福利)',
                                                                           Index=1) != '':
                        continue
                    vod_id = '{0}###{1}###{2}'.format(title, vod_id, img)
                    # vod_id='{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}###{8}###{9}###{10}'.format(title,vod_id,img,vod_actor,vod_director,'/'.join(type_name),'/'.join(vod_time),'/'.join(vod_area),vod_lang,vod_content,vod_play_url)
                    # print(vod_id)
                    videos.append({
                        "vod_id": vod_id,
                        "vod_name": title,
                        "vod_pic": img,
                        "vod_year": vod_year,
                        "vod_remarks": remarks
                    })
        return videos

    # fetch a web page
    def custom_webReadFile(self, urlStr, header=None, codeName='utf-8'):
        html = ''
        if header == None:
            header = {
                "Referer": urlStr,
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
                "Host": self.custom_RegexGetText(Text=urlStr, RegexText='https*://(.*?)(/|$)', Index=1)
            }
        # import ssl
        # ssl._create_default_https_context = ssl._create_unverified_context  # globally disable certificate verification
        req = urllib.request.Request(url=urlStr, headers=header)
        with urllib.request.urlopen(req) as response:
            html = response.read().decode(codeName)
        return html

    # slice out the episode sections between two markers
    def custom_lineList(self, Txt, mark, after):
        circuit = []
        origin = Txt.find(mark)
        while origin > 8:
            end = Txt.find(after, origin)
            circuit.append(Txt[origin:end])
            origin = Txt.find(mark, end)
        return circuit

    # regex extraction that returns every match as a list
    def custom_RegexGetTextLine(self, Text, RegexText, Index):
        returnTxt = []
        pattern = re.compile(RegexText, re.M | re.S)
        ListRe = pattern.findall(Text)
        if len(ListRe) < 1:
            return returnTxt
        for value in ListRe:
            returnTxt.append(value)
        return returnTxt

    # split the play list ("title$url" items joined by '#') into episodes
    def custom_EpisodesList(self, html):
        ListRe = html.split('#')
        videos = []
        for vod in ListRe:
            t = vod.split('$')
            if len(t) < 2:  # skip malformed entries that have no '$' separator
                continue
            url = t[1]
            title = t[0]
            if len(url) == 0:
                continue
            videos.append(title + "$" + url)
        return videos

    # fetch the category list from the api
    def custom_classification(self):
        xmlTxt = self.custom_webReadFile(
            urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/')
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        classXml = root.iter('class')
        temporaryClass = {}
        for vod in classXml:
            for value in vod:
                if self.custom_RegexGetText(Text=value.text, RegexText=r'(福利|倫理片|伦理片)', Index=1) != '':
                    continue
                temporaryClass[value.text] = value.attrib['id']
                print("'{0}':'{1}',".format(value.text, value.attrib['id']))
        return temporaryClass


if __name__ == '__main__':
    from t4.core.loader import t4_spider_init

    spider = Spider()
    t4_spider_init(spider)
    print(spider.homeContent(True))
    print(spider.homeVideoContent())
    # T = Spider()
    # T.homeContent(filter=False)
    # T.custom_classification()
    # l = T.homeVideoContent()
    # l = T.searchContent(key='柯南', quick='')
    # l = T.categoryContent(tid='22', pg='1', filter=False, extend={})
    # for x in l['list']:
    #     print(x['vod_name'])
    # mubiao = l['list'][2]['vod_id']
    # # print(mubiao)
    # playTabulation = T.detailContent(array=[mubiao, ])
    # # print(playTabulation)
    # vod_play_from = playTabulation['list'][0]['vod_play_from']
    # vod_play_url = playTabulation['list'][0]['vod_play_url']
    # url = vod_play_url.split('$$$')
    # vod_play_from = vod_play_from.split('$$$')[0]
    # url = url[0].split('$')
    # url = url[1].split('#')[0]
    # # print(url)
    # m3u8 = T.playerContent(flag=vod_play_from, id=url, vipFlags=True)
    # print(m3u8)