# py_live_new.py
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. import json
  5. import time
  6. import hashlib
  7. from base64 import b64decode
  8. from difflib import SequenceMatcher
  9. from urllib.parse import quote, unquote
  10. from concurrent.futures import ThreadPoolExecutor, as_completed
  11. sys.path.append('..')
  12. from base.spider import Spider
  13. class Spider(Spider): # 元类 默认的元类 type
  14. def getName(self):
  15. return "直播"
  16. def init(self, extend):
  17. try:
  18. self.extendDict = json.loads(extend)
  19. except:
  20. self.extendDict = {}
  21. def destroy(self):
  22. pass
  23. def isVideoFormat(self, url):
  24. pass
  25. def manualVideoCheck(self):
  26. pass
  27. def homeVideoContent(self):
  28. result = {}
  29. return result
  30. def homeContent(self, filter):
  31. result = {}
  32. try:
  33. url = self.extendDict['url']
  34. data = self.fetch(url, headers=self.header, timeout=5).json()
  35. result['class'] = data['classes']
  36. if filter:
  37. result['filters'] = data['filter']
  38. except:
  39. result['class'] = [{"type_id": 'douyu', "type_name": "斗鱼"}]
  40. result['filters'] = {'douyu': {'key': '斗鱼', 'name': '斗鱼', "value": [{"n": "一起看", "v": "208"}]}}
  41. return result
  42. def categoryContent(self, cid, page, filter, ext):
  43. result = {}
  44. videos = []
  45. header = self.header.copy()
  46. if cid == 'bilibili':
  47. if 'B站' in ext:
  48. tid = ext['B站']
  49. else:
  50. try:
  51. r = self.fetch(json.loads(self.extendDict)['url'], headers=header, timeout=5)
  52. tid = r.json()['filter'][cid][0]['value'][0]['v']
  53. except:
  54. tid = '1'
  55. url = f'https://api.live.bilibili.com/xlive/web-interface/v1/second/getList?platform=web&parent_area_id={tid}&page={page}'
  56. data = self.fetch(url, headers=header, timeout=5).json()
  57. vodList = data['data']['list']
  58. append = 'bilibili'
  59. imgnm = 'cover'
  60. vidnm = 'roomid'
  61. titlenm = 'title'
  62. remarknm = 'uname'
  63. if data['data']['has_more'] == 1:
  64. pagecount = page + 1
  65. else:
  66. pagecount = page
  67. elif cid == 'douyu':
  68. if '斗鱼' in ext:
  69. tid = ext['斗鱼']
  70. else:
  71. try:
  72. r = self.fetch(json.loads(self.extend)['url'], headers=header)
  73. tid = r.json()['filter'][cid][0]['value'][0]['v']
  74. except:
  75. tid = '208'
  76. url = f'https://www.douyu.com/gapi/rkc/directory/mixList/2_{tid}/{page}'
  77. r = self.fetch(url, headers=header, timeout=5)
  78. data = r.json()
  79. vodList = data['data']['rl']
  80. pagecount = data['data']['pgcnt']
  81. append = 'douyu'
  82. imgnm = 'rs1'
  83. vidnm = 'rid'
  84. titlenm = 'rn'
  85. remarknm = 'nn'
  86. elif cid == 'huya':
  87. if '虎牙' in ext:
  88. tid = ext['虎牙']
  89. else:
  90. try:
  91. r = self.fetch(json.loads(self.extend)['url'], headers=header)
  92. tid = r.json()['filter'][cid][0]['value'][0]['v']
  93. except:
  94. tid = '2135'
  95. header['Referer'] = 'https://www.huya.com/'
  96. url = f'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId={tid}&tagAll=0&callback=getLiveListJsonpCallback&page={page}'
  97. r = self.fetch(url, headers=header, timeout=5)
  98. data = json.loads(self.regStr(reg="getLiveListJsonpCallback\((.*)\)", src=r.text))
  99. vodList = data['data']['datas']
  100. pagecount = data['data']['totalPage']
  101. append = 'huya'
  102. imgnm = 'screenshot'
  103. vidnm = 'profileRoom'
  104. titlenm = 'introduction'
  105. remarknm = 'nick'
  106. else:
  107. vodList = []
  108. pagecount = page
  109. append = ''
  110. imgnm = ''
  111. vidnm = ''
  112. titlenm = ''
  113. remarknm = ''
  114. for vod in vodList:
  115. img = vod[imgnm]
  116. vid = vod[vidnm]
  117. title = vod[titlenm]
  118. remark = vod[remarknm]
  119. videos.append({
  120. "vod_id": title + '###' + append + '###' + str(vid),
  121. "vod_name": title,
  122. "vod_pic": img,
  123. "vod_remarks": remark
  124. })
  125. lenvodList = len(vodList)
  126. result['list'] = videos
  127. result['page'] = page
  128. result['pagecount'] = pagecount
  129. result['limit'] = lenvodList
  130. result['total'] = lenvodList
  131. return result
  132. def detailContent(self, did):
  133. did = did[0]
  134. header = self.header.copy()
  135. didList = did.split('###')
  136. title = didList[0]
  137. if didList[1] == 'bilibili':
  138. url = f'https://api.live.bilibili.com/room/v1/Room/playUrl?cid={didList[2]}&qn=20000&platform=h5'
  139. data = self.fetch(url, headers=header).json()
  140. platformList = ['B站']
  141. playurlList = [data['data']['quality_description'][0]['desc'] + '$' + data['data']['durl'][0]['url']]
  142. elif didList[1] == 'douyu':
  143. params = quote(json.dumps({"rid": didList[2]}))
  144. #url = f'https://api-lmteam.koyeb.app/live/douyu?params={params}'
  145. url = f'http://maomao.kandiantv.cn/douyu1.php?id={didList[2]}'
  146. platformList = ['斗鱼']
  147. playurlList = [f'直播${url}']
  148. elif didList[1] == 'huya':
  149. import html
  150. header['Content-Type'] = 'application/x-www-form-urlencoded'
  151. url = 'https://www.huya.com/' + didList[2]
  152. r = self.fetch(url, headers=header, timeout=5)
  153. try:
  154. data = json.loads(self.regStr(reg='stream: ([\s\S]*?)\n', src=r.text))
  155. except:
  156. data = json.loads(b64decode(self.regStr(reg='"stream": "([\s\S]*?)"', src=r.text)).decode())
  157. platformList = []
  158. playurlList = []
  159. i = 1
  160. for pL in data['data'][0]['gameStreamInfoList']:
  161. platformList.append('虎牙{}'.format(str(i)))
  162. baseurl = pL['sHlsUrl'] + '/' + pL['sStreamName'] + '.' + pL['sHlsUrlSuffix']
  163. srcAntiCode = html.unescape(pL['sHlsAntiCode'])
  164. c = srcAntiCode.split('&')
  165. c = [i for i in c if i != '']
  166. n = {i.split('=')[0]: i.split('=')[1] for i in c}
  167. fm = unquote(n['fm'])
  168. u = b64decode(fm).decode('utf-8')
  169. hash_prefix = u.split('_')[0]
  170. ctype = n.get('ctype', '')
  171. txyp = n.get('txyp', '')
  172. fs = n.get('fs', '')
  173. t = n.get('t', '')
  174. seqid = str(int(time.time() * 1e3 + 1463993859134))
  175. wsTime = hex(int(time.time()) + 3600).replace('0x', '')
  176. hash = hashlib.md5('_'.join([hash_prefix, '1463993859134', pL['sStreamName'], hashlib.md5((seqid + '|' + ctype + '|' + t).encode('utf-8')).hexdigest(), wsTime]).encode('utf-8')).hexdigest()
  177. ratio = ''
  178. purl = "{}?wsSecret={}&wsTime={}&seqid={}&ctype={}&ver=1&txyp={}&fs={}&ratio={}&u={}&t={}&sv=2107230339".format(baseurl, hash, wsTime, seqid, ctype, txyp, fs, ratio, '1463993859134', t)
  179. playurlList.append('直播$' + purl)
  180. i += 1
  181. else:
  182. playurlList = []
  183. platformList = []
  184. vod = {
  185. "vod_id": didList[2],
  186. "vod_name": title,
  187. }
  188. vod['vod_play_from'] = '$$$'.join(platformList)
  189. vod['vod_play_url'] = '$$$'.join(playurlList)
  190. result = {'list': [vod]}
  191. return result
  192. def searchContent(self, key, quick):
  193. return self.searchContentPage(key, False, '1')
  194. def searchContentPage(self, key, quick, page):
  195. items = []
  196. page = int(page)
  197. keyword = key
  198. if page == 1:
  199. siteList = ['bb', 'dy', 'hy']
  200. else:
  201. siteList = self.getCache('livesiteList_{}_{}'.format(keyword, page))
  202. self.delCache('livesiteList_{}_{}'.format(keyword, page))
  203. if not siteList:
  204. return {'list': items}
  205. contents = []
  206. with ThreadPoolExecutor(max_workers=3) as executor:
  207. searchList = []
  208. try:
  209. for site in siteList:
  210. tag = site
  211. api = ''
  212. future = executor.submit(self.runSearch, keyword, tag, page, api)
  213. searchList.append(future)
  214. for future in as_completed(searchList, timeout=30):
  215. contents.append(future.result())
  216. except:
  217. executor.shutdown(wait=False)
  218. nextpageList = []
  219. for content in contents:
  220. if content is None:
  221. continue
  222. key = list(content.keys())[0]
  223. infos = content[key]
  224. items = items + content[key][0]
  225. nextpageList.append(infos[1])
  226. if not infos[1]:
  227. siteList.remove(key)
  228. self.setCache('livesiteList_{}_{}'.format(keyword, page+1), siteList)
  229. result = {
  230. 'list': items
  231. }
  232. return result
  233. def runSearch(self, key, tag, page, api):
  234. try:
  235. defname = 'self.search' + tag
  236. result = eval(defname)(key, tag, page, api)
  237. return result
  238. except:
  239. pass
  240. def searchbb(self, key, tag, pg, api):
  241. items = []
  242. header = self.header.copy()
  243. header['Cookie'] = 'buvid3=0'
  244. url = f'https://api.bilibili.com/x/web-interface/search/type?page={pg}&page_size=10&order=online&search_type=live_user&keyword={key}'
  245. data = self.fetch(url, headers=header).json()
  246. vList = data['data']['result']
  247. for video in vList:
  248. if video['live_status'] == 0:
  249. continue
  250. title = self.removeHtmlTags(video['uname'])
  251. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  252. continue
  253. items.append({
  254. 'vod_id': '{}###bilibili###{}'.format(title, video['roomid']),
  255. 'vod_name': title,
  256. 'vod_pic': 'https:' + video['uface'],
  257. "vod_remarks": 'B站直播'
  258. })
  259. return {tag: [items, pg * 10 < len(items)]}
  260. def searchdy(self, key, tag, pg, api):
  261. items = []
  262. header = self.header.copy()
  263. url = f'https://www.douyu.com/japi/search/api/searchUser?kw={key}&page={pg}&pageSize=10&filterType=1'
  264. data = self.fetch(url, headers=header, timeout=5).json()
  265. vList = data['data']['relateUser']
  266. for video in vList:
  267. if video['anchorInfo']['isLive'] != 1:
  268. continue
  269. title = video['anchorInfo']['nickName']
  270. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  271. continue
  272. items.append({
  273. 'vod_id': '{}###douyu###{}'.format(title, video['anchorInfo']['rid']),
  274. 'vod_name': title,
  275. 'vod_pic': video['anchorInfo']['roomSrc'],
  276. "vod_remarks": '斗鱼直播'
  277. })
  278. return {tag: [items, pg * 10 < len(items)]}
  279. def searchhy(self, key, tag, pg, api):
  280. items = []
  281. header = self.header.copy()
  282. header['Cookie'] = 'buvid3=0'
  283. start = str((pg-1)*40)
  284. url = f'https://search.cdn.huya.com/?m=Search&do=getSearchContent&typ=-5&livestate=1&q={key}&start={start}&rows=40'
  285. r = self.fetch(url, headers=header)
  286. data = r.json()
  287. vList = data['response']['1']['docs']
  288. for video in vList:
  289. title = video['game_nick']
  290. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  291. continue
  292. items.append({
  293. 'vod_id': '{}###huya###{}'.format(title, video['room_id']),
  294. 'vod_name': title,
  295. 'vod_pic': video['game_avatarUrl180'],
  296. "vod_remarks": '虎牙直播'
  297. })
  298. return {tag: [items, pg * 40 < len(items)]}
  299. def playerContent(self, flag, pid, vipFlags):
  300. result = {}
  301. header = self.header.copy()
  302. # header['Referer'] = "https://www.bilibili.com"
  303. result["parse"] = 0
  304. result["playUrl"] = ''
  305. result["url"] = pid
  306. result["header"] = header
  307. return result
  308. def localProxy(self, param):
  309. return [200, "video/MP2T", ""]
  310. def removeHtmlTags(self, src):
  311. from re import sub, compile
  312. clean = compile('<.*?>')
  313. return sub(clean, '', src)
    # Default HTTP headers shared by every upstream request this spider makes
    # (class attribute; methods take a .copy() before adding per-site fields).
    header = {
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36"
    }