# py_live.py
#!/usr/bin/python
# coding=utf-8
import sys
import json
import re
import time
import hashlib
from base64 import b64decode
from difflib import SequenceMatcher
from urllib.parse import quote, unquote
from concurrent.futures import ThreadPoolExecutor, as_completed

sys.path.append('..')
from base.spider import Spider
  13. class Spider(Spider): # 元类 默认的元类 type
  14. def getName(self):
  15. return "直播"
  16. def init(self, extend):
  17. try:
  18. self.extendDict = json.loads(extend)
  19. except:
  20. self.extendDict = {}
  21. def isVideoFormat(self, url):
  22. pass
  23. def manualVideoCheck(self):
  24. pass
  25. def homeVideoContent(self):
  26. result = {'list': []}
  27. return result
  28. def homeContent(self, filter):
  29. result = {}
  30. try:
  31. url = self.extendDict['url']
  32. data = self.fetch(url, headers=self.header, timeout=5).json()
  33. result['class'] = data['classes']
  34. if filter:
  35. result['filters'] = data['filter']
  36. except:
  37. result['class'] = [{"type_id": 'douyu', "type_name": "斗鱼"}]
  38. result['filters'] = {'douyu': {'key': '斗鱼', 'name': '斗鱼', "value": [{"n": "一起看", "v": "208"}]}}
  39. return result
  40. def categoryContent(self, cid, page, filter, ext):
  41. result = {}
  42. videos = []
  43. header = self.header.copy()
  44. if cid == 'bilibili':
  45. if 'B站' in ext:
  46. tid = ext['B站']
  47. else:
  48. try:
  49. r = self.fetch(json.loads(self.extendDict)['url'], headers=header, timeout=5)
  50. tid = r.json()['filter'][cid][0]['value'][0]['v']
  51. except:
  52. tid = '1'
  53. url = f'https://api.live.bilibili.com/xlive/web-interface/v1/second/getList?platform=web&parent_area_id={tid}&page={page}'
  54. data = self.fetch(url, headers=header, timeout=5).json()
  55. vodList = data['data']['list']
  56. append = 'bilibili'
  57. imgnm = 'cover'
  58. vidnm = 'roomid'
  59. titlenm = 'title'
  60. remarknm = 'uname'
  61. if data['data']['has_more'] == 1:
  62. pagecount = page + 1
  63. else:
  64. pagecount = page
  65. elif cid == 'douyu':
  66. if '斗鱼' in ext:
  67. tid = ext['斗鱼']
  68. else:
  69. try:
  70. r = self.fetch(json.loads(self.extend)['url'], headers=header)
  71. tid = r.json()['filter'][cid][0]['value'][0]['v']
  72. except:
  73. tid = '208'
  74. url = f'https://www.douyu.com/gapi/rkc/directory/mixList/2_{tid}/{page}'
  75. r = self.fetch(url, headers=header, timeout=5)
  76. data = r.json()
  77. vodList = data['data']['rl']
  78. pagecount = data['data']['pgcnt']
  79. append = 'douyu'
  80. imgnm = 'rs1'
  81. vidnm = 'rid'
  82. titlenm = 'rn'
  83. remarknm = 'nn'
  84. elif cid == 'huya':
  85. if '虎牙' in ext:
  86. tid = ext['虎牙']
  87. else:
  88. try:
  89. r = self.fetch(json.loads(self.extend)['url'], headers=header)
  90. tid = r.json()['filter'][cid][0]['value'][0]['v']
  91. except:
  92. tid = '2135'
  93. header['Referer'] = 'https://www.huya.com/'
  94. url = f'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId={tid}&tagAll=0&callback=getLiveListJsonpCallback&page={page}'
  95. r = self.fetch(url, headers=header, timeout=5)
  96. data = json.loads(self.regStr(reg="getLiveListJsonpCallback\((.*)\)", src=r.text))
  97. vodList = data['data']['datas']
  98. pagecount = data['data']['totalPage']
  99. append = 'huya'
  100. imgnm = 'screenshot'
  101. vidnm = 'profileRoom'
  102. titlenm = 'introduction'
  103. remarknm = 'nick'
  104. else:
  105. vodList = []
  106. pagecount = page
  107. append = ''
  108. imgnm = ''
  109. vidnm = ''
  110. titlenm = ''
  111. remarknm = ''
  112. for vod in vodList:
  113. img = vod[imgnm]
  114. vid = vod[vidnm]
  115. title = vod[titlenm]
  116. remark = vod[remarknm]
  117. videos.append({
  118. "vod_id": title + '###' + append + '###' + str(vid),
  119. "vod_name": title,
  120. "vod_pic": img,
  121. "vod_remarks": remark
  122. })
  123. lenvodList = len(vodList)
  124. result['list'] = videos
  125. result['page'] = page
  126. result['pagecount'] = pagecount
  127. result['limit'] = lenvodList
  128. result['total'] = lenvodList
  129. return result
  130. def detailContent(self, did):
  131. did = did[0]
  132. header = self.header.copy()
  133. didList = did.split('###')
  134. title = didList[0]
  135. if didList[1] == 'bilibili':
  136. url = f'https://api.live.bilibili.com/room/v1/Room/playUrl?cid={didList[2]}&qn=20000&platform=h5'
  137. data = self.fetch(url, headers=header).json()
  138. platformList = ['B站直播']
  139. playurlList = [data['data']['quality_description'][0]['desc'] + '$' + data['data']['durl'][0]['url']]
  140. elif didList[1] == 'douyu':
  141. params = quote(json.dumps({"rid": didList[2]}))
  142. url = f'https://api-lmteam.koyeb.app/live/douyu?params={params}'
  143. platformList = ['斗鱼直播']
  144. playurlList = [f'斗鱼直播${url}']
  145. elif didList[1] == 'huya':
  146. import html
  147. header['Content-Type'] = 'application/x-www-form-urlencoded'
  148. url = 'https://www.huya.com/' + didList[2]
  149. r = self.fetch(url, headers=header, timeout=5)
  150. try:
  151. data = json.loads(self.regStr(reg='stream: ([\s\S]*?)\n', src=r.text))
  152. except:
  153. data = json.loads(b64decode(self.regStr(reg='"stream": "([\s\S]*?)"', src=r.text)).decode())
  154. platformList = []
  155. playurlList = []
  156. i = 1
  157. for pL in data['data'][0]['gameStreamInfoList']:
  158. platformList.append('线路{}'.format(str(i)))
  159. baseurl = pL['sHlsUrl'] + '/' + pL['sStreamName'] + '.' + pL['sHlsUrlSuffix']
  160. srcAntiCode = html.unescape(pL['sHlsAntiCode'])
  161. c = srcAntiCode.split('&')
  162. c = [i for i in c if i != '']
  163. n = {i.split('=')[0]: i.split('=')[1] for i in c}
  164. fm = unquote(n['fm'])
  165. u = b64decode(fm).decode('utf-8')
  166. hash_prefix = u.split('_')[0]
  167. ctype = n.get('ctype', '')
  168. txyp = n.get('txyp', '')
  169. fs = n.get('fs', '')
  170. t = n.get('t', '')
  171. seqid = str(int(time.time() * 1e3 + 1463993859134))
  172. wsTime = hex(int(time.time()) + 3600).replace('0x', '')
  173. hash = hashlib.md5('_'.join([hash_prefix, '1463993859134', pL['sStreamName'], hashlib.md5((seqid + '|' + ctype + '|' + t).encode('utf-8')).hexdigest(), wsTime]).encode('utf-8')).hexdigest()
  174. ratio = ''
  175. purl = "{}?wsSecret={}&wsTime={}&seqid={}&ctype={}&ver=1&txyp={}&fs={}&ratio={}&u={}&t={}&sv=2107230339".format(baseurl, hash, wsTime, seqid, ctype, txyp, fs, ratio, '1463993859134', t)
  176. playurlList.append('直播$' + purl)
  177. i += 1
  178. else:
  179. playurlList = []
  180. platformList = []
  181. vod = {
  182. "vod_id": didList[2],
  183. "vod_name": title,
  184. }
  185. vod['vod_play_from'] = '$$$'.join(platformList)
  186. vod['vod_play_url'] = '$$$'.join(playurlList)
  187. result = {'list': [vod]}
  188. return result
  189. def searchContent(self, key, quick):
  190. return self.searchContentPage(key, False, '1')
  191. def searchContentPage(self, key, quick, page):
  192. items = []
  193. page = int(page)
  194. keyword = key
  195. if page == 1:
  196. siteList = ['bb', 'dy', 'hy']
  197. else:
  198. siteList = self.getCache('livesiteList_{}_{}'.format(keyword, page))
  199. self.delCache('livesiteList_{}_{}'.format(keyword, page))
  200. if not siteList:
  201. return {'list': items}
  202. contents = []
  203. with ThreadPoolExecutor(max_workers=3) as executor:
  204. searchList = []
  205. try:
  206. for site in siteList:
  207. tag = site
  208. api = ''
  209. future = executor.submit(self.runSearch, keyword, tag, page, api)
  210. searchList.append(future)
  211. for future in as_completed(searchList, timeout=30):
  212. contents.append(future.result())
  213. except:
  214. executor.shutdown(wait=False)
  215. nextpageList = []
  216. for content in contents:
  217. if content is None:
  218. continue
  219. key = list(content.keys())[0]
  220. infos = content[key]
  221. items = items + content[key][0]
  222. nextpageList.append(infos[1])
  223. if not infos[1]:
  224. siteList.remove(key)
  225. self.setCache('livesiteList_{}_{}'.format(keyword, page+1), siteList)
  226. result = {
  227. 'list': items
  228. }
  229. return result
  230. def runSearch(self, key, tag, page, api):
  231. try:
  232. defname = 'self.search' + tag
  233. result = eval(defname)(key, tag, page, api)
  234. return result
  235. except:
  236. pass
  237. def searchbb(self, key, tag, pg, api):
  238. items = []
  239. header = self.header.copy()
  240. header['Cookie'] = 'buvid3=0'
  241. url = f'https://api.bilibili.com/x/web-interface/search/type?page={pg}&page_size=10&order=online&search_type=live_user&keyword={key}'
  242. data = self.fetch(url, headers=header).json()
  243. vList = data['data']['result']
  244. for video in vList:
  245. if video['live_status'] == 0:
  246. continue
  247. title = self.removeHtmlTags(video['uname'])
  248. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  249. continue
  250. items.append({
  251. 'vod_id': '{}###bilibili###{}'.format(title, video['roomid']),
  252. 'vod_name': title,
  253. 'vod_pic': 'https:' + video['uface'],
  254. "vod_remarks": 'B站直播'
  255. })
  256. return {tag: [items, pg * 10 < len(items)]}
  257. def searchdy(self, key, tag, pg, api):
  258. items = []
  259. header = self.header.copy()
  260. url = f'https://www.douyu.com/japi/search/api/searchUser?kw={key}&page={pg}&pageSize=10&filterType=1'
  261. data = self.fetch(url, headers=header, timeout=5).json()
  262. vList = data['data']['relateUser']
  263. for video in vList:
  264. if video['anchorInfo']['isLive'] != 1:
  265. continue
  266. title = video['anchorInfo']['nickName']
  267. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  268. continue
  269. items.append({
  270. 'vod_id': '{}###douyin###{}'.format(title, video['anchorInfo']['rid']),
  271. 'vod_name': title,
  272. 'vod_pic': video['anchorInfo']['roomSrc'],
  273. "vod_remarks": '斗鱼直播'
  274. })
  275. return {tag: [items, pg * 10 < len(items)]}
  276. def searchhy(self, key, tag, pg, api):
  277. items = []
  278. header = self.header.copy()
  279. header['Cookie'] = 'buvid3=0'
  280. start = str((pg-1)*40)
  281. url = f'https://search.cdn.huya.com/?m=Search&do=getSearchContent&typ=-5&livestate=1&q={key}&start={start}&rows=40'
  282. r = self.fetch(url, headers=header)
  283. data = r.json()
  284. vList = data['response']['1']['docs']
  285. for video in vList:
  286. title = video['game_nick']
  287. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  288. continue
  289. items.append({
  290. 'vod_id': '{}###huya###{}'.format(title, video['room_id']),
  291. 'vod_name': title,
  292. 'vod_pic': video['game_avatarUrl180'],
  293. "vod_remarks": '虎牙直播'
  294. })
  295. return {tag: [items, pg * 40 < len(items)]}
  296. def playerContent(self, flag, pid, vipFlags):
  297. result = {}
  298. header = self.header.copy()
  299. result["parse"] = 0
  300. result["playUrl"] = ''
  301. result["url"] = pid
  302. result["header"] = header
  303. return result, 14400
  304. def localProxy(self, param):
  305. return [200, "video/MP2T", {}, ""]
  306. def removeHtmlTags(self, src):
  307. from re import sub, compile
  308. clean = compile('<.*?>')
  309. return sub(clean, '', src)
  310. def getCache(self, key):
  311. value = self.fetch(f'http://127.0.0.1:9978/cache?do=get&key={key}', timeout=5).text
  312. if len(value) > 0:
  313. if value.startswith('{') and value.endswith('}') or value.startswith('[') and value.endswith(']'):
  314. value = json.loads(value)
  315. if type(value) == dict:
  316. if not 'expiresAt' in value or value['expiresAt'] >= int(time.time()):
  317. return value
  318. else:
  319. self.delCache(key)
  320. return None
  321. return value
  322. else:
  323. return None
  324. def setCache(self, key, value):
  325. if len(value) > 0:
  326. if type(value) == dict or type(value) == list:
  327. value = json.dumps(value, ensure_ascii=False)
  328. self.post(f'http://127.0.0.1:9978/cache?do=set&key={key}', data={"value": value}, timeout=5)
  329. def delCache(self, key):
  330. self.fetch(f'http://127.0.0.1:9978/cache?do=del&key={key}', timeout=5)
    # Default HTTP headers shared by every request this spider makes.
    header = {
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36"
    }