# live.py
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. import json
  5. import time
  6. import hashlib
  7. from base64 import b64decode
  8. from difflib import SequenceMatcher
  9. from urllib.parse import quote, unquote
  10. from concurrent.futures import ThreadPoolExecutor, as_completed
  11. sys.path.append('..')
  12. from base.spider import Spider
  13. class Spider(Spider): # 元类 默认的元类 type
  14. def getName(self):
  15. return "直播"
  16. def init(self, extend):
  17. try:
  18. self.extendDict = json.loads(extend)
  19. except:
  20. self.extendDict = {}
  21. def isVideoFormat(self, url):
  22. pass
  23. def manualVideoCheck(self):
  24. pass
  25. def homeVideoContent(self):
  26. result = {}
  27. return result
  28. def homeContent(self, filter):
  29. result = {}
  30. try:
  31. url = self.extendDict['url']
  32. data = self.fetch(url, headers=self.header, timeout=5).json()
  33. result['class'] = data['classes']
  34. if filter:
  35. result['filters'] = data['filter']
  36. except:
  37. result['class'] = [{"type_id": 'douyu', "type_name": "斗鱼"}]
  38. result['filters'] = {'douyu': {'key': '斗鱼', 'name': '斗鱼', "value": [{"n": "一起看", "v": "208"}]}}
  39. return result
  40. def categoryContent(self, cid, page, filter, ext):
  41. result = {}
  42. videos = []
  43. header = self.header.copy()
  44. if cid == 'bilibili':
  45. if 'B站' in ext:
  46. tid = ext['B站']
  47. else:
  48. try:
  49. r = self.fetch(json.loads(self.extendDict)['url'], headers=header, timeout=5)
  50. tid = r.json()['filter'][cid][0]['value'][0]['v']
  51. except:
  52. tid = '1'
  53. url = f'https://api.live.bilibili.com/xlive/web-interface/v1/second/getList?platform=web&parent_area_id={tid}&page={page}'
  54. data = self.fetch(url, headers=header, timeout=5).json()
  55. vodList = data['data']['list']
  56. append = 'bilibili'
  57. imgnm = 'cover'
  58. vidnm = 'roomid'
  59. titlenm = 'title'
  60. remarknm = 'uname'
  61. if data['data']['has_more'] == 1:
  62. pagecount = page + 1
  63. else:
  64. pagecount = page
  65. elif cid == 'douyu':
  66. if '斗鱼' in ext:
  67. tid = ext['斗鱼']
  68. else:
  69. try:
  70. r = self.fetch(json.loads(self.extend)['url'], headers=header)
  71. tid = r.json()['filter'][cid][0]['value'][0]['v']
  72. except:
  73. tid = '208'
  74. url = f'https://www.douyu.com/gapi/rkc/directory/mixList/2_{tid}/{page}'
  75. r = self.fetch(url, headers=header, timeout=5)
  76. data = r.json()
  77. vodList = data['data']['rl']
  78. pagecount = data['data']['pgcnt']
  79. append = 'douyu'
  80. imgnm = 'rs1'
  81. vidnm = 'rid'
  82. titlenm = 'rn'
  83. remarknm = 'nn'
  84. elif cid == 'huya':
  85. if '虎牙' in ext:
  86. tid = ext['虎牙']
  87. else:
  88. try:
  89. r = self.fetch(json.loads(self.extend)['url'], headers=header)
  90. tid = r.json()['filter'][cid][0]['value'][0]['v']
  91. except:
  92. tid = '2135'
  93. header['Referer'] = 'https://www.huya.com/'
  94. url = f'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId={tid}&tagAll=0&callback=getLiveListJsonpCallback&page={page}'
  95. r = self.fetch(url, headers=header, timeout=5)
  96. data = json.loads(self.regStr(reg="getLiveListJsonpCallback\((.*)\)", src=r.text))
  97. vodList = data['data']['datas']
  98. pagecount = data['data']['totalPage']
  99. append = 'huya'
  100. imgnm = 'screenshot'
  101. vidnm = 'profileRoom'
  102. titlenm = 'introduction'
  103. remarknm = 'nick'
  104. else:
  105. vodList = []
  106. pagecount = page
  107. append = ''
  108. imgnm = ''
  109. vidnm = ''
  110. titlenm = ''
  111. remarknm = ''
  112. for vod in vodList:
  113. img = vod[imgnm]
  114. vid = vod[vidnm]
  115. title = vod[titlenm]
  116. remark = vod[remarknm]
  117. videos.append({
  118. "vod_id": title + '###' + append + '###' + str(vid),
  119. "vod_name": title,
  120. "vod_pic": img,
  121. "vod_remarks": remark
  122. })
  123. lenvodList = len(vodList)
  124. result['list'] = videos
  125. result['page'] = page
  126. result['pagecount'] = pagecount
  127. result['limit'] = lenvodList
  128. result['total'] = lenvodList
  129. return result
  130. def detailContent(self, did):
  131. did = did[0]
  132. header = self.header.copy()
  133. didList = did.split('###')
  134. title = didList[0]
  135. if didList[1] == 'bilibili':
  136. url = f'https://api.live.bilibili.com/room/v1/Room/playUrl?cid={didList[2]}&qn=20000&platform=h5'
  137. data = self.fetch(url, headers=header).json()
  138. platformList = ['B站']
  139. playurlList = [data['data']['quality_description'][0]['desc'] + '$' + data['data']['durl'][0]['url']]
  140. elif didList[1] == 'douyu':
  141. params = quote(json.dumps({"rid": didList[2]}))
  142. #url = f'https://api-lmteam.koyeb.app/live/douyu?params={params}'
  143. url = f'http://maomao.kandiantv.cn/douyu1.php?id={didList[2]}'
  144. platformList = ['斗鱼']
  145. playurlList = [f'直播${url}']
  146. elif didList[1] == 'huya':
  147. import html
  148. header['Content-Type'] = 'application/x-www-form-urlencoded'
  149. url = 'https://www.huya.com/' + didList[2]
  150. r = self.fetch(url, headers=header, timeout=5)
  151. try:
  152. data = json.loads(self.regStr(reg='stream: ([\s\S]*?)\n', src=r.text))
  153. except:
  154. data = json.loads(b64decode(self.regStr(reg='"stream": "([\s\S]*?)"', src=r.text)).decode())
  155. platformList = []
  156. playurlList = []
  157. i = 1
  158. for pL in data['data'][0]['gameStreamInfoList']:
  159. platformList.append('虎牙{}'.format(str(i)))
  160. baseurl = pL['sHlsUrl'] + '/' + pL['sStreamName'] + '.' + pL['sHlsUrlSuffix']
  161. srcAntiCode = html.unescape(pL['sHlsAntiCode'])
  162. c = srcAntiCode.split('&')
  163. c = [i for i in c if i != '']
  164. n = {i.split('=')[0]: i.split('=')[1] for i in c}
  165. fm = unquote(n['fm'])
  166. u = b64decode(fm).decode('utf-8')
  167. hash_prefix = u.split('_')[0]
  168. ctype = n.get('ctype', '')
  169. txyp = n.get('txyp', '')
  170. fs = n.get('fs', '')
  171. t = n.get('t', '')
  172. seqid = str(int(time.time() * 1e3 + 1463993859134))
  173. wsTime = hex(int(time.time()) + 3600).replace('0x', '')
  174. hash = hashlib.md5('_'.join([hash_prefix, '1463993859134', pL['sStreamName'], hashlib.md5((seqid + '|' + ctype + '|' + t).encode('utf-8')).hexdigest(), wsTime]).encode('utf-8')).hexdigest()
  175. ratio = ''
  176. purl = "{}?wsSecret={}&wsTime={}&seqid={}&ctype={}&ver=1&txyp={}&fs={}&ratio={}&u={}&t={}&sv=2107230339".format(baseurl, hash, wsTime, seqid, ctype, txyp, fs, ratio, '1463993859134', t)
  177. playurlList.append('直播$' + purl)
  178. i += 1
  179. else:
  180. playurlList = []
  181. platformList = []
  182. vod = {
  183. "vod_id": didList[2],
  184. "vod_name": title,
  185. }
  186. vod['vod_play_from'] = '$$$'.join(platformList)
  187. vod['vod_play_url'] = '$$$'.join(playurlList)
  188. result = {'list': [vod]}
  189. return result
  190. def searchContent(self, key, quick):
  191. return self.searchContentPage(key, False, '1')
  192. def searchContentPage(self, key, quick, page):
  193. items = []
  194. page = int(page)
  195. keyword = key
  196. if page == 1:
  197. siteList = ['bb', 'dy', 'hy']
  198. else:
  199. siteList = self.getCache('livesiteList_{}_{}'.format(keyword, page))
  200. self.delCache('livesiteList_{}_{}'.format(keyword, page))
  201. if not siteList:
  202. return {'list': items}
  203. contents = []
  204. with ThreadPoolExecutor(max_workers=3) as executor:
  205. searchList = []
  206. try:
  207. for site in siteList:
  208. tag = site
  209. api = ''
  210. future = executor.submit(self.runSearch, keyword, tag, page, api)
  211. searchList.append(future)
  212. for future in as_completed(searchList, timeout=30):
  213. contents.append(future.result())
  214. except:
  215. executor.shutdown(wait=False)
  216. nextpageList = []
  217. for content in contents:
  218. if content is None:
  219. continue
  220. key = list(content.keys())[0]
  221. infos = content[key]
  222. items = items + content[key][0]
  223. nextpageList.append(infos[1])
  224. if not infos[1]:
  225. siteList.remove(key)
  226. self.setCache('livesiteList_{}_{}'.format(keyword, page+1), siteList)
  227. result = {
  228. 'list': items
  229. }
  230. return result
  231. def runSearch(self, key, tag, page, api):
  232. try:
  233. defname = 'self.search' + tag
  234. result = eval(defname)(key, tag, page, api)
  235. return result
  236. except:
  237. pass
  238. def searchbb(self, key, tag, pg, api):
  239. items = []
  240. header = self.header.copy()
  241. header['Cookie'] = 'buvid3=0'
  242. url = f'https://api.bilibili.com/x/web-interface/search/type?page={pg}&page_size=10&order=online&search_type=live_user&keyword={key}'
  243. data = self.fetch(url, headers=header).json()
  244. vList = data['data']['result']
  245. for video in vList:
  246. if video['live_status'] == 0:
  247. continue
  248. title = self.removeHtmlTags(video['uname'])
  249. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  250. continue
  251. items.append({
  252. 'vod_id': '{}###bilibili###{}'.format(title, video['roomid']),
  253. 'vod_name': title,
  254. 'vod_pic': 'https:' + video['uface'],
  255. "vod_remarks": 'B站直播'
  256. })
  257. return {tag: [items, pg * 10 < len(items)]}
  258. def searchdy(self, key, tag, pg, api):
  259. items = []
  260. header = self.header.copy()
  261. url = f'https://www.douyu.com/japi/search/api/searchUser?kw={key}&page={pg}&pageSize=10&filterType=1'
  262. data = self.fetch(url, headers=header, timeout=5).json()
  263. vList = data['data']['relateUser']
  264. for video in vList:
  265. if video['anchorInfo']['isLive'] != 1:
  266. continue
  267. title = video['anchorInfo']['nickName']
  268. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  269. continue
  270. items.append({
  271. 'vod_id': '{}###douyu###{}'.format(title, video['anchorInfo']['rid']),
  272. 'vod_name': title,
  273. 'vod_pic': video['anchorInfo']['roomSrc'],
  274. "vod_remarks": '斗鱼直播'
  275. })
  276. return {tag: [items, pg * 10 < len(items)]}
  277. def searchhy(self, key, tag, pg, api):
  278. items = []
  279. header = self.header.copy()
  280. header['Cookie'] = 'buvid3=0'
  281. start = str((pg-1)*40)
  282. url = f'https://search.cdn.huya.com/?m=Search&do=getSearchContent&typ=-5&livestate=1&q={key}&start={start}&rows=40'
  283. r = self.fetch(url, headers=header)
  284. data = r.json()
  285. vList = data['response']['1']['docs']
  286. for video in vList:
  287. title = video['game_nick']
  288. if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
  289. continue
  290. items.append({
  291. 'vod_id': '{}###huya###{}'.format(title, video['room_id']),
  292. 'vod_name': title,
  293. 'vod_pic': video['game_avatarUrl180'],
  294. "vod_remarks": '虎牙直播'
  295. })
  296. return {tag: [items, pg * 40 < len(items)]}
  297. def playerContent(self, flag, pid, vipFlags):
  298. result = {}
  299. header = self.header.copy()
  300. # header['Referer'] = "https://www.bilibili.com"
  301. result["parse"] = 0
  302. result["playUrl"] = ''
  303. result["url"] = pid
  304. result["header"] = header
  305. return result
  306. def localProxy(self, param):
  307. return [200, "video/MP2T", {}, ""]
  308. def removeHtmlTags(self, src):
  309. from re import sub, compile
  310. clean = compile('<.*?>')
  311. return sub(clean, '', src)
  312. def getCache(self, key):
  313. value = self.fetch(f'http://127.0.0.1:9978/cache?do=get&key={key}', timeout=5).text
  314. if len(value) > 0:
  315. if value.startswith('{') and value.endswith('}') or value.startswith('[') and value.endswith(']'):
  316. value = json.loads(value)
  317. if type(value) == dict:
  318. if not 'expiresAt' in value or value['expiresAt'] >= int(time.time()):
  319. return value
  320. else:
  321. self.delCache(key)
  322. return None
  323. return value
  324. else:
  325. return None
  326. def setCache(self, key, value):
  327. if len(value) > 0:
  328. if type(value) == dict or type(value) == list:
  329. value = json.dumps(value, ensure_ascii=False)
  330. self.post(f'http://127.0.0.1:9978/cache?do=set&key={key}', data={"value": value}, timeout=5)
  331. def delCache(self, key):
  332. self.fetch(f'http://127.0.0.1:9978/cache?do=del&key={key}', timeout=5)
  333. header = {
  334. "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36"
  335. }