腾.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. # -*- coding: utf-8 -*-
  2. # by @嗷呜
  3. import json
  4. import sys
  5. import uuid
  6. import copy
  7. sys.path.append('..')
  8. from base.spider import Spider
  9. from concurrent.futures import ThreadPoolExecutor, as_completed
  10. class Spider(Spider):
  def init(self, extend=""):
      """Create the request-body template used by the channel list requests.

      ``self.dbody`` is the pristine template. ``self.body`` is the working
      body that ``categoryContent`` rewrites and posts.

      Args:
          extend: Not used by this method.
      """
      self.dbody = {
          "page_params": {
              "channel_id": "",
              "filter_params": "sort=75",
              "page_type": "channel_operation",
              "page_id": "channel_list_second_page",
          }
      }
      # Deep copy rather than alias: edits to the working body (including the
      # nested ``page_params`` dict) must never leak back into the template.
      self.body = copy.deepcopy(self.dbody)
  def getName(self):
      """Placeholder hook: does nothing and returns ``None``."""
      return None

  def isVideoFormat(self, url):
      """Placeholder hook: ignores ``url`` and returns ``None``."""
      return None

  def manualVideoCheck(self):
      """Placeholder hook: does nothing and returns ``None``."""
      return None

  def destroy(self):
      """Placeholder hook: does nothing and returns ``None``."""
      return None
  # Site origin: feeds the origin/referer headers and the play-page URLs.
  host = 'https://v.qq.com'

  # Host that the API requests in this class are posted to.
  apihost = 'https://pbaccess.video.qq.com'

  # Base request headers; searchContent copies them and adds a Content-Type.
  headers = {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
      'origin': host,
      'referer': host + '/',
  }
  def homeContent(self, filter):
      """Return the category list and the filter options of each category.

      One ``get_filter_data`` request per category is issued through a thread
      pool. A category whose request fails, or whose response does not have
      the expected shape, is still listed in ``class`` but gets no entry in
      ``filters``; it never aborts the other categories.

      Args:
          filter: Not used by this method.

      Returns:
          ``{'class': [{'type_name', 'type_id'}, ...],
          'filters': {type_id: [{'key', 'name', 'value': [{'n', 'v'}]}]}}``.
      """
      cdata = {
          "电视剧": "100113",
          "电影": "100173",
          "综艺": "100109",
          "纪录片": "100105",
          "动漫": "100119",
          "少儿": "100150",
          "短剧": "110755"
      }
      classes = [{'type_name': name, 'type_id': cid} for name, cid in cdata.items()]
      filters = {}
      with ThreadPoolExecutor(max_workers=len(classes)) as executor:
          futures = [executor.submit(self.get_filter_data, item['type_id']) for item in classes]
          for item, future in zip(classes, futures):
              try:
                  cid, data = future.result()
              except Exception as e:
                  # A single failed request must not take down the whole home page.
                  print(f"Error fetching filters for {item['type_id']}: {str(e)}")
                  continue
              try:
                  # ``data['data']`` may be present but null, hence the ``or {}``.
                  modules = (data.get('data') or {}).get('module_list_datas')
                  if not modules:
                      continue
                  entries = modules[-1]['module_datas'][-1]['item_data_lists']['item_datas']
                  filter_dict = {}
                  for entry in entries:
                      params = entry.get('item_params') or {}
                      filter_key = params.get('index_item_key')
                      if not filter_key:
                          continue
                      # The first option of a filter also creates its group.
                      if filter_key not in filter_dict:
                          filter_dict[filter_key] = {
                              'key': filter_key,
                              'name': params['index_name'],
                              'value': []
                          }
                      filter_dict[filter_key]['value'].append({
                          'n': params['option_name'],
                          'v': params['option_value']
                      })
              except (IndexError, KeyError, TypeError, AttributeError):
                  # Unexpected response shape: drop this category's filters.
                  continue
              filters[cid] = list(filter_dict.values())
      return {'class': classes, 'filters': filters}
  def homeVideoContent(self):
      """Fetch the home channel page and return its cards as a vod list.

      Cards without ``params``, without a title, without an id, or whose id
      contains ``http`` are skipped. A missing or malformed image-tag JSON
      just leaves year/remarks as ``None``.

      Returns:
          ``{'list': [{'vod_id', 'vod_name', 'vod_pic', 'vod_year',
          'vod_remarks'}, ...]}``.
      """
      json_data = {
          'page_context': None,
          'page_params': {
              'page_id': '100101',
              'page_type': 'channel',
              'skip_privacy_types': '0',
              'support_click_scan': '1',
              'new_mark_label_enabled': '1',
              'ams_cookies': '',
          },
          'page_bypass_params': {
              'params': {
                  'caller_id': '',
                  'data_mode': 'default',
                  'page_id': '',
                  'page_type': 'channel',
                  'platform_id': '2',
                  'user_mode': 'default',
              },
              'scene': 'channel',
              'abtest_bypass_id': '',
          },
      }
      data = self.post(f'{self.apihost}/trpc.vector_layout.page_view.PageService/getPage',
                       headers=self.headers, json=json_data).json()
      vlist = []
      for it in data['data']['CardList'][0]['children_list']['list']['cards']:
          p = it.get('params')
          if not p:
              continue
          vod_id = it.get('id') or p.get('cid')
          name = p.get('mz_title') or p.get('title')
          # A None id used to raise TypeError on the ``'http' in`` test.
          if not name or not vod_id or 'http' in vod_id:
              continue
          raw_tag = p.get('uni_imgtag') or p.get('imgtag') or '{}'
          try:
              tag = json.loads(raw_tag)
          except (TypeError, ValueError):
              tag = {}
          if not isinstance(tag, dict):
              tag = {}
          vlist.append({
              'vod_id': vod_id,
              'vod_name': name,
              'vod_pic': p.get('image_url'),
              'vod_year': (tag.get('tag_2') or {}).get('text'),
              'vod_remarks': (tag.get('tag_4') or {}).get('text')
          })
      return {'list': vlist}
  def categoryContent(self, tid, pg, filter, extend):
      """Return one page of a category listing.

      For page 1 a fresh working body is built from the ``self.dbody``
      template with the channel id and filter string filled in. For later
      pages the body kept from the previous call is reused, because it
      carries the ``page_context`` returned with the previous page.

      Args:
          tid: Channel id of the category.
          pg: Page number (string or int).
          filter: Not used by this method.
          extend: Mapping of selected filter values; missing keys fall back
              to the defaults below.

      Returns:
          Dict with ``list``, ``page``, ``pagecount``, ``limit`` and ``total``.
      """
      extend = extend or {}
      params = {
          "sort": extend.get('sort', '75'),
          "attr": extend.get('attr', '-1'),
          "itype": extend.get('itype', '-1'),
          "ipay": extend.get('ipay', '-1'),
          "iarea": extend.get('iarea', '-1'),
          "iyear": extend.get('iyear', '-1'),
          "theater": extend.get('theater', '-1'),
          "award": extend.get('award', '-1'),
          "recommend": extend.get('recommend', '-1')
      }
      if str(pg) == '1':
          # Deep copy: a shallow ``dict.copy()`` shares the nested
          # ``page_params`` dict, so the writes below would silently rewrite
          # the template that other requests start from.
          self.body = copy.deepcopy(self.dbody)
          self.body['page_params']['channel_id'] = tid
          self.body['page_params']['filter_params'] = self.josn_to_params(params)
      data = self.post(
          f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=1000005&vplatform=2&vversion_name=8.9.10&new_mark_label_enabled=1',
          json=self.body, headers=self.headers).json()
      ndata = data['data']
      result = {}
      if ndata['has_next_page']:
          # The real page count is unknown; report a large one and remember
          # the cursor needed to request the following page.
          result['pagecount'] = 9999
          self.body['page_context'] = ndata['next_page_context']
      else:
          result['pagecount'] = int(pg)
      vlist = []
      for its in ndata['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']:
          p = its.get('item_params') or {}
          vod_id = p.get('cid')
          if not vod_id:
              continue
          raw_tag = p.get('uni_imgtag') or p.get('imgtag') or '{}'
          try:
              tag = json.loads(raw_tag)
          except (TypeError, ValueError):
              tag = {}
          if not isinstance(tag, dict):
              tag = {}
          vlist.append({
              'vod_id': vod_id,
              'vod_name': p.get('mz_title') or p.get('title'),
              'vod_pic': p.get('new_pic_hz') or p.get('new_pic_vt'),
              'vod_year': (tag.get('tag_2') or {}).get('text'),
              'vod_remarks': (tag.get('tag_4') or {}).get('text')
          })
      result['list'] = vlist
      result['page'] = pg
      result['limit'] = 90
      result['total'] = 999999
      return result
  def detailContent(self, ids):
      """Build the detail entry (metadata and play lists) for ``ids[0]``.

      The introduction page and the first episode-list page are requested in
      parallel; further episode tabs are collected by ``process_tabs``.

      Args:
          ids: Sequence whose first element is the cover id.

      Returns:
          ``{'list': [vod]}`` on success, otherwise whatever
          ``handle_exception`` returns.
      """
      cid = ids[0]
      vbody = {"page_params": {"req_from": "web", "cid": cid, "vid": "", "lid": "",
                               "page_type": "detail_operation",
                               "page_id": "detail_page_introduction"},
               "has_cache": 1}
      body = {"page_params": {"req_from": "web_vsite", "page_id": "vsite_episode_list",
                              "page_type": "detail_operation", "id_type": "1",
                              "page_size": "", "cid": cid, "vid": "", "lid": "",
                              "page_num": "", "page_context": "",
                              "detail_page_type": "1"},
              "has_cache": 1}
      with ThreadPoolExecutor(max_workers=2) as executor:
          future_detail = executor.submit(self.get_vdata, vbody)
          future_episodes = executor.submit(self.get_vdata, body)
          vdata = future_detail.result()
          data = future_episodes.result()
      pdata = self.process_tabs(data, body, ids)
      if not pdata:
          return self.handle_exception(None, "No pdata available")
      try:
          first_item = vdata['data']['module_list_datas'][0]['module_datas'][0]['item_data_lists']['item_datas'][0]
          star_list = first_item.get('sub_items', {}).get('star_list', {}).get('item_datas', [])
          actors = [star['item_params']['name'] for star in star_list]
          plist, ylist = self.process_pdata(pdata, ids)
          # Build the source labels from the lists that actually have entries.
          # The old "delete by index" approach raised IndexError when both
          # lists were empty.
          names = []
          if plist:
              names.append('腾讯视频')
          if ylist:
              names.append('预告片')
          if not names:
              return self.handle_exception(None, "No playable items")
          vod = self.build_vod(vdata, actors, plist, ylist, names)
          return {'list': [vod]}
      except Exception as e:
          return self.handle_exception(e, "Error processing detail")
  def searchContent(self, key, quick, pg="1"):
      """Search for ``key`` and return the matching titles.

      Results come from two response sections, ``normalList`` and the first
      entry of ``areaBoxList``. Either may be missing or empty. When the first
      normal result has the doc id ``MainNeed`` the area-box results are
      listed first.

      Args:
          key: Search text.
          quick: Not used by this method.
          pg: 1-based page number (string or int).

      Returns:
          ``{'list': [...], 'page': pg}``.
      """
      headers = {**self.headers, 'Content-Type': 'application/json'}
      body = {
          'version': '25021101',
          'clientType': 1,
          'filterValue': '',
          'uuid': str(uuid.uuid4()),
          'retry': 0,
          'query': key,
          'pagenum': int(pg) - 1,  # the API counts pages from zero
          'pagesize': 30,
          'queryFrom': 0,
          'searchDatakey': '',
          'transInfo': '',
          'isneedQc': True,
          'preQid': '',
          'adClientInfo': '',
          'extraInfo': {
              'isNewMarkLabel': '1',
              'multi_terminal_pc': '1',
              'themeType': '1',
          },
      }
      data = self.post(f'{self.apihost}/trpc.videosearch.mobile_search.MultiTerminalSearch/MbSearch?vplatform=2',
                       json=body, headers=headers).json()
      allowed_types = ("电视剧", "电影", "综艺", "纪录片", "动漫", "少儿", "短剧")
      payload = data.get('data') or {}
      normal = (payload.get('normalList') or {}).get('itemList') or []
      boxes = payload.get('areaBoxList') or []
      boxed = (boxes[0].get('itemList') or []) if boxes else []
      head_doc = (normal[0].get('doc') or {}) if normal else {}
      if head_doc.get('id') == 'MainNeed':
          ordered = boxed + normal
      else:
          ordered = normal + boxed
      vlist = []
      for k in ordered:
          doc = k.get('doc') or {}
          info = k.get('videoInfo') or {}
          if not (doc.get('id') and info.get('title')):
              continue
          # Entries marked as off-site are skipped; a missing subtitle is
          # treated as an empty one.
          if '外站' in (info.get('subTitle') or ''):
              continue
          type_name = info.get('typeName')
          if type_name not in allowed_types:
              continue
          tag = {}
          img_tag = info.get('imgTag')
          if isinstance(img_tag, str):
              try:
                  tag = json.loads(img_tag)
              except json.JSONDecodeError:
                  tag = {}
          if not isinstance(tag, dict):
              tag = {}
          year = (tag.get('tag_2') or {}).get('text') or ''
          remarks = (tag.get('tag_4') or {}).get('text') or ''
          vlist.append({
              'vod_id': doc['id'],
              'vod_name': self.removeHtmlTags(info['title']),
              'vod_pic': info.get('imgUrl'),
              'vod_year': type_name + ' ' + year,
              'vod_remarks': remarks
          })
      return {'list': vlist, 'page': pg}
  def playerContent(self, flag, id, vipFlags):
      """Turn a play id of the form ``<cid>@<vid>`` into a play-page URL.

      Args:
          flag: Not used by this method.
          id: Play id as produced by ``process_pdata`` (``cid@vid``).
          vipFlags: Not used by this method.

      Returns:
          Dict with ``jx``, ``parse``, ``url`` and ``header``.
      """
      parts = id.split('@')
      cid = parts[0]
      vid = parts[1] if len(parts) > 1 else ''
      if vid:
          url = f"{self.host}/x/cover/{cid}/{vid}.html"
      else:
          # No '@' in the id (e.g. the placeholder entries emitted by
          # handle_exception): fall back to the cover page instead of raising
          # IndexError. NOTE(review): confirm the downstream parser accepts
          # a cover-only URL.
          url = f"{self.host}/x/cover/{cid}.html"
      return {'jx': 1, 'parse': 1, 'url': url, 'header': ''}
  def localProxy(self, param):
      """Placeholder hook: ignores ``param`` and returns ``None``."""
      return None
  def get_filter_data(self, cid):
      """Request the channel page of ``cid`` and return ``(cid, response)``.

      ``homeContent`` runs this concurrently from a thread pool, so the
      request body must be private to each call.

      Args:
          cid: Channel id to request.

      Returns:
          Tuple of the channel id and the decoded JSON response.
      """
      # Deep copy: a shallow ``dict.copy()`` shares the nested ``page_params``
      # dict, so parallel calls would overwrite each other's ``channel_id``
      # and also rewrite the template itself.
      hbody = copy.deepcopy(self.dbody)
      hbody['page_params']['channel_id'] = cid
      data = self.post(
          f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=1000005&vplatform=2&vversion_name=8.9.10&new_mark_label_enabled=1',
          json=hbody, headers=self.headers).json()
      return cid, data
  def get_vdata(self, body):
      """Post ``body`` to the detail page endpoint and return the decoded JSON.

      Any failure is printed and replaced by a response-shaped dict with an
      empty ``module_list_datas``, so this method itself does not raise.
      """
      fallback = {'data': {'module_list_datas': []}}
      try:
          response = self.post(
              f'{self.apihost}/trpc.universal_backend_service.page_server_rpc.PageServer/GetPageData?video_appid=3000010&vplatform=2&vversion_name=8.2.96',
              json=body, headers=self.headers
          )
          return response.json()
      except Exception as err:
          print(f"Error in get_vdata: {str(err)}")
      return fallback
  def process_pdata(self, pdata, ids):
      """Split episode items into regular entries and trailer entries.

      Each item with a truthy ``item_id`` becomes ``<title>$<cid>@<item_id>``.
      Titles containing ``预告`` go to the second list, all others to the
      first. Items without an ``item_id`` are ignored.

      Returns:
          ``(plist, ylist)``: regular entries, then trailer entries.
      """
      regular, trailers = [], []
      cover_id = ids[0]
      for item in pdata:
          item_id = item.get('item_id')
          if not item_id:
              continue
          title = item['item_params']['union_title']
          entry = f"{title}${cover_id}@{item_id}"
          bucket = trailers if '预告' in title else regular
          bucket.append(entry)
      return regular, trailers
  def build_vod(self, vdata, actors, plist, ylist, names):
      """Assemble the vod dict from the introduction data and the play lists.

      Args:
          vdata: Introduction page response; its first item's ``item_params``
              supplies the metadata fields.
          actors: Actor names, joined with commas.
          plist: Regular play entries, joined with ``#``.
          ylist: Trailer play entries, joined with ``#``.
          names: Source labels, joined with ``$$$``.

      Returns:
          The vod dict. Empty play lists contribute no URL group.
      """
      info = vdata['data']['module_list_datas'][0]['module_datas'][0]['item_data_lists']['item_datas'][0]['item_params']
      url_groups = ['#'.join(group) for group in (plist, ylist) if group]
      remarks = d_remarks = info.get('holly_online_time', '') or info.get('hotval', '')
      return {
          'type_name': info.get('sub_genre', ''),
          'vod_name': info.get('title', ''),
          'vod_year': info.get('year', ''),
          'vod_area': info.get('area_name', ''),
          'vod_remarks': remarks,
          'vod_actor': ','.join(actors),
          'vod_content': info.get('cover_description', ''),
          'vod_play_from': '$$$'.join(names),
          'vod_play_url': '$$$'.join(url_groups)
      }
  def handle_exception(self, e, message):
      """Print ``message`` with the error and return a placeholder result.

      Args:
          e: The exception (or ``None``) to include in the printed line.
          message: Text printed in front of the error.

      Returns:
          A result dict holding a single placeholder play source.
      """
      print(f"{message}: {e!s}")
      placeholder = {'vod_play_from': '哎呀翻车啦', 'vod_play_url': '翻车啦#555'}
      return {'list': [placeholder]}
  def process_tabs(self, data, body, ids):
      """Collect the episode items of every tab of the episode list.

      ``data`` is the first episode-list page. When its module carries a
      ``tabs`` JSON array, each tab after the first is requested (in parallel,
      each with its own ``page_context``) and its items are appended in tab
      order.

      Args:
          data: Response of the first episode-list request.
          body: Request body used for that request; copied, never modified.
          ids: Not used by this method.

      Returns:
          List of episode items. Empty when the first page itself cannot be
          read. A later tab that fails or has an unexpected shape is skipped
          without discarding the episodes already collected.
      """
      try:
          module = data['data']['module_list_datas'][-1]['module_datas'][-1]
          # Copy so that extending the result does not mutate the caller's data.
          pdata = list(module['item_data_lists']['item_datas'])
          raw_tabs = module['module_params'].get('tabs')
          tabs = json.loads(raw_tabs) if raw_tabs else []
          task_queue = []
          # The first tab is the page already present in ``data``.
          for tab in tabs[1:]:
              nbody = copy.deepcopy(body)
              nbody['page_params']['page_context'] = tab['page_context']
              task_queue.append(nbody)
          if task_queue:
              with ThreadPoolExecutor(max_workers=min(10, len(task_queue))) as executor:
                  # ``map`` yields results in submission order, i.e. tab order.
                  results = list(executor.map(self.get_vdata, task_queue))
              for result in results:
                  try:
                      page_data = result['data']['module_list_datas'][-1]['module_datas'][-1]['item_data_lists']['item_datas']
                  except (KeyError, IndexError, TypeError):
                      # get_vdata's fallback has an empty module list; skip
                      # that tab instead of throwing everything away.
                      continue
                  pdata.extend(page_data or [])
          return pdata
      except Exception as e:
          print(f"Error processing episodes: {str(e)}")
          return []
  def josn_to_params(self, params, skip_empty=False):
      """Join a mapping into a ``k=v&k=v`` string (values are not URL-encoded).

      Args:
          params: Mapping of parameter names to values.
          skip_empty: When true, pairs whose value is falsy are left out.

      Returns:
          The joined string; empty when no pair is kept.
      """
      return "&".join(
          f"{name}={value}"
          for name, value in params.items()
          if value or not skip_empty
      )