cntv央视_ok.py 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. # coding=utf-8
  2. # !/usr/bin/python
  3. import os.path
  4. import random
  5. import sys
  6. sys.path.append('..')
  7. from base.spider import Spider
  8. import json
  9. import time
  10. import base64
  11. import datetime
  12. import re
  13. from urllib import request, parse
  14. from pathlib import Path
  15. import urllib
  16. import urllib.request
  17. """
  18. 配置示例:
  19. t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式,比如./开头或者.json结尾
  20. api里会自动含有ext参数是base64编码后的选中的筛选条件
  21. 错误示例,ext含有json:
  22. {
  23. "key":"hipy_cntv央视",
  24. "name":"cntv央视(hipy_t4)",
  25. "type":4,
  26. "api":"http://192.168.31.49:5707/api/v1/vod/cntv央视?api_ext={{host}}/txt/hipy/cntv央视.json",
  27. "searchable":1,
  28. "quickSearch":1,
  29. "filterable":0,
  30. "ext":"cntv央视.json"
  31. }
  32. 正确示例。同时存在ext和api_ext会优先取ext作为extend加载init
  33. {
  34. "key":"hipy_t4_cntv央视",
  35. "name":"cntv央视(hipy_t4)",
  36. "type":4,
  37. "api":"http://192.168.31.49:5707/api/v1/vod/cntv央视?api_ext={{host}}/txt/hipy/cntv央视.json",
  38. "searchable":1,
  39. "quickSearch":0,
  40. "filterable":1,
  41. "ext":"{{host}}/files/hipy/cntv央视.json"
  42. },
  43. {
  44. "key": "hipy_t3_cntv央视",
  45. "name": "cntv央视(hipy_t3)",
  46. "type": 3,
  47. "api": "{{host}}/txt/hipy/cntv央视.py",
  48. "searchable": 1,
  49. "quickSearch": 0,
  50. "filterable": 1,
  51. "ext": "{{host}}/files/hipy/cntv央视.json"
  52. },
  53. """
  54. class Spider(Spider): # 元类 默认的元类 type
  55. module = None
  def getDependence(self):
      """Return the names of the spiders this spider depends on."""
      dependencies = ['base_spider']
      return dependencies
  def getName(self):
      """Return the display name of this source (the source is searchable)."""
      source_name = "中央电视台"
      return source_name
  def init_api_ext_file(self):
      """Scrape the CCTV filter pages and save them as this module's JSON sidecar.

      Fetches the "特别节目" and "纪录片" filter pages, collects the options of
      each filter list found on them and writes the result as JSON to a file
      named like this module but with a ``.json`` extension.

      Source pages:
          特别节目 filters: https://tv.cctv.com/yxg/tbjm/index.shtml
          纪录片 filters:   https://tv.cctv.com/yxg/jlp/index.shtml
      """
      # Swap only the trailing extension: str.replace('.py', '.json') would also
      # rewrite any other '.py' that happens to occur earlier in the path.
      ext_file = os.path.splitext(__file__)[0] + '.json'
      print(f'ext_file:{ext_file}')

      def scrape_filters(page_url, specs):
          """Return one filter entry per (list id, <li> attribute, key, name) spec."""
          page = self.html(self.fetch(page_url).text)
          filters = []
          for list_id, attr, key, name in specs:
              options = [
                  {
                      'n': ''.join(li.xpath('./span//text()')),
                      'v': ''.join(li.xpath('@' + attr)),
                  }
                  for li in page.xpath(f'//*[@id="{list_id}"]/li')
              ]
              filters.append({"key": key, "name": name, "value": options})
          return filters

      # (element id of the <ul>, attribute holding the value, filter key, label)
      channel = ('pindao', 'datacd', 'datapd-channel', '频道')
      genre = ('fenlei', 'datalx', 'datafl-sc', '类型')
      year = ('nianfen', 'datanf', 'datanf-year', '年份')
      letter = ('zimu', 'datazm', 'dataszm-letter', '首字母')

      # ==================== filters for 特别节目 ======================
      filter_tbjm = scrape_filters(
          'https://tv.cctv.com/yxg/tbjm/index.shtml', (channel, genre, letter))
      print(filter_tbjm)

      # ==================== filters for 纪录片 ======================
      filter_jlp = scrape_filters(
          'https://tv.cctv.com/yxg/jlp/index.shtml', (channel, genre, year, letter))
      print(filter_jlp)

      ext_file_dict = {
          "特别节目": filter_tbjm,
          "纪录片": filter_jlp,
      }
      with open(ext_file, mode='w', encoding='utf-8') as f:
          f.write(json.dumps(ext_file_dict, ensure_ascii=False))
  def init(self, extend=""):
      """Load extra filter definitions and add year/month filters to "栏目大全".

      Args:
          extend: Either a string saying where extra filters come from -- a
              ``./`` path relative to this file, an ``http`` URL returning
              JSON, or a bare name resolved to ``<name>.json`` next to this
              file -- or a list of dependency objects. From a list, the first
              object whose type name contains ``.Spider`` is stored in
              ``self.module``.
      """
      def init_file(ext_file):
          """Merge the JSON object stored in ``ext_file`` into the filter config."""
          ext_file = Path(ext_file).as_posix()
          if not os.path.exists(ext_file):
              return
          with open(ext_file, mode='r', encoding='utf-8') as f:
              try:
                  self.config['filter'].update(json.loads(f.read()))
              except Exception as e:
                  print(f'更新扩展筛选条件发生错误:{e}')

      print("============依赖列表:{0}============".format(extend))
      ext = extend
      print("============ext:{0}============".format(ext))
      if isinstance(ext, str) and ext:
          base_dir = os.path.dirname(__file__)
          if ext.startswith('./'):
              init_file(os.path.join(base_dir, ext))
          elif ext.startswith('http'):
              try:
                  r = self.fetch(ext)
                  self.config['filter'].update(r.json())
              except Exception as e:
                  print(f'更新扩展筛选条件发生错误:{e}')
          else:
              # Bare name: look for "<name>.json" next to this file.
              init_file(os.path.join(base_dir, './' + ext + '.json'))

      # ============ year / month filters for 栏目大全 ============
      lanmu_list = self.config['filter']['栏目大全']
      lanmu_keys_list = [lanmu['key'] for lanmu in lanmu_list]
      if 'year' not in lanmu_keys_list:
          currentYear = datetime.date.today().year
          # Strings, like every other filter value: the selected year is later
          # concatenated with the (string) month.
          yearList = [{"n": "全部", "v": ""}]
          yearList.extend(
              {"n": str(year), "v": str(year)}
              for year in range(currentYear, currentYear - 10, -1)
          )
          lanmu_list.append({"key": "year", "name": "年份", "value": yearList})
      if 'month' not in lanmu_keys_list:
          monthList = [{"n": "全部", "v": ""}]
          for month in range(1, 13):
              text = str(month).rjust(2, '0')
              monthList.append({"n": text, "v": text})
          lanmu_list.append({"key": "month", "name": "月份", "value": monthList})

      # Attach the dependency module; one is enough.
      if isinstance(extend, list):
          for lib in extend:
              if '.Spider' in str(type(lib)):
                  self.module = lib
                  break
  def destroy(self):
      """Tear-down hook; this spider holds nothing to release."""
      return None
  def isVideoFormat(self, url):
      """Placeholder: performs no check on ``url`` and returns ``None``."""
      return None
  def manualVideoCheck(self):
      """Placeholder: does nothing and returns ``None``."""
      return None
  def homeContent(self, filter):
      """Return the home-page categories, plus the filter config on request.

      Args:
          filter: When truthy, ``self.config['filter']`` is included under
              the ``filters`` key.
      """
      category_names = (
          "4K专区",
          "栏目大全",
          "特别节目",
          "纪录片",
          "电视剧",
          "动画片",
          "频道直播",
      )
      # Every category uses its display name as its id.
      result = {
          'class': [{'type_name': name, 'type_id': name} for name in category_names],
      }
      if filter:
          result['filters'] = self.config['filter']
      return result
  def homeVideoContent(self):
      """Return the home-page video list, delegating to ``self.module`` if set."""
      if not self.module:
          return {'list': []}
      return self.module.homeVideoContent()
  def categoryContent(self, tid, pg, filter, extend):
      """Return one page of entries for the category ``tid``.

      Args:
          tid: Category id as listed by ``homeContent``.
          pg: Page number; passed to the upstream API and echoed in the result.
          filter: Not used by this method.
          extend: Mapping of selected filter keys to their values.

      Returns:
          dict with ``list`` (the entries), ``page``, ``pagecount`` (9999 while
          the upstream page came back full, otherwise ``pg``), ``limit`` and
          ``total``.
      """
      def chosen(key, encode=False):
          """Return the selected value for ``key`` ('' if unset), optionally URL-quoted."""
          value = extend.get(key) or ''
          return urllib.parse.quote(value) if encode else value

      year_prefix = ''  # year+month restriction, only used by 栏目大全
      pagecount = 24    # size of a full upstream page
      fc = urllib.parse.quote(tid)

      if tid == '动画片':
          url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955899450127&area={0}&sc={4}&fc={1}&letter={2}&p={3}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              chosen('datadq-area', True), fc, chosen('dataszm-letter'), pg,
              chosen('datafl-sc', True))
      elif tid == '纪录片':
          url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955924871139&fc={0}&channel={1}&sc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              fc, chosen('datapd-channel', True), chosen('datafl-sc', True),
              chosen('datanf-year'), chosen('dataszm-letter'), pg)
      elif tid == '电视剧':
          # The region filter is offered for this category, so honour it
          # (it used to be sent as an empty value regardless of the selection).
          url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955853485115&area={0}&sc={1}&fc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              chosen('datadq-area', True), chosen('datafl-sc', True), fc,
              chosen('datanf-year'), chosen('dataszm-letter'), pg)
      elif tid == '特别节目':
          url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955953877151&channel={0}&sc={1}&fc={2}&bigday=&letter={3}&p={4}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              chosen('datapd-channel', True), chosen('datafl-sc', True), fc,
              chosen('dataszm-letter'), pg)
      elif tid == '栏目大全':
          year = chosen('year')
          month = chosen('month')
          if year:
              # str() keeps this working when a client hands back numbers.
              year_prefix = str(year) + str(month)
          # fl = letter, fc = genre, cid = channel
          url = 'https://api.cntv.cn/lanmu/columnSearch?&fl={0}&fc={1}&cid={2}&p={3}&n=20&serviceId=tvcctv&t=json&cb=ko'.format(
              chosen('fl'), chosen('fc'), chosen('cid'), pg)
          pagecount = 20
      elif tid == '4K专区':
          cid = 'CHAL1558416868484111'
          url = 'https://api.cntv.cn/NewVideo/getLastVideoList4K?serviceId=cctv4k&cid={0}&p={1}&n={2}&t=json&cb=ko'.format(
              cid, pg, pagecount
          )
      else:
          # 频道直播 and any unknown category read the programme-guide page.
          url = 'https://tv.cctv.com/epg/index.shtml'

      videos = []
      htmlText = self.fetch(url).text
      if tid in ('栏目大全', '4K专区'):
          # These endpoints answer as JSONP, "ko(...);": strip the wrapper.
          # A reply without the wrapper is treated as having no entries.
          index = htmlText.rfind(');')
          if index > -1:
              htmlText = htmlText[3:index]
              if tid == '栏目大全':
                  videos = self.get_list1(html=htmlText, tid=tid, year_prefix=year_prefix)
              else:
                  videos = self.get_list_4k(html=htmlText, tid=tid)
      elif tid == '频道直播':
          page = self.html(htmlText)
          for li in page.xpath('//*[@id="jiemudan01"]//div[contains(@class,"channel_con")]//ul/li'):
              vid = ''.join(li.xpath('./img/@title'))
              pic = ''.join(li.xpath('./img/@src'))
              pic = self.urljoin('https://tv.cctv.com/epg/index.shtml', pic)
              videos.append({
                  'vod_id': '||'.join([tid, vid, f'https://tv.cctv.com/live/{vid}/', pic]),
                  'vod_name': vid,
                  'vod_pic': pic,
                  'vod_mark': '',
              })
      else:
          videos = self.get_list(html=htmlText, tid=tid)

      return {
          'list': videos,
          'page': pg,
          'pagecount': 9999 if len(videos) >= pagecount else pg,
          'limit': 90,
          'total': 999999,
      }
  def detailContent(self, array):
      """Build the detail record, including the play list, for ``array[0]``.

      The id is a ``'||'``-joined string: category, title, last-video/page URL
      and logo, followed for non-live entries by album/guid id, year and the
      optional actors and brief. A leading ``'<prefix>$$$'`` carries the date
      restriction used for 栏目大全.

      Args:
          array: Sequence whose first item is the id described above.

      Returns:
          ``{'list': [vod]}``, or ``{}`` when no playable episode was found.
      """
      year_prefix = ''
      did = array[0]
      if '$$$' in did:
          pieces = did.split('$$$')
          year_prefix, did = pieces[0], pieces[1]
      aid = did.split('||')
      tid, title, lastVideo, logo = aid[0], aid[1], aid[2], aid[3]

      if tid == '频道直播':
          # Live channels need no episode lookup.
          vod = {
              "vod_id": did,
              "vod_name": title.replace(' ', ''),
              "vod_pic": logo,
              "vod_content": f'频道{title}正在直播中',
              "vod_play_from": '道长在线直播',
              "vod_play_url": f'在线观看${title}||{lastVideo}',
          }
          return {'list': [vod]}

      album_id = aid[4]
      vod_year = aid[5]
      actors = aid[6] if len(aid) > 6 else ''
      # GET requests are capped at 255 characters, so the brief may be missing.
      brief = aid[7] if len(aid) > 7 else ''
      fromId = 'CCTV'

      if tid == "栏目大全":
          lastUrl = 'https://api.cntv.cn/video/videoinfoByGuid?guid={0}&serviceId=tvcctv'.format(album_id)
          topicId = json.loads(self.fetch(lastUrl).text)['ctid']
          url = 'https://api.cntv.cn/NewVideo/getVideoListByColumn'
          # NOTE(review): "d" is sent twice (empty, then year_prefix); kept as
          # is because it is unclear which one the server honours.
          Url = "{0}?id={1}&d=&p=1&n=100&sort=desc&mode=0&serviceId=tvcctv&t=json&d={2}".format(
              url, topicId, year_prefix)
      elif tid == "4K专区":
          Url = 'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew?id={0}&serviceId=cctv4k&p=1&n=100&mode=0&pub=1'.format(
              album_id)
          print(Url)
      else:
          Url = 'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew?id={0}&serviceId=tvcctv&p=1&n=100&mode=0&pub=1'.format(
              album_id)

      # Per-category patterns for scraping episodes out of the album page when
      # the JSON API returns nothing.
      album_pattern = r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'"
      fallback_patterns = {
          "电视剧": album_pattern,
          "纪录片": album_pattern,
          "4K专区": album_pattern,
          "特别节目": r'class="tp1"><a\s*href="(?P<url>https://.+?)"\s*target="_blank"\s*title="(?P<title>.+?)"></a></div>',
          "动画片": r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'",
          "栏目大全": r'href="(?P<url>.+?)" target="_blank" alt="(?P<title>.+?)" title=".+?">',
      }

      videoList = []
      try:
          if tid == "搜索":
              # Search hits are single videos: play the hit itself.
              fromId = '中央台'
              videoList = [title + "$" + lastVideo]
          else:
              jRoot = json.loads(self.fetch(Url).text)
              videoList = self.get_EpisodesList(jsonList=jRoot['data']['list'])
              patternTxt = fallback_patterns.get(tid)
              if len(videoList) < 1 and patternTxt is not None:
                  htmlTxt = self.fetch(lastVideo).text
                  videoList = self.get_EpisodesList_re(htmlTxt=htmlTxt, patternTxt=patternTxt)
                  fromId = '央视'
      except Exception as e:
          # Best effort: a failed lookup yields an empty result, but is no
          # longer silent and no longer swallows KeyboardInterrupt/SystemExit.
          print(f'获取剧集列表发生错误:{e}')

      if len(videoList) == 0:
          return {}
      vod = {
          "vod_id": did,
          "vod_name": title.replace(' ', ''),
          "vod_pic": logo,
          "type_name": tid,
          "vod_year": vod_year,
          "vod_area": "",
          "vod_remarks": '',
          "vod_actor": actors,
          "vod_director": '',
          "vod_content": brief,
          "vod_play_from": fromId,
          "vod_play_url": "#".join(videoList),
      }
      return {'list': [vod]}
  def get_lineList(self, Txt, mark, after):
      """Collect the slices of ``Txt`` running from each ``mark`` to the next ``after``.

      Args:
          Txt: Text to scan.
          mark: Substring that starts a slice (included in the slice).
          after: Substring that ends a slice (not included).

      Returns:
          List of slices in order of appearance. Scanning only starts when the
          first ``mark`` lies beyond index 8; that threshold is kept unchanged
          from the previous implementation.
      """
      circuit = []
      origin = Txt.find(mark)
      while origin > 8:
          end = Txt.find(after, origin)
          if end < 0:
              # No terminator left: keep the remainder and stop. Slicing with
              # -1 used to drop the last character and could loop forever.
              circuit.append(Txt[origin:])
              break
          circuit.append(Txt[origin:end])
          # Always move past the current mark, so a terminator that matches at
          # the mark position itself cannot stall the scan.
          origin = Txt.find(mark, max(end, origin + 1))
      return circuit
  def get_RegexGetTextLine(self, Text, RegexText, Index):
      """Return every match of ``RegexText`` in ``Text`` as a list.

      The pattern is applied with ``re.M | re.S``. ``Index`` is accepted but
      not used.
      """
      matches = re.findall(RegexText, Text, re.M | re.S)
      return list(matches)
  def searchContent(self, key, quick, pg=1):
      """Search CCTV videos for ``key`` and return one page of hits.

      Args:
          key: Search text; URL-quoted before being sent.
          quick: Not used by this method.
          pg: Result page to request (defaults to the first page).

      Returns:
          ``{'list': [...]}`` with the entries parsed by ``get_list_search``.
      """
      key = urllib.parse.quote(key)
      # The requested page is forwarded; it used to be hard-coded to 1, so
      # every page of a search returned the same hits.
      Url = 'https://search.cctv.com/ifsearch.php?page={1}&qtext={0}&sort=relevance&pageSize=20&type=video&vtime=-1&datepid=1&channel=&pageflag=0&qtext_str={0}'.format(
          key, pg)
      htmlTxt = self.fetch(Url).text
      videos = self.get_list_search(html=htmlTxt, tid='搜索')
      return {'list': videos}
  def playerContent(self, flag, id, vipFlags):
      """Resolve a play id into the description of a playable URL.

      Args:
          flag: Play source name. ``'CCTV'`` ids are resolved through
              ``get_m3u8``; ``'道长在线直播'`` ids are ``title||page url`` live
              entries; any other id is treated as a page URL whose embedded
              guid is looked up.
          id: The play id belonging to ``flag``.
          vipFlags: Not used by this method.

      Returns:
          dict with ``parse`` (1 = let the client sniff the URL, 0 = play it
          directly), ``playUrl``, ``url`` and ``header``.
      """
      headers = {
          'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
      }
      url = ''
      sniff = 0
      if flag == 'CCTV':
          url = self.get_m3u8(urlTxt=id)
      elif flag == '道长在线直播':
          title = id.split('||')[0]  # channel name
          _url = f'https://vdn.live.cntv.cn/api2/liveHtml5.do?channel=pc://cctv_p2p_hd{title}&channel_id={title}'
          htmlTxt = self.fetch(_url).text
          vdata = self.regStr(htmlTxt, "var .*?=.*?'(.*?)';")
          vdata = self.str2json(vdata)
          print(vdata)
          url = vdata['hls_url']['hls1']
          print(url)
          url = self.fixm3u8_url(url)
      else:
          try:
              htmlTxt = self.fetch(id).text
              guid = self.get_RegexGetText(Text=htmlTxt, RegexText=r'var\sguid\s*=\s*"(.+?)";', Index=1)
              url = self.get_m3u8(urlTxt=guid)
          except Exception as e:
              # Best effort: fall back to sniffing the page itself, but report
              # the failure and let KeyboardInterrupt/SystemExit through.
              print(f'解析播放地址发生错误:{e}')
              url = id
              sniff = 1
      # Anything that is not an https URL (including a non-string result) is
      # handed back to the client for sniffing.
      if not isinstance(url, str) or 'https:' not in url:
          url = id
          sniff = 1
      return {
          "parse": sniff,  # 1 = sniff, 0 = play
          "playUrl": '',
          "url": url,
          "header": headers,
      }
  546. # 分类抓取地址:
  547. # 栏目大全:https://tv.cctv.com/lm/index.shtml?spm=C28340.Pu9TN9YUsfNZ.E2PQtIunpEaz.24
  548. # 电视剧:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=dsj&datafl=&datadq=&fc=%E7%94%B5%E8%A7%86%E5%89%A7&datanf=&dataszm=
  549. # 动画片:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=dhp&datafl=&datadq=&fc=%E5%8A%A8%E7%94%BB%E7%89%87&dataszm=
  550. # 纪录片:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=jlp&datapd=&datafl=&fc=%E7%BA%AA%E5%BD%95%E7%89%87&datanf=&dataszm=
  551. # 特别节目:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=tbjm&datapd=&datafl=&fc=%E7%89%B9%E5%88%AB%E8%8A%82%E7%9B%AE&datajr=&dataszm=
  def _filter_options(*names):
      """Build filter options whose value equals the label, led by 全部 (value '')."""
      return [{"n": "全部", "v": ""}] + [{"n": name, "v": name} for name in names]

  # Built-in filter definitions per category. Each filter entry has the key
  # read by categoryContent, a display name and the selectable options
  # ("n" = label, "v" = value sent upstream).
  # The initial-letter lists now offer the full A-Z range; B and D were
  # missing from every list except the 栏目大全 one.
  config = {
      "player": {},
      "filter": {
          "电视剧": [
              {"key": "datafl-sc", "name": "类型",
               "value": _filter_options(
                   "谍战", "悬疑", "刑侦", "历史", "古装", "武侠", "军旅", "战争", "喜剧",
                   "青春", "言情", "偶像", "家庭", "年代", "革命", "农村", "都市", "其他")},
              {"key": "datadq-area", "name": "地区",
               "value": [{"n": "全部", "v": ""}, {"n": "中国大陆", "v": "中国大陆"},
                         {"n": "中国香港", "v": "香港"}, {"n": "美国", "v": "美国"},
                         {"n": "欧洲", "v": "欧洲"}, {"n": "泰国", "v": "泰国"}]},
              # 2024 down to 1997
              {"key": "datanf-year", "name": "年份",
               "value": _filter_options(*map(str, range(2024, 1996, -1)))},
              {"key": "dataszm-letter", "name": "字母",
               "value": _filter_options(*"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0-9")},
          ],
          "动画片": [
              {"key": "datafl-sc", "name": "类型",
               "value": _filter_options(
                   "亲子", "搞笑", "冒险", "动作", "宠物", "体育", "益智", "历史", "教育",
                   "校园", "言情", "武侠", "经典", "未来", "古代", "神话", "真人", "励志",
                   "热血", "奇幻", "童话", "剧情", "夺宝", "其他")},
              {"key": "datadq-area", "name": "地区",
               "value": _filter_options("中国大陆", "美国", "欧洲")},
              {"key": "dataszm-letter", "name": "字母",
               "value": _filter_options(*"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0-9")},
          ],
          "纪录片": [
              {"key": "datafl-sc", "name": "类型",
               "value": _filter_options(
                   "人文历史", "人物", "军事", "探索", "社会", "时政", "经济", "科技")},
              # 2024 down to 2008
              {"key": "datanf-year", "name": "年份",
               "value": _filter_options(*map(str, range(2024, 2007, -1)))},
              {"key": "dataszm-letter", "name": "字母",
               "value": _filter_options(*"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0-9")},
          ],
          "特别节目": [
              # NOTE(review): this list carries a second "全部" option whose
              # value is "全部" rather than ""; kept as found -- confirm intent.
              {"key": "datafl-sc", "name": "类型",
               "value": _filter_options(
                   "全部", "新闻", "经济", "综艺", "体育", "军事", "影视", "科教", "戏曲",
                   "青少", "音乐", "社会", "公益", "其他")},
              {"key": "dataszm-letter", "name": "字母",
               "value": _filter_options(*"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0-9")},
          ],
          "栏目大全": [
              {"key": "cid", "name": "频道",
               "value": [{"n": "全部", "v": ""},
                         {"n": "CCTV-1综合", "v": "EPGC1386744804340101"},
                         {"n": "CCTV-2财经", "v": "EPGC1386744804340102"},
                         {"n": "CCTV-3综艺", "v": "EPGC1386744804340103"},
                         {"n": "CCTV-4中文国际", "v": "EPGC1386744804340104"},
                         {"n": "CCTV-5体育", "v": "EPGC1386744804340107"},
                         {"n": "CCTV-6电影", "v": "EPGC1386744804340108"},
                         {"n": "CCTV-7国防军事", "v": "EPGC1386744804340109"},
                         {"n": "CCTV-8电视剧", "v": "EPGC1386744804340110"},
                         {"n": "CCTV-9纪录", "v": "EPGC1386744804340112"},
                         {"n": "CCTV-10科教", "v": "EPGC1386744804340113"},
                         {"n": "CCTV-11戏曲", "v": "EPGC1386744804340114"},
                         {"n": "CCTV-12社会与法", "v": "EPGC1386744804340115"},
                         {"n": "CCTV-13新闻", "v": "EPGC1386744804340116"},
                         {"n": "CCTV-14少儿", "v": "EPGC1386744804340117"},
                         {"n": "CCTV-15音乐", "v": "EPGC1386744804340118"},
                         {"n": "CCTV-16奥林匹克", "v": "EPGC1634630207058998"},
                         {"n": "CCTV-17农业农村", "v": "EPGC1563932742616872"},
                         {"n": "CCTV-5+体育赛事", "v": "EPGC1468294755566101"}]},
              {"key": "fc", "name": "分类",
               "value": _filter_options(
                   "新闻", "体育", "综艺", "健康", "生活", "科教", "经济", "农业", "法治",
                   "军事", "少儿", "动画", "纪实", "戏曲", "音乐", "影视")},
              {"key": "fl", "name": "字母",
               "value": _filter_options(*"ABCDEFGHIJKLMNOPQRSTUVWXYZ")},
          ]
      }
  }
  # The helper is only needed while building ``config``.
  del _filter_options
  688. header = {
  689. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
  690. "Host": "tv.cctv.com",
  691. "Referer": "https://tv.cctv.com/"
  692. }
  693. def localProxy(self, params):
  694. return [200, "video/MP2T", ""]
  695. # -----------------------------------------------自定义函数-----------------------------------------------
  696. # 访问网页
  697. def webReadFile(self, urlStr, header):
  698. html = ''
  699. req = urllib.request.Request(url=urlStr) # ,headers=header
  700. with urllib.request.urlopen(req) as response:
  701. html = response.read().decode('utf-8')
  702. return html
  703. # 判断网络地址是否存在
  704. def TestWebPage(self, urlStr, header):
  705. html = ''
  706. req = urllib.request.Request(url=urlStr, method='HEAD') # ,headers=header
  707. with urllib.request.urlopen(req) as response:
  708. html = response.getcode()
  709. return html
  710. # 正则取文本
  711. def get_RegexGetText(self, Text, RegexText, Index):
  712. returnTxt = ""
  713. Regex = re.search(RegexText, Text, re.M | re.S)
  714. if Regex is None:
  715. returnTxt = ""
  716. else:
  717. returnTxt = Regex.group(Index)
  718. return returnTxt
  719. # 取集数
  720. def get_EpisodesList(self, jsonList):
  721. videos = []
  722. for vod in jsonList:
  723. url = vod['guid']
  724. title = vod['title']
  725. if len(url) == 0:
  726. continue
  727. videos.append(title + "$" + url)
  728. return videos
  729. # 取集数
  730. def get_EpisodesList_re(self, htmlTxt, patternTxt):
  731. ListRe = re.finditer(patternTxt, htmlTxt, re.M | re.S)
  732. videos = []
  733. for vod in ListRe:
  734. url = vod.group('url')
  735. title = vod.group('title')
  736. if len(url) == 0:
  737. continue
  738. videos.append(title + "$" + url)
  739. return videos
  740. # 取剧集区
  741. def get_lineList(self, Txt, mark, after):
  742. circuit = []
  743. origin = Txt.find(mark)
  744. while origin > 8:
  745. end = Txt.find(after, origin)
  746. circuit.append(Txt[origin:end])
  747. origin = Txt.find(mark, end)
  748. return circuit
  749. # 正则取文本,返回数组
  750. def get_RegexGetTextLine(self, Text, RegexText, Index):
  751. returnTxt = []
  752. pattern = re.compile(RegexText, re.M | re.S)
  753. ListRe = pattern.findall(Text)
  754. if len(ListRe) < 1:
  755. return returnTxt
  756. for value in ListRe:
  757. returnTxt.append(value)
  758. return returnTxt
  759. # 删除html标签
  760. def removeHtml(self, txt):
  761. soup = re.compile(r'<[^>]+>', re.S)
  762. txt = soup.sub('', txt)
  763. return txt.replace("&nbsp;", " ")
  764. def hookM3u8(self, url):
  765. """
  766. https://www.52pojie.cn/thread-1932358-1-1.html
  767. JavaScript:$.ajaxSettings.async = false; var s = ""; let a = $.get(vodh5player.playerList[0].ads.contentSrc); for (var m = 0; m < a.responseText.match(/asp.*?m3u8/g).length; m++) { s = s + "https://hls.cntv.myalicdn.com//asp" + a.responseText.match(/asp.*?m3u8/g)[m].slice(7) + "\n\n" }; var blob = new Blob([s], { type: "text/plain" }); var url = URL.createObjectURL(blob); window.open(url);
  768. @param url:
  769. @return:
  770. """
  771. url = url or ''
  772. hook1 = lambda x: x.replace('asp/', 'asp//', 1)
  773. hook2 = lambda x: x.replace('hls/', 'hls//', 1)
  774. hook3 = lambda x: x.replace('https://newcntv.qcloudcdn.com', 'https://hls.cntv.myalicdn.com/', 1)
  775. hooks = [hook1, hook2, hook3]
  776. hook = random.choice(hooks)
  777. return hook(url)
  778. # 取m3u8
  779. def get_m3u8(self, urlTxt):
  780. """
  781. https://blog.csdn.net/panwang666/article/details/135347859
  782. JavaScript:jQuery.getJSON("https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid="+guid,function(result){document.writeln(result.hls_url.link(result.hls_url));});
  783. https://newcntv.qcloudcdn.com/asp/hls/main/0303000a/3/default/3628bb15af644f588dc91ec68425b9ac/main.m3u8?maxbr=2048
  784. @param urlTxt:
  785. @return:
  786. """
  787. url = "https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={0}".format(urlTxt)
  788. # htmlTxt = self.webReadFile(urlStr=url, header=self.header)
  789. htmlTxt = self.fetch(url).text
  790. jo = json.loads(htmlTxt)
  791. link = jo['hls_url'].strip()
  792. # print('hls_url:',link)
  793. # 获取域名前缀
  794. urlPrefix = self.get_RegexGetText(Text=link, RegexText='(http[s]?://[a-zA-z0-9.]+)/', Index=1)
  795. # 域名前缀指定替换,然后可以获取到更高质量的视频列表
  796. # /asp/h5e/hls/2000/0303000a/3/default/3628bb15af644f588dc91ec68425b9ac/2000.m3u8
  797. new_link = link.replace(f'{urlPrefix}/asp/hls/', 'https://dh5.cntv.qcloudcdn.com/asp/h5e/hls/').split('?')[0]
  798. # print('new_link:',new_link)
  799. html = self.webReadFile(urlStr=new_link, header=self.header)
  800. content = html.strip()
  801. arr = content.split('\n')
  802. subUrl = arr[-1].split('/')
  803. # hdUrl = urlPrefix + arr[-1]
  804. # subUrl[3] = '2000'
  805. # subUrl[-1] = '2000.m3u8'
  806. # hdUrl = urlPrefix + '/'.join(subUrl)
  807. maxVideo = subUrl[-1].replace('.m3u8', '')
  808. hdUrl = link.replace('main', maxVideo)
  809. hdUrl = hdUrl.replace(urlPrefix, 'https://newcntv.qcloudcdn.com')
  810. hdRsp = self.TestWebPage(urlStr=hdUrl, header=self.header)
  811. if hdRsp == 200:
  812. url = hdUrl.split('?')[0]
  813. url = self.hookM3u8(url)
  814. self.log(f'视频链接: {url}')
  815. else:
  816. url = ''
  817. return url
  818. def fixm3u8_url(self, url):
  819. # 获取域名前缀
  820. urlPrefix = self.get_RegexGetText(Text=url, RegexText='(http[s]?://[a-zA-z0-9.]+)/', Index=1)
  821. # 域名前缀指定替换,然后可以获取到更高质量的视频列表
  822. new_link = url.split('?')[0]
  823. # print(new_link)
  824. html = self.webReadFile(urlStr=new_link, header=self.header)
  825. content = html.strip()
  826. # print(content)
  827. arr = content.split('\n')
  828. subUrl = arr[3] if 'EXT-X-VERSION' in content else arr[2]
  829. hdUrl = self.urljoin(new_link, subUrl).split('?')[0]
  830. # hdUrl = hdUrl.replace(urlPrefix, 'https://newcntv.qcloudcdn.com')
  831. hdRsp = self.TestWebPage(urlStr=hdUrl, header=self.header)
  832. if hdRsp == 200:
  833. url = hdUrl
  834. self.log(f'视频链接: {url}')
  835. else:
  836. url = ''
  837. return url
  838. # 搜索
  839. def get_list_search(self, html, tid):
  840. jRoot = json.loads(html)
  841. jsonList = jRoot['list']
  842. videos = []
  843. for vod in jsonList:
  844. url = vod['urllink']
  845. title = self.removeHtml(txt=vod['title'])
  846. img = vod['imglink']
  847. id = vod['id']
  848. brief = vod['channel']
  849. year = vod['uploadtime']
  850. if len(url) == 0:
  851. continue
  852. guids = [tid, title, url, img, id, year, '', brief]
  853. guid = "||".join(guids)
  854. videos.append({
  855. "vod_id": guid,
  856. "vod_name": title,
  857. "vod_pic": img,
  858. "vod_remarks": year
  859. })
  860. return videos
  861. def get_list1(self, html, tid, year_prefix=None):
  862. jRoot = json.loads(html)
  863. videos = []
  864. data = jRoot['response']
  865. if data is None:
  866. return []
  867. jsonList = data['docs']
  868. for vod in jsonList:
  869. id = vod['lastVIDE']['videoSharedCode']
  870. desc = vod['lastVIDE']['videoTitle']
  871. title = vod['column_name']
  872. url = vod['column_website']
  873. img = vod['column_logo']
  874. year = vod['column_playdate']
  875. brief = vod['column_brief']
  876. actors = ''
  877. if len(url) == 0:
  878. continue
  879. guids = [tid, title, url, img, id, year, actors, brief]
  880. guid = "||".join(guids)
  881. # print(vod_id)
  882. videos.append({
  883. "vod_id": year_prefix + '$$$' + guid if year_prefix else guid,
  884. "vod_name": title,
  885. "vod_pic": img,
  886. "vod_remarks": desc.split('》')[1].strip() if '》' in desc else desc.strip()
  887. })
  888. # print(videos)
  889. return videos
  890. # 分类取结果
  891. def get_list(self, html, tid):
  892. jRoot = json.loads(html)
  893. videos = []
  894. data = jRoot['data']
  895. if data is None:
  896. return []
  897. jsonList = data['list']
  898. for vod in jsonList:
  899. url = vod['url']
  900. title = vod['title']
  901. img = vod['image']
  902. id = vod['id']
  903. try:
  904. brief = vod['brief']
  905. except:
  906. brief = ''
  907. try:
  908. year = vod['year']
  909. except:
  910. year = ''
  911. try:
  912. actors = vod['actors']
  913. except:
  914. actors = ''
  915. if len(url) == 0:
  916. continue
  917. guids = [tid, title, url, img, id, year, actors, brief]
  918. guid = "||".join(guids)
  919. # print(vod_id)
  920. videos.append({
  921. "vod_id": guid,
  922. "vod_name": title,
  923. "vod_pic": img,
  924. "vod_remarks": ''
  925. })
  926. return videos
  927. # 4k分类取结果
  928. def get_list_4k(self, html, tid):
  929. jRoot = json.loads(html)
  930. videos = []
  931. data = jRoot['data']
  932. if data is None:
  933. return []
  934. jsonList = data['list']
  935. for vod in jsonList:
  936. vod_remarks = vod['title']
  937. id = vod['id']
  938. vod = vod['last_video']
  939. img = vod['image']
  940. url = vod['url']
  941. title = vod['title']
  942. brief = vod.get('brief') or ''
  943. year = vod.get('year') or ''
  944. actors = vod.get('actors') or ''
  945. if len(url) == 0:
  946. continue
  947. guids = [tid, title, url, img, id, year, actors, brief]
  948. guid = "||".join(guids)
  949. # print(vod_id)
  950. videos.append({
  951. "vod_id": guid,
  952. "vod_name": title,
  953. "vod_pic": img,
  954. "vod_remarks": vod_remarks
  955. })
  956. return videos
  957. if __name__ == '__main__':
  958. from t4.core.loader import t4_spider_init
  959. spider = Spider()
  960. t4_spider_init(spider)
  961. spider.init_api_ext_file()
  962. # print(spider.homeContent(True))
  963. # print(spider.homeVideoContent())
  964. # url = 'https://api.cntv.cn/lanmu/columnSearch?&fl=&fc=%E6%96%B0%E9%97%BB&cid=&p=1&n=20&serviceId=tvcctv&t=jsonp&cb=Callback'
  965. # url = 'https://api.cntv.cn/lanmu/columnSearch?&fl=&fc=&cid=&p=1&n=20&serviceId=tvcctv&t=json&cb=ko'
  966. # r = spider.fetch(url)
  967. # print(r.text)
  968. # home_content = spider.homeContent(None)
  969. # print(home_content)
  970. cate_content = spider.categoryContent('栏目大全', 1, {'cid': 'n'}, {})
  971. # cate_content = spider.categoryContent('频道直播', 1, None, None)
  972. print(cate_content)
  973. vid = cate_content['list'][0]['vod_id']
  974. print(vid)
  975. detail_content = spider.detailContent([vid])
  976. print(detail_content)
  977. # #
  978. vod_play_from = detail_content['list'][0]['vod_play_from']
  979. vod_play_url = detail_content['list'][0]['vod_play_url']
  980. print(vod_play_from, vod_play_url)
  981. _url = vod_play_url.split('#')[0].split('$')[1]
  982. print(_url)
  983. print('vod_play_from:', vod_play_from, ' vod_play_url:', _url)
  984. play = spider.playerContent(vod_play_from, _url, None)
  985. print(play)
  986. # play = spider.playerContent('道长在线直播', 'cctv1||https://tv.cctv.com/live/cctv1/', None)
  987. # print(play)