cctv央视.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062
  1. # coding=utf-8
  2. # !/usr/bin/python
  3. import os.path
  4. import random
  5. import sys
  6. sys.path.append('..')
  7. try:
  8. from base.spider import Spider as BaseSpider
  9. #from base.spider import BaseSpider
  10. except ImportError:
  11. from t4.base.spider import BaseSpider
  12. import json
  13. import time
  14. import base64
  15. import datetime
  16. import re
  17. from urllib import request, parse
  18. from pathlib import Path
  19. import urllib
  20. import urllib.request
  21. """
  22. 配置示例:
  23. t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式,比如./开头或者.json结尾
  24. api里会自动含有ext参数是base64编码后的选中的筛选条件
  25. 错误示例,ext含有json:
  26. {
  27. "key":"hipy_cntv央视",
  28. "name":"cntv央视(hipy_t4)",
  29. "type":4,
  30. "api":"http://192.168.31.49:5707/api/v1/vod/cntv央视?api_ext={{host}}/txt/hipy/cntv央视.json",
  31. "searchable":1,
  32. "quickSearch":1,
  33. "filterable":0,
  34. "ext":"cntv央视.json"
  35. }
  36. 正确示例。同时存在ext和api_ext会优先取ext作为extend加载init
  37. {
  38. "key":"hipy_t4_cntv央视",
  39. "name":"cntv央视(hipy_t4)",
  40. "type":4,
  41. "api":"http://192.168.31.49:5707/api/v1/vod/cntv央视?api_ext={{host}}/txt/hipy/cntv央视.json",
  42. "searchable":1,
  43. "quickSearch":0,
  44. "filterable":1,
  45. "ext":"{{host}}/files/hipy/cntv央视.json"
  46. },
  47. {
  48. "key": "hipy_t3_cntv央视",
  49. "name": "cntv央视(hipy_t3)",
  50. "type": 3,
  51. "api": "{{host}}/txt/hipy/cntv央视.py",
  52. "searchable": 1,
  53. "quickSearch": 0,
  54. "filterable": 1,
  55. "ext": "{{host}}/files/hipy/cntv央视.json"
  56. },
  57. """
  58. class Spider(BaseSpider): # 元类 默认的元类 type
  59. module = None
  60. def getDependence(self):
  61. return ['base_spider']
  62. def getName(self):
  63. return "中央电视台" # 可搜索
  64. def init_api_ext_file(self):
  65. ext_file = __file__.replace('.py', '.json')
  66. print(f'ext_file:{ext_file}')
  67. # 特别节目网页: https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65
  68. # 特别节目分类筛选获取页面: https://tv.cctv.com/yxg/tbjm/index.shtml
  69. # 纪录片网页: https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65
  70. # 纪录片分类筛选获取页面:https://tv.cctv.com/yxg/jlp/index.shtml
  71. # ==================== 获取特别节目的筛选条件 ======================
  72. r = self.fetch('https://tv.cctv.com/yxg/tbjm/index.shtml')
  73. html = r.text
  74. html = self.html(html)
  75. filter_tbjm = []
  76. lis = html.xpath('//*[@id="pindao"]/li')
  77. li_value = []
  78. for li in lis:
  79. li_value.append({
  80. 'n': ''.join(li.xpath('./span//text()')),
  81. 'v': ''.join(li.xpath('@datacd')),
  82. })
  83. # print(li_value)
  84. filter_tbjm.append({
  85. "key": "datapd-channel",
  86. "name": "频道",
  87. "value": li_value
  88. })
  89. lis = html.xpath('//*[@id="fenlei"]/li')
  90. li_value = []
  91. for li in lis:
  92. li_value.append({
  93. 'n': ''.join(li.xpath('./span//text()')),
  94. 'v': ''.join(li.xpath('@datalx')),
  95. })
  96. # print(li_value)
  97. filter_tbjm.append({
  98. "key": "datafl-sc",
  99. "name": "类型",
  100. "value": li_value
  101. })
  102. lis = html.xpath('//*[@id="zimu"]/li')
  103. li_value = []
  104. for li in lis:
  105. li_value.append({
  106. 'n': ''.join(li.xpath('./span//text()')),
  107. 'v': ''.join(li.xpath('@datazm')),
  108. })
  109. # print(li_value)
  110. filter_tbjm.append({
  111. "key": "dataszm-letter",
  112. "name": "首字母",
  113. "value": li_value
  114. })
  115. print(filter_tbjm)
  116. # ==================== 纪录片筛选获取 ======================
  117. r = self.fetch('https://tv.cctv.com/yxg/jlp/index.shtml')
  118. html = r.text
  119. html = self.html(html)
  120. filter_jlp = []
  121. lis = html.xpath('//*[@id="pindao"]/li')
  122. li_value = []
  123. for li in lis:
  124. li_value.append({
  125. 'n': ''.join(li.xpath('./span//text()')),
  126. 'v': ''.join(li.xpath('@datacd')),
  127. })
  128. # print(li_value)
  129. filter_jlp.append({
  130. "key": "datapd-channel",
  131. "name": "频道",
  132. "value": li_value
  133. })
  134. lis = html.xpath('//*[@id="fenlei"]/li')
  135. li_value = []
  136. for li in lis:
  137. li_value.append({
  138. 'n': ''.join(li.xpath('./span//text()')),
  139. 'v': ''.join(li.xpath('@datalx')),
  140. })
  141. # print(li_value)
  142. filter_jlp.append({
  143. "key": "datafl-sc",
  144. "name": "类型",
  145. "value": li_value
  146. })
  147. lis = html.xpath('//*[@id="nianfen"]/li')
  148. li_value = []
  149. for li in lis:
  150. li_value.append({
  151. 'n': ''.join(li.xpath('./span//text()')),
  152. 'v': ''.join(li.xpath('@datanf')),
  153. })
  154. # print(li_value)
  155. filter_jlp.append({
  156. "key": "datanf-year",
  157. "name": "年份",
  158. "value": li_value
  159. })
  160. lis = html.xpath('//*[@id="zimu"]/li')
  161. li_value = []
  162. for li in lis:
  163. li_value.append({
  164. 'n': ''.join(li.xpath('./span//text()')),
  165. 'v': ''.join(li.xpath('@datazm')),
  166. })
  167. # print(li_value)
  168. filter_jlp.append({
  169. "key": "dataszm-letter",
  170. "name": "首字母",
  171. "value": li_value
  172. })
  173. print(filter_jlp)
  174. ext_file_dict = {
  175. "特别节目": filter_tbjm,
  176. "纪录片": filter_jlp,
  177. }
  178. # print(json.dumps(ext_file_dict,ensure_ascii=False,indent=4))
  179. with open(ext_file, mode='w+', encoding='utf-8') as f:
  180. # f.write(json.dumps(ext_file_dict,ensure_ascii=False,indent=4))
  181. f.write(json.dumps(ext_file_dict, ensure_ascii=False))
  182. def init(self, extend=""):
  183. def init_file(ext_file):
  184. ext_file = Path(ext_file).as_posix()
  185. # print(f'ext_file:{ext_file}')
  186. if os.path.exists(ext_file):
  187. # print('存在扩展文件')
  188. with open(ext_file, mode='r', encoding='utf-8') as f:
  189. try:
  190. ext_dict = json.loads(f.read())
  191. # print(ext_dict)
  192. self.config['filter'].update(ext_dict)
  193. except Exception as e:
  194. print(f'更新扩展筛选条件发生错误:{e}')
  195. print("============依赖列表:{0}============".format(extend))
  196. ext = self.extend
  197. print("============ext:{0}============".format(ext))
  198. if isinstance(ext, str) and ext:
  199. if ext.startswith('./'):
  200. ext_file = os.path.join(os.path.dirname(__file__), ext)
  201. init_file(ext_file)
  202. elif ext.startswith('http'):
  203. try:
  204. r = self.fetch(ext)
  205. self.config['filter'].update(r.json())
  206. except Exception as e:
  207. print(f'更新扩展筛选条件发生错误:{e}')
  208. elif not ext.startswith('./') and not ext.startswith('http'):
  209. ext_file = os.path.join(os.path.dirname(__file__), './' + ext + '.json')
  210. init_file(ext_file)
  211. # ==================== 栏目大全加载年月筛选 ======================
  212. lanmu_list = self.config['filter']['栏目大全']
  213. lanmu_keys_list = [lanmu['key'] for lanmu in lanmu_list]
  214. if 'year' not in lanmu_keys_list:
  215. currentYear = datetime.date.today().year
  216. yearList = [{"n": "全部", "v": ""}]
  217. for year in range(currentYear, currentYear - 10, -1):
  218. yearList.append({"n": year, "v": year})
  219. yearDict = {"key": "year", "name": "年份", "value": yearList}
  220. lanmu_list.append(yearDict)
  221. if 'month' not in lanmu_keys_list:
  222. monthList = [{"n": "全部", "v": ""}]
  223. for month in range(1, 13):
  224. text = str(month).rjust(2, '0')
  225. monthList.append({"n": text, "v": text})
  226. monthDict = {"key": "month", "name": "月份", "value": monthList}
  227. lanmu_list.append(monthDict)
  228. # 装载模块,这里只要一个就够了
  229. if isinstance(extend, list):
  230. for lib in extend:
  231. if '.Spider' in str(type(lib)):
  232. self.module = lib
  233. break
  234. def isVideoFormat(self, url):
  235. pass
  236. def manualVideoCheck(self):
  237. pass
  238. def homeContent(self, filter):
  239. result = {}
  240. cateManual = {
  241. "4K专区": "4K专区",
  242. "栏目大全": "栏目大全",
  243. "特别节目": "特别节目",
  244. "纪录片": "纪录片",
  245. "电视剧": "电视剧",
  246. "动画片": "动画片",
  247. }
  248. classes = []
  249. for k in cateManual:
  250. classes.append({
  251. 'type_name': k,
  252. 'type_id': cateManual[k]
  253. })
  254. result['class'] = classes
  255. if (filter):
  256. result['filters'] = self.config['filter']
  257. return result
  258. def homeVideoContent(self):
  259. result = {
  260. 'list': []
  261. }
  262. if self.module:
  263. result = self.module.homeVideoContent()
  264. return result
  265. def categoryContent(self, tid, pg, filter, extend):
  266. result = {}
  267. month = "" # 月
  268. year = "" # 年
  269. area = '' # 地区
  270. channel = '' # 频道
  271. datafl = '' # 类型
  272. letter = '' # 字母
  273. year_prefix = '' # 栏目大全的年月筛选过滤
  274. pagecount = 24
  275. if tid == '动画片':
  276. id = urllib.parse.quote(tid)
  277. if 'datadq-area' in extend.keys():
  278. area = urllib.parse.quote(extend['datadq-area'])
  279. if 'dataszm-letter' in extend.keys():
  280. letter = extend['dataszm-letter']
  281. if 'datafl-sc' in extend.keys():
  282. datafl = urllib.parse.quote(extend['datafl-sc'])
  283. url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955899450127&area={0}&sc={4}&fc={1}&letter={2}&p={3}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
  284. area, id, letter, pg, datafl)
  285. elif tid == '纪录片':
  286. id = urllib.parse.quote(tid)
  287. if 'datapd-channel' in extend.keys():
  288. channel = urllib.parse.quote(extend['datapd-channel'])
  289. if 'datafl-sc' in extend.keys():
  290. datafl = urllib.parse.quote(extend['datafl-sc'])
  291. if 'datanf-year' in extend.keys():
  292. year = extend['datanf-year']
  293. if 'dataszm-letter' in extend.keys():
  294. letter = extend['dataszm-letter']
  295. url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955924871139&fc={0}&channel={1}&sc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
  296. id, channel, datafl, year, letter, pg)
  297. elif tid == '电视剧':
  298. id = urllib.parse.quote(tid)
  299. if 'datafl-sc' in extend.keys():
  300. datafl = urllib.parse.quote(extend['datafl-sc'])
  301. if 'datanf-year' in extend.keys():
  302. year = extend['datanf-year']
  303. if 'dataszm-letter' in extend.keys():
  304. letter = extend['dataszm-letter']
  305. url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955853485115&area={0}&sc={1}&fc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
  306. area, datafl, id, year, letter, pg)
  307. elif tid == '特别节目':
  308. id = urllib.parse.quote(tid)
  309. if 'datapd-channel' in extend.keys():
  310. channel = urllib.parse.quote(extend['datapd-channel'])
  311. if 'datafl-sc' in extend.keys():
  312. datafl = urllib.parse.quote(extend['datafl-sc'])
  313. if 'dataszm-letter' in extend.keys():
  314. letter = extend['dataszm-letter']
  315. url = 'https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955953877151&channel={0}&sc={1}&fc={2}&bigday=&letter={3}&p={4}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
  316. channel, datafl, id, letter, pg)
  317. elif tid == '栏目大全':
  318. cid = '' # 频道
  319. if 'cid' in extend.keys():
  320. cid = extend['cid']
  321. fc = '' # 分类
  322. if 'fc' in extend.keys():
  323. fc = extend['fc']
  324. fl = '' # 字母
  325. if 'fl' in extend.keys():
  326. fl = extend['fl']
  327. year = extend.get('year') or ''
  328. month = extend.get('month') or ''
  329. if year:
  330. year_prefix = year + month
  331. url = 'https://api.cntv.cn/lanmu/columnSearch?&fl={0}&fc={1}&cid={2}&p={3}&n=20&serviceId=tvcctv&t=json&cb=ko'.format(
  332. fl, fc, cid, pg)
  333. pagecount = 20
  334. elif tid == '4K专区':
  335. cid = 'CHAL1558416868484111'
  336. url = 'https://api.cntv.cn/NewVideo/getLastVideoList4K?serviceId=cctv4k&cid={0}&p={1}&n={2}&t=json&cb=ko'.format(
  337. cid, pg, pagecount
  338. )
  339. elif tid == '频道直播':
  340. url = 'https://tv.cctv.com/epg/index.shtml'
  341. else:
  342. url = 'https://tv.cctv.com/epg/index.shtml'
  343. videos = []
  344. htmlText = self.fetch(url).text
  345. if tid == '栏目大全':
  346. index = htmlText.rfind(');')
  347. if index > -1:
  348. htmlText = htmlText[3:index]
  349. videos = self.get_list1(html=htmlText, tid=tid, year_prefix=year_prefix)
  350. elif tid == '4K专区':
  351. index = htmlText.rfind(');')
  352. if index > -1:
  353. htmlText = htmlText[3:index]
  354. videos = self.get_list_4k(html=htmlText, tid=tid)
  355. elif tid == '频道直播':
  356. html = self.html(htmlText)
  357. lis = html.xpath('//*[@id="jiemudan01"]//div[contains(@class,"channel_con")]//ul/li')
  358. for li in lis:
  359. vid = ''.join(li.xpath('./img/@title'))
  360. pic = ''.join(li.xpath('./img/@src'))
  361. pic = self.urljoin('https://tv.cctv.com/epg/index.shtml', pic)
  362. videos.append({
  363. 'vod_id': '||'.join([tid, vid, f'https://tv.cctv.com/live/{vid}/', pic]),
  364. 'vod_name': vid,
  365. 'vod_pic': pic,
  366. 'vod_mark': '',
  367. })
  368. else:
  369. videos = self.get_list(html=htmlText, tid=tid)
  370. # print(videos)
  371. result['list'] = videos
  372. result['page'] = pg
  373. result['pagecount'] = 9999 if len(videos) >= pagecount else pg
  374. result['limit'] = 90
  375. result['total'] = 999999
  376. return result
  377. def detailContent(self, array):
  378. result = {}
  379. year_prefix = ''
  380. did = array[0]
  381. if '$$$' in did:
  382. year_prefix = did.split('$$$')[0]
  383. did = did.split('$$$')[1]
  384. aid = did.split('||')
  385. tid = aid[0]
  386. title = aid[1]
  387. lastVideo = aid[2]
  388. logo = aid[3]
  389. if tid == '频道直播':
  390. vod = {
  391. "vod_id": did,
  392. "vod_name": title.replace(' ', ''),
  393. "vod_pic": logo,
  394. "vod_content": f'频道{title}正在直播中',
  395. "vod_play_from": '道长在线直播',
  396. "vod_play_url": f'在线观看${title}||{lastVideo}',
  397. }
  398. result = {'list': [vod]}
  399. return result
  400. id = aid[4]
  401. vod_year = aid[5]
  402. actors = aid[6] if len(aid) > 6 else ''
  403. brief = aid[7] if len(aid) > 7 else '' # get请求最长255,这个描述会有可能直接被干没了。
  404. fromId = 'CCTV'
  405. if tid == "栏目大全":
  406. lastUrl = 'https://api.cntv.cn/video/videoinfoByGuid?guid={0}&serviceId=tvcctv'.format(id)
  407. # htmlTxt = self.webReadFile(urlStr=lastUrl, header=self.header)
  408. htmlTxt = self.fetch(lastUrl).text
  409. topicId = json.loads(htmlTxt)['ctid']
  410. url = 'https://api.cntv.cn/NewVideo/getVideoListByColumn'
  411. # params = {
  412. # 'p': '1',
  413. # 'n': '100',
  414. # 't': 'json',
  415. # 'mode': '0',
  416. # 'sort': 'desc',
  417. # 'serviceId': 'tvcctv',
  418. # 'd': year_prefix,
  419. # 'id': topicId
  420. # }
  421. # htmlTxt = self.fetch(url,data=params).text
  422. Url = "{0}?id={1}&d=&p=1&n=100&sort=desc&mode=0&serviceId=tvcctv&t=json&d={2}".format(
  423. url, topicId, year_prefix)
  424. elif tid == "4K专区":
  425. Url = 'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew?id={0}&serviceId=cctv4k&p=1&n=100&mode=0&pub=1'.format(
  426. id)
  427. print(Url)
  428. else:
  429. Url = 'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew?id={0}&serviceId=tvcctv&p=1&n=100&mode=0&pub=1'.format(
  430. id)
  431. jRoot = ''
  432. videoList = []
  433. try:
  434. if tid == "搜索":
  435. fromId = '中央台'
  436. videoList = [title + "$" + lastVideo]
  437. else:
  438. # htmlTxt = self.webReadFile(urlStr=Url, header=self.header)
  439. htmlTxt = self.fetch(Url).text
  440. jRoot = json.loads(htmlTxt)
  441. data = jRoot['data']
  442. jsonList = data['list']
  443. videoList = self.get_EpisodesList(jsonList=jsonList)
  444. if len(videoList) < 1:
  445. # htmlTxt = self.webReadFile(urlStr=lastVideo, header=self.header)
  446. htmlTxt = self.fetch(lastVideo).text
  447. if tid == "电视剧" or tid == "纪录片" or tid == "4K专区":
  448. patternTxt = r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'"
  449. elif tid == "特别节目":
  450. patternTxt = r'class="tp1"><a\s*href="(?P<url>https://.+?)"\s*target="_blank"\s*title="(?P<title>.+?)"></a></div>'
  451. elif tid == "动画片":
  452. patternTxt = r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'"
  453. elif tid == "栏目大全":
  454. patternTxt = r'href="(?P<url>.+?)" target="_blank" alt="(?P<title>.+?)" title=".+?">'
  455. videoList = self.get_EpisodesList_re(htmlTxt=htmlTxt, patternTxt=patternTxt)
  456. fromId = '央视'
  457. except:
  458. pass
  459. if len(videoList) == 0:
  460. return {}
  461. vod = {
  462. "vod_id": did,
  463. "vod_name": title.replace(' ', ''),
  464. "vod_pic": logo,
  465. "type_name": tid,
  466. "vod_year": vod_year,
  467. "vod_area": "",
  468. "vod_remarks": '',
  469. "vod_actor": actors,
  470. "vod_director": '',
  471. "vod_content": brief
  472. }
  473. vod['vod_play_from'] = fromId
  474. vod['vod_play_url'] = "#".join(videoList)
  475. result = {
  476. 'list': [
  477. vod
  478. ]
  479. }
  480. return result
  481. def get_lineList(self, Txt, mark, after):
  482. circuit = []
  483. origin = Txt.find(mark)
  484. while origin > 8:
  485. end = Txt.find(after, origin)
  486. circuit.append(Txt[origin:end])
  487. origin = Txt.find(mark, end)
  488. return circuit
  489. def get_RegexGetTextLine(self, Text, RegexText, Index):
  490. returnTxt = []
  491. pattern = re.compile(RegexText, re.M | re.S)
  492. ListRe = pattern.findall(Text)
  493. if len(ListRe) < 1:
  494. return returnTxt
  495. for value in ListRe:
  496. returnTxt.append(value)
  497. return returnTxt
  498. def searchContent(self, key, quick, pg=1):
  499. key = urllib.parse.quote(key)
  500. Url = 'https://search.cctv.com/ifsearch.php?page=1&qtext={0}&sort=relevance&pageSize=20&type=video&vtime=-1&datepid=1&channel=&pageflag=0&qtext_str={0}'.format(
  501. key)
  502. # htmlTxt = self.webReadFile(urlStr=Url, header=self.header)
  503. htmlTxt = self.fetch(Url).text
  504. videos = self.get_list_search(html=htmlTxt, tid='搜索')
  505. result = {
  506. 'list': videos
  507. }
  508. return result
  509. def playerContent(self, flag, id, vipFlags):
  510. result = {}
  511. url = ''
  512. parse = 0
  513. headers = {
  514. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
  515. }
  516. if flag == 'CCTV':
  517. url = self.get_m3u8(urlTxt=id)
  518. elif flag == '道长在线直播':
  519. # _url = id
  520. title = id.split('||')[0] # 获取标题
  521. _url = f'https://vdn.live.cntv.cn/api2/liveHtml5.do?channel=pc://cctv_p2p_hd{title}&channel_id={title}'
  522. htmlTxt = self.fetch(_url).text
  523. # print(htmlTxt)
  524. vdata = self.regStr(htmlTxt, "var .*?=.*?'(.*?)';")
  525. vdata = self.str2json(vdata)
  526. print(vdata)
  527. url = vdata['hls_url']['hls1']
  528. print(url)
  529. url = self.fixm3u8_url(url)
  530. else:
  531. try:
  532. # htmlTxt = self.webReadFile(urlStr=id, header=self.header)
  533. htmlTxt = self.fetch(id).text
  534. guid = self.get_RegexGetText(Text=htmlTxt, RegexText=r'var\sguid\s*=\s*"(.+?)";', Index=1)
  535. url = self.get_m3u8(urlTxt=guid)
  536. except:
  537. url = id
  538. parse = 1
  539. if url.find('https:') < 0:
  540. url = id
  541. parse = 1
  542. result["parse"] = parse # 1=嗅探,0=播放
  543. result["playUrl"] = ''
  544. result["url"] = url
  545. result["header"] = headers
  546. return result
  547. # 分类抓取地址:
  548. # 栏目大全:https://tv.cctv.com/lm/index.shtml?spm=C28340.Pu9TN9YUsfNZ.E2PQtIunpEaz.24
  549. # 电视剧:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=dsj&datafl=&datadq=&fc=%E7%94%B5%E8%A7%86%E5%89%A7&datanf=&dataszm=
  550. # 动画片:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=dhp&datafl=&datadq=&fc=%E5%8A%A8%E7%94%BB%E7%89%87&dataszm=
  551. # 纪录片:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=jlp&datapd=&datafl=&fc=%E7%BA%AA%E5%BD%95%E7%89%87&datanf=&dataszm=
  552. # 特别节目:https://tv.cctv.com/yxg/index.shtml?spm=C28340.PlFTqGe6Zk8M.E2PQtIunpEaz.65#datacid=tbjm&datapd=&datafl=&fc=%E7%89%B9%E5%88%AB%E8%8A%82%E7%9B%AE&datajr=&dataszm=
    # Static configuration shared by all instances (class attribute).
    # "filter" maps a category name to its list of filter groups. Each group
    # has a "key" (the name categoryContent looks up in `extend`), a display
    # "name", and a "value" list of options where "n" is the label and "v"
    # is the value sent to the API. homeContent returns this mapping as
    # `filters`; init() merges extension data into it and appends year/month
    # groups to "栏目大全".
    config = {
        "player": {},
        "filter": {
            # TV series: genre, region, year, first letter.
            "电视剧": [
                {"key": "datafl-sc", "name": "类型",
                 "value": [{"n": "全部", "v": ""}, {"n": "谍战", "v": "谍战"}, {"n": "悬疑", "v": "悬疑"},
                           {"n": "刑侦", "v": "刑侦"}, {"n": "历史", "v": "历史"}, {"n": "古装", "v": "古装"},
                           {"n": "武侠", "v": "武侠"}, {"n": "军旅", "v": "军旅"}, {"n": "战争", "v": "战争"},
                           {"n": "喜剧", "v": "喜剧"}, {"n": "青春", "v": "青春"}, {"n": "言情", "v": "言情"},
                           {"n": "偶像", "v": "偶像"}, {"n": "家庭", "v": "家庭"}, {"n": "年代", "v": "年代"},
                           {"n": "革命", "v": "革命"}, {"n": "农村", "v": "农村"}, {"n": "都市", "v": "都市"},
                           {"n": "其他", "v": "其他"}]},
                {"key": "datadq-area", "name": "地区",
                 "value": [{"n": "全部", "v": ""}, {"n": "中国大陆", "v": "中国大陆"}, {"n": "中国香港", "v": "香港"},
                           {"n": "美国", "v": "美国"}, {"n": "欧洲", "v": "欧洲"}, {"n": "泰国", "v": "泰国"}]},
                {"key": "datanf-year", "name": "年份",
                 "value": [{"n": "全部", "v": ""}, {"n": "2024", "v": "2024"}, {"n": "2023", "v": "2023"},
                           {"n": "2022", "v": "2022"},
                           {"n": "2021", "v": "2021"}, {"n": "2020", "v": "2020"}, {"n": "2019", "v": "2019"},
                           {"n": "2018", "v": "2018"}, {"n": "2017", "v": "2017"}, {"n": "2016", "v": "2016"},
                           {"n": "2015", "v": "2015"}, {"n": "2014", "v": "2014"}, {"n": "2013", "v": "2013"},
                           {"n": "2012", "v": "2012"}, {"n": "2011", "v": "2011"}, {"n": "2010", "v": "2010"},
                           {"n": "2009", "v": "2009"}, {"n": "2008", "v": "2008"}, {"n": "2007", "v": "2007"},
                           {"n": "2006", "v": "2006"}, {"n": "2005", "v": "2005"}, {"n": "2004", "v": "2004"},
                           {"n": "2003", "v": "2003"}, {"n": "2002", "v": "2002"}, {"n": "2001", "v": "2001"},
                           {"n": "2000", "v": "2000"}, {"n": "1999", "v": "1999"}, {"n": "1998", "v": "1998"},
                           {"n": "1997", "v": "1997"}]},
                {"key": "dataszm-letter", "name": "字母",
                 "value": [{"n": "全部", "v": ""}, {"n": "A", "v": "A"}, {"n": "C", "v": "C"}, {"n": "E", "v": "E"},
                           {"n": "F", "v": "F"}, {"n": "G", "v": "G"}, {"n": "H", "v": "H"}, {"n": "I", "v": "I"},
                           {"n": "J", "v": "J"}, {"n": "K", "v": "K"}, {"n": "L", "v": "L"}, {"n": "M", "v": "M"},
                           {"n": "N", "v": "N"}, {"n": "O", "v": "O"}, {"n": "P", "v": "P"}, {"n": "Q", "v": "Q"},
                           {"n": "R", "v": "R"}, {"n": "S", "v": "S"}, {"n": "T", "v": "T"}, {"n": "U", "v": "U"},
                           {"n": "V", "v": "V"}, {"n": "W", "v": "W"}, {"n": "X", "v": "X"}, {"n": "Y", "v": "Y"},
                           {"n": "Z", "v": "Z"}, {"n": "0-9", "v": "0-9"}]}
            ],
            # Animation: genre, region, first letter.
            "动画片": [
                {"key": "datafl-sc", "name": "类型",
                 "value": [{"n": "全部", "v": ""}, {"n": "亲子", "v": "亲子"}, {"n": "搞笑", "v": "搞笑"},
                           {"n": "冒险", "v": "冒险"}, {"n": "动作", "v": "动作"}, {"n": "宠物", "v": "宠物"},
                           {"n": "体育", "v": "体育"}, {"n": "益智", "v": "益智"}, {"n": "历史", "v": "历史"},
                           {"n": "教育", "v": "教育"}, {"n": "校园", "v": "校园"}, {"n": "言情", "v": "言情"},
                           {"n": "武侠", "v": "武侠"}, {"n": "经典", "v": "经典"}, {"n": "未来", "v": "未来"},
                           {"n": "古代", "v": "古代"}, {"n": "神话", "v": "神话"}, {"n": "真人", "v": "真人"},
                           {"n": "励志", "v": "励志"}, {"n": "热血", "v": "热血"}, {"n": "奇幻", "v": "奇幻"},
                           {"n": "童话", "v": "童话"}, {"n": "剧情", "v": "剧情"}, {"n": "夺宝", "v": "夺宝"},
                           {"n": "其他", "v": "其他"}]},
                {"key": "datadq-area", "name": "地区",
                 "value": [{"n": "全部", "v": ""}, {"n": "中国大陆", "v": "中国大陆"}, {"n": "美国", "v": "美国"},
                           {"n": "欧洲", "v": "欧洲"}]},
                {"key": "dataszm-letter", "name": "字母",
                 "value": [{"n": "全部", "v": ""}, {"n": "A", "v": "A"}, {"n": "C", "v": "C"}, {"n": "E", "v": "E"},
                           {"n": "F", "v": "F"}, {"n": "G", "v": "G"}, {"n": "H", "v": "H"}, {"n": "I", "v": "I"},
                           {"n": "J", "v": "J"}, {"n": "K", "v": "K"}, {"n": "L", "v": "L"}, {"n": "M", "v": "M"},
                           {"n": "N", "v": "N"}, {"n": "O", "v": "O"}, {"n": "P", "v": "P"}, {"n": "Q", "v": "Q"},
                           {"n": "R", "v": "R"}, {"n": "S", "v": "S"}, {"n": "T", "v": "T"}, {"n": "U", "v": "U"},
                           {"n": "V", "v": "V"}, {"n": "W", "v": "W"}, {"n": "X", "v": "X"}, {"n": "Y", "v": "Y"},
                           {"n": "Z", "v": "Z"}, {"n": "0-9", "v": "0-9"}]}
            ],
            # Documentaries: genre, year, first letter.
            "纪录片": [
                {"key": "datafl-sc", "name": "类型",
                 "value": [{"n": "全部", "v": ""}, {"n": "人文历史", "v": "人文历史"}, {"n": "人物", "v": "人物"},
                           {"n": "军事", "v": "军事"}, {"n": "探索", "v": "探索"}, {"n": "社会", "v": "社会"},
                           {"n": "时政", "v": "时政"}, {"n": "经济", "v": "经济"}, {"n": "科技", "v": "科技"}]},
                {"key": "datanf-year", "name": "年份",
                 "value": [{"n": "全部", "v": ""}, {"n": "2024", "v": "2024"}, {"n": "2023", "v": "2023"},
                           {"n": "2022", "v": "2022"},
                           {"n": "2021", "v": "2021"}, {"n": "2020", "v": "2020"}, {"n": "2019", "v": "2019"},
                           {"n": "2018", "v": "2018"}, {"n": "2017", "v": "2017"}, {"n": "2016", "v": "2016"},
                           {"n": "2015", "v": "2015"}, {"n": "2014", "v": "2014"}, {"n": "2013", "v": "2013"},
                           {"n": "2012", "v": "2012"}, {"n": "2011", "v": "2011"}, {"n": "2010", "v": "2010"},
                           {"n": "2009", "v": "2009"}, {"n": "2008", "v": "2008"}]},
                {"key": "dataszm-letter", "name": "字母",
                 "value": [{"n": "全部", "v": ""}, {"n": "A", "v": "A"}, {"n": "C", "v": "C"}, {"n": "E", "v": "E"},
                           {"n": "F", "v": "F"}, {"n": "G", "v": "G"}, {"n": "H", "v": "H"}, {"n": "I", "v": "I"},
                           {"n": "J", "v": "J"}, {"n": "K", "v": "K"}, {"n": "L", "v": "L"}, {"n": "M", "v": "M"},
                           {"n": "N", "v": "N"}, {"n": "O", "v": "O"}, {"n": "P", "v": "P"}, {"n": "Q", "v": "Q"},
                           {"n": "R", "v": "R"}, {"n": "S", "v": "S"}, {"n": "T", "v": "T"}, {"n": "U", "v": "U"},
                           {"n": "V", "v": "V"}, {"n": "W", "v": "W"}, {"n": "X", "v": "X"}, {"n": "Y", "v": "Y"},
                           {"n": "Z", "v": "Z"}, {"n": "0-9", "v": "0-9"}]}
            ],
            # Special programmes: genre, first letter.
            # NOTE(review): the genre list has two "全部" entries (values ""
            # and "全部") — confirm whether both are intended.
            "特别节目": [
                {"key": "datafl-sc", "name": "类型",
                 "value": [{"n": "全部", "v": ""}, {"n": "全部", "v": "全部"}, {"n": "新闻", "v": "新闻"},
                           {"n": "经济", "v": "经济"}, {"n": "综艺", "v": "综艺"}, {"n": "体育", "v": "体育"},
                           {"n": "军事", "v": "军事"}, {"n": "影视", "v": "影视"}, {"n": "科教", "v": "科教"},
                           {"n": "戏曲", "v": "戏曲"}, {"n": "青少", "v": "青少"}, {"n": "音乐", "v": "音乐"},
                           {"n": "社会", "v": "社会"}, {"n": "公益", "v": "公益"}, {"n": "其他", "v": "其他"}]},
                {"key": "dataszm-letter", "name": "字母",
                 "value": [{"n": "全部", "v": ""}, {"n": "A", "v": "A"}, {"n": "C", "v": "C"}, {"n": "E", "v": "E"},
                           {"n": "F", "v": "F"}, {"n": "G", "v": "G"}, {"n": "H", "v": "H"}, {"n": "I", "v": "I"},
                           {"n": "J", "v": "J"}, {"n": "K", "v": "K"}, {"n": "L", "v": "L"}, {"n": "M", "v": "M"},
                           {"n": "N", "v": "N"}, {"n": "O", "v": "O"}, {"n": "P", "v": "P"}, {"n": "Q", "v": "Q"},
                           {"n": "R", "v": "R"}, {"n": "S", "v": "S"}, {"n": "T", "v": "T"}, {"n": "U", "v": "U"},
                           {"n": "V", "v": "V"}, {"n": "W", "v": "W"}, {"n": "X", "v": "X"}, {"n": "Y", "v": "Y"},
                           {"n": "Z", "v": "Z"}, {"n": "0-9", "v": "0-9"}]}
            ],
            # Programme directory: channel id, category, first letter.
            # init() appends "year" and "month" groups to this list.
            "栏目大全": [{"key": "cid", "name": "频道",
                      "value": [{"n": "全部", "v": ""}, {"n": "CCTV-1综合", "v": "EPGC1386744804340101"},
                                {"n": "CCTV-2财经", "v": "EPGC1386744804340102"},
                                {"n": "CCTV-3综艺", "v": "EPGC1386744804340103"},
                                {"n": "CCTV-4中文国际", "v": "EPGC1386744804340104"},
                                {"n": "CCTV-5体育", "v": "EPGC1386744804340107"},
                                {"n": "CCTV-6电影", "v": "EPGC1386744804340108"},
                                {"n": "CCTV-7国防军事", "v": "EPGC1386744804340109"},
                                {"n": "CCTV-8电视剧", "v": "EPGC1386744804340110"},
                                {"n": "CCTV-9纪录", "v": "EPGC1386744804340112"},
                                {"n": "CCTV-10科教", "v": "EPGC1386744804340113"},
                                {"n": "CCTV-11戏曲", "v": "EPGC1386744804340114"},
                                {"n": "CCTV-12社会与法", "v": "EPGC1386744804340115"},
                                {"n": "CCTV-13新闻", "v": "EPGC1386744804340116"},
                                {"n": "CCTV-14少儿", "v": "EPGC1386744804340117"},
                                {"n": "CCTV-15音乐", "v": "EPGC1386744804340118"},
                                {"n": "CCTV-16奥林匹克", "v": "EPGC1634630207058998"},
                                {"n": "CCTV-17农业农村", "v": "EPGC1563932742616872"},
                                {"n": "CCTV-5+体育赛事", "v": "EPGC1468294755566101"}]},
                     {"key": "fc", "name": "分类",
                      "value": [{"n": "全部", "v": ""}, {"n": "新闻", "v": "新闻"}, {"n": "体育", "v": "体育"},
                                {"n": "综艺", "v": "综艺"}, {"n": "健康", "v": "健康"}, {"n": "生活", "v": "生活"},
                                {"n": "科教", "v": "科教"}, {"n": "经济", "v": "经济"}, {"n": "农业", "v": "农业"},
                                {"n": "法治", "v": "法治"}, {"n": "军事", "v": "军事"}, {"n": "少儿", "v": "少儿"},
                                {"n": "动画", "v": "动画"}, {"n": "纪实", "v": "纪实"}, {"n": "戏曲", "v": "戏曲"},
                                {"n": "音乐", "v": "音乐"}, {"n": "影视", "v": "影视"}]},
                     {"key": "fl", "name": "字母",
                      "value": [{"n": "全部", "v": ""}, {"n": "A", "v": "A"}, {"n": "B", "v": "B"},
                                {"n": "C", "v": "C"}, {"n": "D", "v": "D"}, {"n": "E", "v": "E"},
                                {"n": "F", "v": "F"}, {"n": "G", "v": "G"}, {"n": "H", "v": "H"},
                                {"n": "I", "v": "I"}, {"n": "J", "v": "J"}, {"n": "K", "v": "K"},
                                {"n": "L", "v": "L"}, {"n": "M", "v": "M"}, {"n": "N", "v": "N"},
                                {"n": "O", "v": "O"}, {"n": "P", "v": "P"}, {"n": "Q", "v": "Q"},
                                {"n": "R", "v": "R"}, {"n": "S", "v": "S"}, {"n": "T", "v": "T"},
                                {"n": "U", "v": "U"}, {"n": "V", "v": "V"}, {"n": "W", "v": "W"},
                                {"n": "X", "v": "X"}, {"n": "Y", "v": "Y"}, {"n": "Z", "v": "Z"}]},
                     ]
        }
    }
  # Browser-like request headers for tv.cctv.com. Within this part of the file
  # they are handed to webReadFile/TestWebPage as the `header` argument.
  header = {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
      "Host": "tv.cctv.com",
      "Referer": "https://tv.cctv.com/"
  }
  def localProxy(self, params):
      """
      Return a fixed reply for local-proxy requests.

      @param params: request parameters; not inspected.
      @return: a new list ``[200, "video/MP2T", ""]`` (presumably status,
               content type and body; confirm against the framework).
      """
      status = 200
      content_type = "video/MP2T"
      body = ""
      return [status, content_type, body]
  696. # -----------------------------------------------自定义函数-----------------------------------------------
  697. # 访问网页
  def webReadFile(self, urlStr, header):
      """
      Download a URL and return its body decoded as UTF-8.

      @param urlStr: URL to open.
      @param header: accepted for call compatibility; not sent with the request.
      @return: the response body as text.
      """
      request = urllib.request.Request(url=urlStr)
      with urllib.request.urlopen(request) as reply:
          return reply.read().decode('utf-8')
  704. # 判断网络地址是否存在
  def TestWebPage(self, urlStr, header):
      """
      Probe a URL with an HTTP HEAD request and report its status code.

      urlopen raises HTTPError for error statuses (4xx/5xx); that error is
      converted into its status code here so callers can compare the result
      with 200 instead of having the exception escape their fallback branch.

      @param urlStr: URL to probe.
      @param header: accepted for call compatibility; not sent with the request.
      @return: the status code of the response, including error responses.
               Connection-level failures (URLError) still propagate.
      """
      # Function-scope import keeps this block self-contained.
      from urllib.error import HTTPError

      request = urllib.request.Request(url=urlStr, method='HEAD')
      try:
          with urllib.request.urlopen(request) as response:
              return response.getcode()
      except HTTPError as err:
          status = err.code
          err.close()  # release the response object attached to the error
          return status
  711. # 正则取文本
  def get_RegexGetText(self, Text, RegexText, Index):
      """
      Search ``Text`` for ``RegexText`` (multiline + dot-all) and return one group.

      @param Text: text to search.
      @param RegexText: regular expression pattern.
      @param Index: group index or name passed to ``match.group``.
      @return: the requested group of the first match, or "" when nothing matches.
      """
      found = re.search(RegexText, Text, re.M | re.S)
      return found.group(Index) if found is not None else ""
  720. # 取集数
  def get_EpisodesList(self, jsonList):
      """
      Build ``title$guid`` play entries from a list of episode dicts.

      @param jsonList: iterable of dicts carrying 'guid' and 'title'.
      @return: list of "title$guid" strings; items whose guid is empty are skipped.
      """
      entries = []
      for episode in jsonList:
          guid, name = episode['guid'], episode['title']
          if len(guid) != 0:
              entries.append(name + "$" + guid)
      return entries
  730. # 取集数
  def get_EpisodesList_re(self, htmlTxt, patternTxt):
      """
      Build ``title$url`` play entries from regex matches in a page.

      @param htmlTxt: text to scan.
      @param patternTxt: pattern with named groups ``url`` and ``title``
                         (applied with multiline + dot-all flags).
      @return: list of "title$url" strings; matches with an empty url are skipped.
      """
      entries = []
      for hit in re.finditer(patternTxt, htmlTxt, re.M | re.S):
          link, name = hit.group('url'), hit.group('title')
          if len(link) != 0:
              entries.append(name + "$" + link)
      return entries
  741. # 取剧集区
  def get_lineList(self, Txt, mark, after):
      """
      Cut ``Txt`` into segments that start at ``mark`` and stop before ``after``.

      Fixes over the previous version:
      - a marker located in the first nine characters is no longer ignored
        (the loop used to test ``origin > 8`` instead of "found");
      - when ``after`` is missing the rest of the text is returned as the last
        segment, instead of dropping its final character via a ``-1`` slice;
      - the scan always advances, so it cannot loop forever.

      @param Txt: text to split.
      @param mark: marker that opens a segment (included in the segment).
      @param after: marker that closes a segment (excluded from the segment).
      @return: list of segments in order of appearance; [] when ``mark`` is
               empty or never occurs.
      """
      segments = []
      if not mark:
          return segments
      start = Txt.find(mark)
      while start != -1:
          stop = Txt.find(after, start)
          if stop == -1:
              # No closing marker: keep the remainder and stop scanning.
              segments.append(Txt[start:])
              break
          segments.append(Txt[start:stop])
          # Resume at the closing marker, but always move past ``start``.
          start = Txt.find(mark, max(stop, start + 1))
      return segments
  750. # 正则取文本,返回数组
  def get_RegexGetTextLine(self, Text, RegexText, Index):
      """
      Return every match of ``RegexText`` in ``Text`` (multiline + dot-all).

      @param Text: text to search.
      @param RegexText: regular expression pattern.
      @param Index: unused; kept for call compatibility.
      @return: a new list with the ``findall`` results (strings, or tuples when
               the pattern has several groups); [] when nothing matches.
      """
      matcher = re.compile(RegexText, re.M | re.S)
      return list(matcher.findall(Text))
  760. # 删除html标签
  def removeHtml(self, txt):
      """
      Strip HTML tags from ``txt`` and turn ``&nbsp;`` entities into spaces.

      @param txt: text that may contain markup.
      @return: the text without ``<...>`` tags.
      """
      without_tags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
      return without_tags.replace("&nbsp;", " ")
  def hookM3u8(self, url):
      """
      Apply ONE randomly chosen rewrite to an m3u8 URL.

      Eight candidate rewrites exist: two double the slash after the first
      'asp/' or 'hls/', six swap a known CDN origin for
      'https://dh5.cntv.myhwcdn.cn/'. A single candidate is picked with
      ``random.choice`` and applied to its first occurrence only; if the picked
      text is absent the URL comes back unchanged.

      Background: https://www.52pojie.cn/thread-1932358-1-1.html (the thread
      shows a JavaScript snippet that rebuilds segment URLs with an 'asp//' path).

      @param url: source URL; ``None`` or empty is treated as ''.
      @return: the URL after the selected rewrite.
      """
      target_origin = 'https://dh5.cntv.myhwcdn.cn/'
      rewrites = [
          ('asp/', 'asp//'),
          ('hls/', 'hls//'),
          ('https://hls.cntv.myhwcdn.cn', target_origin),
          ('https://hls.cntv.myalicdn.com', target_origin),
          ('https://hls.cntv.lxdns.com', target_origin),
          ('http://hls.cntv.kcdnvip.com', target_origin),
          ('https://newcntv.qcloudcdn.com', target_origin),
          ('https://dh5aliwx01.v.cntv.cn', target_origin),
      ]
      old, new = random.choice(rewrites)
      return (url or '').replace(old, new, 1)
  784. # 取m3u8
  def get_m3u8(self, urlTxt):
      """
      Resolve a video pid to an m3u8 URL served from dh5.cntv.myhwcdn.cn.

      Steps: query getHttpVideoInfo.do for the pid, read ``hls_url`` from the
      JSON reply, download the playlist from the rewritten 'h5e' path, take the
      file name of the playlist's last line as the variant name, substitute it
      for 'main' in the original link, move the link to the dh5 host, and probe
      the result with a HEAD request.

      Background (kept from the original notes):
      https://blog.csdn.net/panwang666/article/details/135347859
      JavaScript:jQuery.getJSON("https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid="+guid,function(result){document.writeln(result.hls_url.link(result.hls_url));});
      https://newcntv.qcloudcdn.com/asp/hls/main/0303000a/3/default/3628bb15af644f588dc91ec68425b9ac/main.m3u8?maxbr=2048
      @param urlTxt: value placed in the ``pid`` query parameter.
      @return: the rewritten URL (query string removed, then passed through
               hookM3u8), or '' when the HEAD probe result is not 200.
      """
      url = "https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={0}".format(urlTxt)
      # htmlTxt = self.webReadFile(urlStr=url, header=self.header)
      htmlTxt = self.fetch(url).text
      jo = json.loads(htmlTxt)
      link = jo['hls_url'].strip()
      # print('hls_url:',link)
      # Scheme + host prefix of the returned link (no trailing slash).
      # NOTE(review): the range `A-z` also matches a few punctuation characters;
      # `A-Z` was probably intended - confirm before changing.
      urlPrefix = self.get_RegexGetText(Text=link, RegexText='(http[s]?://[a-zA-z0-9.]+)/', Index=1)
      # Original author's note: replacing the domain prefix gives access to a
      # higher-quality playlist, e.g.
      # /asp/h5e/hls/2000/0303000a/3/default/3628bb15af644f588dc91ec68425b9ac/2000.m3u8
      new_link = link.replace(f'{urlPrefix}/asp/hls/', 'https://dh5.cntv.myhwcdn.cn/asp/h5e/hls/').split('?')[0]
      # print('new_link:',new_link)
      html = self.webReadFile(urlStr=new_link, header=self.header)
      content = html.strip()
      arr = content.split('\n')
      # Path components of the playlist's last line.
      subUrl = arr[-1].split('/')
      # hdUrl = urlPrefix + arr[-1]
      # subUrl[3] = '2000'
      # subUrl[-1] = '2000.m3u8'
      # hdUrl = urlPrefix + '/'.join(subUrl)
      # File name of that last entry without ".m3u8"; presumably the
      # highest-quality variant - TODO confirm against a real playlist.
      maxVideo = subUrl[-1].replace('.m3u8', '')
      # Every occurrence of 'main' in the link is replaced by the variant name.
      hdUrl = link.replace('main', maxVideo)
      # The replacement ends with '/', so the result has '//' right after the host.
      hdUrl = hdUrl.replace(urlPrefix, 'https://dh5.cntv.myhwcdn.cn/')
      hdRsp = self.TestWebPage(urlStr=hdUrl, header=self.header)
      if hdRsp == 200:
          url = hdUrl.split('?')[0]
          url = self.hookM3u8(url)
          self.log(f'视频链接: {url}')
      else:
          url = ''
      return url
  def fixm3u8_url(self, url):
      """
      Turn a master m3u8 URL into the URL of one of the entries it lists.

      The playlist is downloaded (query string removed), one line of it is
      resolved against the playlist URL, and the result is probed with HEAD.

      @param url: m3u8 URL, optionally with a query string.
      @return: the resolved entry URL (without query string), or '' when the
               HEAD probe result is not 200.
      """
      # Host prefix extraction is kept from the original flow; the CDN swap
      # that consumed it is disabled, so the value is currently unused.
      host_prefix = self.get_RegexGetText(Text=url, RegexText='(http[s]?://[a-zA-z0-9.]+)/', Index=1)
      playlist_url = url.split('?')[0]
      playlist = self.webReadFile(urlStr=playlist_url, header=self.header).strip()
      rows = playlist.split('\n')
      # The entry sits one line lower when the playlist has an EXT-X-VERSION tag.
      entry_row = 3 if 'EXT-X-VERSION' in playlist else 2
      entry_url = self.urljoin(playlist_url, rows[entry_row]).split('?')[0]
      if self.TestWebPage(urlStr=entry_url, header=self.header) != 200:
          return ''
      self.log(f'视频链接: {entry_url}')
      return entry_url
  844. # 搜索
  845. def get_list_search(self, html, tid):
  846. jRoot = json.loads(html)
  847. jsonList = jRoot['list']
  848. videos = []
  849. for vod in jsonList:
  850. url = vod['urllink']
  851. title = self.removeHtml(txt=vod['title'])
  852. img = vod['imglink']
  853. id = vod['id']
  854. brief = vod['channel']
  855. year = vod['uploadtime']
  856. if len(url) == 0:
  857. continue
  858. guids = [tid, title, url, img, id, year, '', brief]
  859. guid = "||".join(guids)
  860. videos.append({
  861. "vod_id": guid,
  862. "vod_name": title,
  863. "vod_pic": img,
  864. "vod_remarks": year
  865. })
  866. return videos
  def get_list1(self, html, tid, year_prefix=None):
      """
      Convert a column-search JSON reply ('response' -> 'docs') into vod entries.

      @param html: JSON text of the reply.
      @param tid: category id stored as the first field of each vod_id.
      @param year_prefix: optional text prepended to vod_id as "prefix$$$".
      @return: list of dicts with vod_id / vod_name / vod_pic / vod_remarks;
               [] when 'response' is null. Columns with an empty website are
               skipped. vod_remarks is the latest video title, reduced to the
               part after the first '》' when that character is present.
      """
      response = json.loads(html)['response']
      if response is None:
          return []
      entries = []
      for column in response['docs']:
          latest = column['lastVIDE']
          share_code = latest['videoSharedCode']
          latest_title = column['lastVIDE']['videoTitle']
          name = column['column_name']
          site = column['column_website']
          logo = column['column_logo']
          play_date = column['column_playdate']
          summary = column['column_brief']
          if len(site) == 0:
              continue
          vod_key = "||".join([tid, name, site, logo, share_code, play_date, '', summary])
          if year_prefix:
              vod_key = year_prefix + '$$$' + vod_key
          if '》' in latest_title:
              remark = latest_title.split('》')[1].strip()
          else:
              remark = latest_title.strip()
          entries.append({
              "vod_id": vod_key,
              "vod_name": name,
              "vod_pic": logo,
              "vod_remarks": remark
          })
      return entries
  896. # 分类取结果
  def get_list(self, html, tid):
      """
      Convert a category JSON reply ('data' -> 'list') into vod entries.

      The optional fields 'brief', 'year' and 'actors' fall back to '' when
      they are missing OR null. Previously bare ``except:`` clauses covered only
      the missing case, and a null value made the "||" join raise TypeError.
      This mirrors the handling in get_list_4k.

      @param html: JSON text of the reply.
      @param tid: category id stored as the first field of each vod_id.
      @return: list of dicts with vod_id / vod_name / vod_pic / vod_remarks
               (remarks always ''); [] when 'data' is null. Items with an
               empty 'url' are skipped.
               vod_id is "tid||title||url||img||id||year||actors||brief".
      """
      data = json.loads(html)['data']
      if data is None:
          return []
      videos = []
      for vod in data['list']:
          url = vod['url']
          title = vod['title']
          img = vod['image']
          vod_key = vod['id']
          if len(url) == 0:
              continue
          brief = vod.get('brief') or ''
          year = vod.get('year') or ''
          actors = vod.get('actors') or ''
          guid = "||".join([tid, title, url, img, vod_key, year, actors, brief])
          videos.append({
              "vod_id": guid,
              "vod_name": title,
              "vod_pic": img,
              "vod_remarks": ''
          })
      return videos
  933. # 4k分类取结果
  def get_list_4k(self, html, tid):
      """
      Convert a 4K category JSON reply ('data' -> 'list') into vod entries.

      Each list item supplies the remark text ('title') and the id; every other
      field is read from its nested 'last_video' object.

      @param html: JSON text of the reply.
      @param tid: category id stored as the first field of each vod_id.
      @return: list of dicts with vod_id / vod_name / vod_pic / vod_remarks;
               [] when 'data' is null. Items whose last_video url is empty are
               skipped.
      """
      data = json.loads(html)['data']
      if data is None:
          return []
      entries = []
      for item in data['list']:
          remark = item['title']
          item_id = item['id']
          latest = item['last_video']
          picture = latest['image']
          link = latest['url']
          name = latest['title']
          summary = latest.get('brief') or ''
          year = latest.get('year') or ''
          cast = latest.get('actors') or ''
          if len(link) == 0:
              continue
          entries.append({
              "vod_id": "||".join([tid, name, link, picture, item_id, year, cast, summary]),
              "vod_name": name,
              "vod_pic": picture,
              "vod_remarks": remark
          })
      return entries
  # Manual smoke test: list one category, open the first entry's detail page,
  # then resolve the first episode of its play list. Needs the project's t4
  # loader; presumably also network access - confirm before running in CI.
  if __name__ == '__main__':
      from t4.core.loader import t4_spider_init
      spider = Spider()
      t4_spider_init(spider)
      # print(spider.homeContent(True))
      # print(spider.homeVideoContent())
      # spider.init_api_ext_file()
      # url = 'https://api.cntv.cn/lanmu/columnSearch?&fl=&fc=%E6%96%B0%E9%97%BB&cid=&p=1&n=20&serviceId=tvcctv&t=jsonp&cb=Callback'
      # url = 'https://api.cntv.cn/lanmu/columnSearch?&fl=&fc=&cid=&p=1&n=20&serviceId=tvcctv&t=json&cb=ko'
      # r = spider.fetch(url)
      # print(r.text)
      # home_content = spider.homeContent(None)
      # print(home_content)
      # First page of the '栏目大全' category with the filter cid='n'.
      cate_content = spider.categoryContent('栏目大全', 1, {'cid': 'n'}, {})
      # cate_content = spider.categoryContent('频道直播', 1, None, None)
      print(cate_content)
      vid = cate_content['list'][0]['vod_id']
      print(vid)
      detail_content = spider.detailContent([vid])
      print(detail_content)
      # #
      vod_play_from = detail_content['list'][0]['vod_play_from']
      vod_play_url = detail_content['list'][0]['vod_play_url']
      print(vod_play_from, vod_play_url)
      # Entries are separated by '#', and each entry is "title$url"; take the first url.
      _url = vod_play_url.split('#')[0].split('$')[1]
      print(_url)
      print('vod_play_from:', vod_play_from, ' vod_play_url:', _url)
      play = spider.playerContent(vod_play_from, _url, None)
      print(play)
      # play = spider.playerContent('道长在线直播', 'cctv1||https://tv.cctv.com/live/cctv1/', None)
      # print(play)