py_cctv_2025.py 24 KB


  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import json
  7. import time
  8. import base64
  9. import re
  10. from urllib import request, parse
  11. import urllib
  12. import urllib.request
  13. import time
  14. class Spider(Spider): # 元类 默认的元类 type
  def getName(self):
      """Return the display name of this spider."""
      display_name = "中央电视台"  # searchable
      return display_name
  def init(self, extend=""):
      """Initialise the spider.

      Nothing is configured here; the ``extend`` argument is only echoed to
      stdout between two rows of ``=`` characters.
      """
      print(f"============{extend}============")
  def isVideoFormat(self, url):
      """Interface stub: not implemented for this source, always returns None."""
      return None
  def manualVideoCheck(self):
      """Interface stub: not implemented for this source, always returns None."""
      return None
  def homeContent(self, filter):
      """Build the home payload: the category list and, optionally, the filters.

      Every category uses its display name as its id as well.  When ``filter``
      is truthy the ``filter`` table from ``self.config`` is attached under the
      ``filters`` key.
      """
      category_names = ("电视剧", "动画片", "纪录片", "特别节目", "节目大全")
      payload = {
          'class': [
              {'type_name': label, 'type_id': label} for label in category_names
          ]
      }
      if filter:
          payload['filters'] = self.config['filter']
      return payload
  def homeVideoContent(self):
      """Return the home-page recommendations, which are always empty here."""
      return dict(list=[])
  def categoryContent(self, tid, pg, filter, extend):
      """Return one page of the listing for category ``tid``.

      Args:
          tid: Category id, one of the names emitted by ``homeContent``.
          pg: Page number; forwarded to the API as the ``p`` parameter and
              echoed back unchanged in the result.
          filter: Not used by this method; kept for the spider interface.
          extend: Mapping of selected filter keys (see ``config['filter']``)
              to their values.  ``None`` or an empty mapping means no filter.

      Returns:
          dict with the keys ``list``, ``page``, ``pagecount``, ``limit`` and
          ``total``.  An unknown ``tid`` yields an empty ``list`` without any
          network request.
      """
      extend = extend or {}

      def chosen(key):
          # Percent-encode every filter value: several of them are Chinese
          # text, and urllib cannot send a request line containing non-ASCII
          # characters.  ASCII values such as "A" or "0-9" pass unchanged.
          return urllib.parse.quote(str(extend.get(key, '') or ''))

      fc = urllib.parse.quote(tid)
      album_api = 'https://api.cntv.cn/list/getVideoAlbumList?channelid='
      album_tail = '&p={0}&n=24&serviceId=tvcctv&topv=1&t=json'.format(pg)
      page_size = 24
      is_column_list = tid == '节目大全'

      if tid == '动画片':
          url = album_api + 'CHAL1460955899450127&area={0}&sc={1}&fc={2}&letter={3}'.format(
              chosen('datadq-area'), chosen('datafl-sc'), fc, chosen('dataszm-letter')) + album_tail
      elif tid == '纪录片':
          url = album_api + 'CHAL1460955924871139&fc={0}&channel={1}&sc={2}&year={3}&letter={4}'.format(
              fc, chosen('datapd-channel'), chosen('datafl-sc'),
              chosen('datanf-year'), chosen('dataszm-letter')) + album_tail
      elif tid == '电视剧':
          # The region filter is offered in config for this category, so it
          # has to be forwarded as well.
          url = album_api + 'CHAL1460955853485115&area={0}&sc={1}&fc={2}&year={3}&letter={4}'.format(
              chosen('datadq-area'), chosen('datafl-sc'), fc,
              chosen('datanf-year'), chosen('dataszm-letter')) + album_tail
      elif tid == '特别节目':
          url = album_api + 'CHAL1460955953877151&channel={0}&sc={1}&fc={2}&bigday=&letter={3}'.format(
              chosen('datapd-channel'), chosen('datafl-sc'), fc,
              chosen('dataszm-letter')) + album_tail
      elif is_column_list:
          url = ('https://api.cntv.cn/lanmu/columnSearch?&fl={0}&fc={1}&cid={2}&p={3}'
                 '&n=20&serviceId=tvcctv&t=json&cb=ko').format(
              chosen('fl'), chosen('fc'), chosen('cid'), pg)
          page_size = 20
      else:
          # Unknown category: there is no JSON endpoint to query.
          url = None

      videos = []
      if url is not None:
          text = self.webReadFile(urlStr=url, header=self.header)
          if is_column_list:
              # The column search answers as JSONP ("ko(...);"); keep only the
              # JSON between the outermost parentheses.
              start, end = text.find('('), text.rfind(')')
              if -1 < start < end:
                  text = text[start + 1:end]
              videos = self.get_list1(html=text, tid=tid)
          else:
              videos = self.get_list(html=text, tid=tid)

      return {
          'list': videos,
          'page': pg,
          # A full page suggests more pages exist; otherwise this is the last.
          'pagecount': 9999 if len(videos) >= page_size else pg,
          'limit': 90,
          'total': 999999,
      }
  def detailContent(self, array):
      """Build the detail record (metadata plus episode list) for one item.

      ``array[0]`` is the composite id built by the listing methods of this
      class: ``tid, title, url, img, id, year, actors, brief`` joined with
      ``###``.  Ids joined with ``||`` are accepted too, and an optional
      ``<prefix>$$$`` in front of the id is ignored.

      Args:
          array: Sequence whose first element is the composite id.

      Returns:
          ``{'list': [vod]}`` on success, or ``{}`` when the id is malformed
          or no episode could be found.
      """
      raw_id = array[0]
      did = raw_id.split('$$$')[1] if '$$$' in raw_id else raw_id

      # The listing methods join the fields with '###'; '||' is kept as a
      # fallback for ids produced elsewhere.
      separator = '###' if '###' in did else '||'
      fields = did.split(separator, 7)
      if len(fields) < 4:
          return {}
      fields += [''] * (8 - len(fields))
      tid, title, lastVideo, logo, vid, vod_year, actors, brief = fields

      if tid == '频道直播':
          vod = {
              "vod_id": did,
              "vod_name": title.replace(' ', ''),
              "vod_pic": logo,
              "vod_content": f'频道{title}正在直播中',
              "vod_play_from": '道长在线直播',
              "vod_play_url": f'在线观看${title}||{lastVideo}',
          }
          return {'list': [vod]}

      # Page-scraping patterns, used only when the JSON API lists no episode.
      drama_pattern = r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'"
      column_pattern = r'href="(?P<url>.+?)" target="_blank" alt="(?P<title>.+?)" title=".+?">'
      fallback_patterns = {
          "电视剧": drama_pattern,
          "纪录片": drama_pattern,
          "特别节目": r'class="tp1"><a\s*href="(?P<url>https://.+?)"\s*target="_blank"\s*title="(?P<title>.+?)"></a></div>',
          "动画片": r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'",
          "栏目大全": column_pattern,
          "节目大全": column_pattern,
      }

      fromId = 'CCTV'
      videoList = []
      try:
          if tid == "搜索":
              # Search hits are single videos: play the hit itself.
              fromId = '中央台'
              videoList = [title + "$" + lastVideo]
          else:
              if tid in ("栏目大全", "节目大全"):
                  # For columns the id field is a video guid; resolve it to
                  # the column id first.
                  info_url = 'https://api.cntv.cn/video/videoinfoByGuid?guid={0}&serviceId=tvcctv'.format(vid)
                  topicId = json.loads(self.webReadFile(urlStr=info_url, header=self.header))['ctid']
                  list_url = "https://api.cntv.cn/NewVideo/getVideoListByColumn?id={0}&d=&p=1&n=100&sort=desc&mode=0&serviceId=tvcctv&t=json".format(topicId)
              else:
                  list_url = 'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew?id={0}&serviceId=tvcctv&p=1&n=100&mode=0&pub=1'.format(vid)
              jRoot = json.loads(self.webReadFile(urlStr=list_url, header=self.header))
              videoList = self.get_EpisodesList(jsonList=jRoot['data']['list'])
              patternTxt = fallback_patterns.get(tid)
              if not videoList and patternTxt:
                  htmlTxt = self.webReadFile(urlStr=lastVideo, header=self.header)
                  videoList = self.get_EpisodesList_re(htmlTxt=htmlTxt, patternTxt=patternTxt)
                  fromId = '央视'
      except Exception as exc:
          # Best effort: a network or parsing failure means "no episodes",
          # but it is reported instead of being silently discarded.
          self.log(f'detailContent failed for {tid}: {exc}')
          videoList = []

      if not videoList:
          return {}

      vod = {
          "vod_id": array[0],
          "vod_name": title,
          "vod_pic": logo,
          "type_name": tid,
          "vod_year": vod_year,
          "vod_area": "",
          "vod_remarks": '',
          "vod_actor": actors,
          "vod_director": '',
          "vod_content": brief,
          "vod_play_from": fromId,
          "vod_play_url": "#".join(videoList),
      }
      return {'list': [vod]}
  210. config = {
  211. "player": {},
  212. "filter": {
  213. "电视剧":[
  214. {"key":"datafl-sc","name":"类型","value":[{"n":"全部","v":""},{"n":"谍战","v":"谍战"},{"n":"悬疑","v":"悬疑"},{"n":"刑侦","v":"刑侦"},{"n":"历史","v":"历史"},{"n":"古装","v":"古装"},{"n":"武侠","v":"武侠"},{"n":"军旅","v":"军旅"},{"n":"战争","v":"战争"},{"n":"喜剧","v":"喜剧"},{"n":"青春","v":"青春"},{"n":"言情","v":"言情"},{"n":"偶像","v":"偶像"},{"n":"家庭","v":"家庭"},{"n":"年代","v":"年代"},{"n":"革命","v":"革命"},{"n":"农村","v":"农村"},{"n":"都市","v":"都市"},{"n":"其他","v":"其他"}]},
  215. {"key":"datadq-area","name":"地区","value":[{"n":"全部","v":""},{"n":"中国大陆","v":"中国大陆"},{"n":"中国香港","v":"香港"},{"n":"美国","v":"美国"},{"n":"欧洲","v":"欧洲"},{"n":"泰国","v":"泰国"}]},
  216. {"key":"datanf-year","name":"年份","value":[{"n":"全部","v":""},{"n":"2024","v":"2024"},{"n":"2023","v":"2023"},{"n":"2022","v":"2022"},{"n":"2021","v":"2021"},{"n":"2020","v":"2020"},{"n":"2019","v":"2019"},{"n":"2018","v":"2018"},{"n":"2017","v":"2017"},{"n":"2016","v":"2016"},{"n":"2015","v":"2015"},{"n":"2014","v":"2014"},{"n":"2013","v":"2013"},{"n":"2012","v":"2012"},{"n":"2011","v":"2011"},{"n":"2010","v":"2010"},{"n":"2009","v":"2009"},{"n":"2008","v":"2008"},{"n":"2007","v":"2007"},{"n":"2006","v":"2006"},{"n":"2005","v":"2005"},{"n":"2004","v":"2004"},{"n":"2003","v":"2003"},{"n":"2002","v":"2002"},{"n":"2001","v":"2001"},{"n":"2000","v":"2000"},{"n":"1999","v":"1999"},{"n":"1998","v":"1998"},{"n":"1997","v":"1997"}]},
  217. {"key":"dataszm-letter","name":"字母","value":[{"n":"全部","v":""},{"n":"A","v":"A"},{"n":"C","v":"C"},{"n":"E","v":"E"},{"n":"F","v":"F"},{"n":"G","v":"G"},{"n":"H","v":"H"},{"n":"I","v":"I"},{"n":"J","v":"J"},{"n":"K","v":"K"},{"n":"L","v":"L"},{"n":"M","v":"M"},{"n":"N","v":"N"},{"n":"O","v":"O"},{"n":"P","v":"P"},{"n":"Q","v":"Q"},{"n":"R","v":"R"},{"n":"S","v":"S"},{"n":"T","v":"T"},{"n":"U","v":"U"},{"n":"V","v":"V"},{"n":"W","v":"W"},{"n":"X","v":"X"},{"n":"Y","v":"Y"},{"n":"Z","v":"Z"},{"n":"0-9","v":"0-9"}]}
  218. ],
  219. "动画片":[
  220. {"key":"datafl-sc","name":"类型","value":[{"n":"全部","v":""},{"n":"亲子","v":"亲子"},{"n":"搞笑","v":"搞笑"},{"n":"冒险","v":"冒险"},{"n":"动作","v":"动作"},{"n":"宠物","v":"宠物"},{"n":"体育","v":"体育"},{"n":"益智","v":"益智"},{"n":"历史","v":"历史"},{"n":"教育","v":"教育"},{"n":"校园","v":"校园"},{"n":"言情","v":"言情"},{"n":"武侠","v":"武侠"},{"n":"经典","v":"经典"},{"n":"未来","v":"未来"},{"n":"古代","v":"古代"},{"n":"神话","v":"神话"},{"n":"真人","v":"真人"},{"n":"励志","v":"励志"},{"n":"热血","v":"热血"},{"n":"奇幻","v":"奇幻"},{"n":"童话","v":"童话"},{"n":"剧情","v":"剧情"},{"n":"夺宝","v":"夺宝"},{"n":"其他","v":"其他"}]},
  221. {"key":"datadq-area","name":"地区","value":[{"n":"全部","v":""},{"n":"中国大陆","v":"中国大陆"},{"n":"美国","v":"美国"},{"n":"欧洲","v":"欧洲"}]},
  222. {"key":"dataszm-letter","name":"字母","value":[{"n":"全部","v":""},{"n":"A","v":"A"},{"n":"C","v":"C"},{"n":"E","v":"E"},{"n":"F","v":"F"},{"n":"G","v":"G"},{"n":"H","v":"H"},{"n":"I","v":"I"},{"n":"J","v":"J"},{"n":"K","v":"K"},{"n":"L","v":"L"},{"n":"M","v":"M"},{"n":"N","v":"N"},{"n":"O","v":"O"},{"n":"P","v":"P"},{"n":"Q","v":"Q"},{"n":"R","v":"R"},{"n":"S","v":"S"},{"n":"T","v":"T"},{"n":"U","v":"U"},{"n":"V","v":"V"},{"n":"W","v":"W"},{"n":"X","v":"X"},{"n":"Y","v":"Y"},{"n":"Z","v":"Z"},{"n":"0-9","v":"0-9"}]}
  223. ],
  224. "纪录片":[
  225. {"key":"datapd-channel","name":"频道","value":[{"n":"全部","v":""},{"n":"CCTV1 综合","v":"CCTV1 综合"},{"n":"CCTV2 财经","v":"CCTV2 财经"},{"n":"CCTV3 综艺","v":"CCTV3 综艺"},{"n":"CCTV4 中文国际","v":"CCTV4 中文国际"},{"n":"CCTV5 体育","v":"CCTV5 体育"},{"n":"CCTV6 电影","v":"CCTV6 电影"},{"n":"CCTV7 国防军事","v":"CCTV7 国防军事"},{"n":"CCTV8 电视剧","v":"CCTV8 电视剧"},{"n":"CCTV9 纪录","v":"CCTV9 纪录"},{"n":"CCTV10 科教","v":"CCTV10 科教"},{"n":"CCTV11 戏曲","v":"CCTV11 戏曲"},{"n":"CCTV12 社会与法","v":"CCTV12 社会与法"},{"n":"CCTV13 新闻","v":"CCTV13 新闻"},{"n":"CCTV14 少儿","v":"CCTV14 少儿"},{"n":"CCTV15 音乐","v":"CCTV15 音乐"},{"n":"CCTV17 农业农村","v":"CCTV17 农业农村"}]},
  226. {"key":"datafl-sc","name":"类型","value":[{"n":"全部","v":""},{"n":"人文历史","v":"人文历史"},{"n":"人物","v":"人物"},{"n":"军事","v":"军事"},{"n":"探索","v":"探索"},{"n":"社会","v":"社会"},{"n":"时政","v":"时政"},{"n":"经济","v":"经济"},{"n":"科技","v":"科技"}]},
  227. {"key":"datanf-year","name":"年份","value":[{"n":"全部","v":""},{"n":"2024","v":"2024"},{"n":"2023","v":"2023"},{"n":"2022","v":"2022"},{"n":"2021","v":"2021"},{"n":"2020","v":"2020"},{"n":"2019","v":"2019"},{"n":"2018","v":"2018"},{"n":"2017","v":"2017"},{"n":"2016","v":"2016"},{"n":"2015","v":"2015"},{"n":"2014","v":"2014"},{"n":"2013","v":"2013"},{"n":"2012","v":"2012"},{"n":"2011","v":"2011"},{"n":"2010","v":"2010"},{"n":"2009","v":"2009"},{"n":"2008","v":"2008"}]},
  228. {"key":"dataszm-letter","name":"字母","value":[{"n":"全部","v":""},{"n":"A","v":"A"},{"n":"C","v":"C"},{"n":"E","v":"E"},{"n":"F","v":"F"},{"n":"G","v":"G"},{"n":"H","v":"H"},{"n":"I","v":"I"},{"n":"J","v":"J"},{"n":"K","v":"K"},{"n":"L","v":"L"},{"n":"M","v":"M"},{"n":"N","v":"N"},{"n":"O","v":"O"},{"n":"P","v":"P"},{"n":"Q","v":"Q"},{"n":"R","v":"R"},{"n":"S","v":"S"},{"n":"T","v":"T"},{"n":"U","v":"U"},{"n":"V","v":"V"},{"n":"W","v":"W"},{"n":"X","v":"X"},{"n":"Y","v":"Y"},{"n":"Z","v":"Z"},{"n":"0-9","v":"0-9"}]}
  229. ],
  230. "特别节目":[
  231. {"key":"datapd-channel","name":"频道","value":[{"n":"全部","v":""},{"n":"CCTV1 综合","v":"CCTV1 综合"},{"n":"CCTV2 财经","v":"CCTV2 财经"},{"n":"CCTV3 综艺","v":"CCTV3 综艺"},{"n":"CCTV4 中文国际","v":"CCTV4 中文国际"},{"n":"CCTV5 体育","v":"CCTV5 体育"},{"n":"CCTV6 电影","v":"CCTV6 电影"},{"n":"CCTV7 国防军事","v":"CCTV7 国防军事"},{"n":"CCTV8 电视剧","v":"CCTV8 电视剧"},{"n":"CCTV9 纪录","v":"CCTV9 纪录"},{"n":"CCTV10 科教","v":"CCTV10 科教"},{"n":"CCTV11 戏曲","v":"CCTV11 戏曲"},{"n":"CCTV12 社会与法","v":"CCTV12 社会与法"},{"n":"CCTV13 新闻","v":"CCTV13 新闻"},{"n":"CCTV14 少儿","v":"CCTV14 少儿"},{"n":"CCTV15 音乐","v":"CCTV15 音乐"},{"n":"CCTV17 农业农村","v":"CCTV17 农业农村"}]},
  232. {"key":"datafl-sc","name":"类型","value":[{"n":"全部","v":""},{"n":"全部","v":"全部"},{"n":"新闻","v":"新闻"},{"n":"经济","v":"经济"},{"n":"综艺","v":"综艺"},{"n":"体育","v":"体育"},{"n":"军事","v":"军事"},{"n":"影视","v":"影视"},{"n":"科教","v":"科教"},{"n":"戏曲","v":"戏曲"},{"n":"青少","v":"青少"},{"n":"音乐","v":"音乐"},{"n":"社会","v":"社会"},{"n":"公益","v":"公益"},{"n":"其他","v":"其他"}]},
  233. {"key":"dataszm-letter","name":"字母","value":[{"n":"全部","v":""},{"n":"A","v":"A"},{"n":"C","v":"C"},{"n":"E","v":"E"},{"n":"F","v":"F"},{"n":"G","v":"G"},{"n":"H","v":"H"},{"n":"I","v":"I"},{"n":"J","v":"J"},{"n":"K","v":"K"},{"n":"L","v":"L"},{"n":"M","v":"M"},{"n":"N","v":"N"},{"n":"O","v":"O"},{"n":"P","v":"P"},{"n":"Q","v":"Q"},{"n":"R","v":"R"},{"n":"S","v":"S"},{"n":"T","v":"T"},{"n":"U","v":"U"},{"n":"V","v":"V"},{"n":"W","v":"W"},{"n":"X","v":"X"},{"n":"Y","v":"Y"},{"n":"Z","v":"Z"},{"n":"0-9","v":"0-9"}]}
  234. ],
  235. "节目大全":[{"key":"cid","name":"频道","value":[{"n":"全部","v":""},{"n":"CCTV-1综合","v":"EPGC1386744804340101"},{"n":"CCTV-2财经","v":"EPGC1386744804340102"},{"n":"CCTV-3综艺","v":"EPGC1386744804340103"},{"n":"CCTV-4中文国际","v":"EPGC1386744804340104"},{"n":"CCTV-5体育","v":"EPGC1386744804340107"},{"n":"CCTV-6电影","v":"EPGC1386744804340108"},{"n":"CCTV-7国防军事","v":"EPGC1386744804340109"},{"n":"CCTV-8电视剧","v":"EPGC1386744804340110"},{"n":"CCTV-9纪录","v":"EPGC1386744804340112"},{"n":"CCTV-10科教","v":"EPGC1386744804340113"},{"n":"CCTV-11戏曲","v":"EPGC1386744804340114"},{"n":"CCTV-12社会与法","v":"EPGC1386744804340115"},{"n":"CCTV-13新闻","v":"EPGC1386744804340116"},{"n":"CCTV-14少儿","v":"EPGC1386744804340117"},{"n":"CCTV-15音乐","v":"EPGC1386744804340118"},{"n":"CCTV-16奥林匹克","v":"EPGC1634630207058998"},{"n":"CCTV-17农业农村","v":"EPGC1563932742616872"},{"n":"CCTV-5+体育赛事","v":"EPGC1468294755566101"}]},{"key":"fc","name":"分类","value":[{"n":"全部","v":""},{"n":"新闻","v":"新闻"},{"n":"体育","v":"体育"},{"n":"综艺","v":"综艺"},{"n":"健康","v":"健康"},{"n":"生活","v":"生活"},{"n":"科教","v":"科教"},{"n":"经济","v":"经济"},{"n":"农业","v":"农业"},{"n":"法治","v":"法治"},{"n":"军事","v":"军事"},{"n":"少儿","v":"少儿"},{"n":"动画","v":"动画"},{"n":"纪实","v":"纪实"},{"n":"戏曲","v":"戏曲"},{"n":"音乐","v":"音乐"},{"n":"影视","v":"影视"}]},{"key":"fl","name":"字母","value":[{"n":"全部","v":""},{"n":"A","v":"A"},{"n":"B","v":"B"},{"n":"C","v":"C"},{"n":"D","v":"D"},{"n":"E","v":"E"},{"n":"F","v":"F"},{"n":"G","v":"G"},{"n":"H","v":"H"},{"n":"I","v":"I"},{"n":"J","v":"J"},{"n":"K","v":"K"},{"n":"L","v":"L"},{"n":"M","v":"M"},{"n":"N","v":"N"},{"n":"O","v":"O"},{"n":"P","v":"P"},{"n":"Q","v":"Q"},{"n":"R","v":"R"},{"n":"S","v":"S"},{"n":"T","v":"T"},{"n":"U","v":"U"},{"n":"V","v":"V"},{"n":"W","v":"W"},{"n":"X","v":"X"},{"n":"Y","v":"Y"},{"n":"Z","v":"Z"}]},{"key":"year","name":"年份","value":[{"n":"全部","v":""},{"n":"2024","v":"2024"},{"n":"2023","v":"2023"},{"n":"2022","v":"2022"},{"n":"2021","v":"2021"},{"n":"2020","v":"2020"},{"n":"2019","v":"2019"},{"n":"2018","v":"2018"},{"
n":"2017","v":"2017"},{"n":"2016","v":"2016"},{"n":"2015","v":"2015"},{"n":"2014","v":"2014"},{"n":"2013","v":"2013"},{"n":"2012","v":"2012"},{"n":"2011","v":"2011"},{"n":"2010","v":"2010"},{"n":"2009","v":"2009"},{"n":"2008","v":"2008"},{"n":"2007","v":"2007"},{"n":"2006","v":"2006"},{"n":"2005","v":"2005"},{"n":"2004","v":"2004"},{"n":"2003","v":"2003"},{"n":"2002","v":"2002"},{"n":"2001","v":"2001"},{"n":"2000","v":"2000"}]},{"key":"month","name":"月份","value":[{"n":"全部","v":""},{"n":"12","v":"12"},{"n":"11","v":"11"},{"n":"10","v":"10"},{"n":"09","v":"09"},{"n":"08","v":"08"},{"n":"07","v":"07"},{"n":"06","v":"06"},{"n":"05","v":"05"},{"n":"04","v":"04"},{"n":"03","v":"03"},{"n":"02","v":"02"},{"n":"01","v":"01"}]}]
  236. }
  237. }
  # Default request headers.  They are handed to webReadFile/TestWebPage,
  # which currently do not attach them to the outgoing request.
  header = {
      "User-Agent": (
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
          "(KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36"
      ),
      "Host": "tv.cctv.com",
      "Referer": "https://tv.cctv.com/",
  }
  def localProxy(self, param):
      """Answer a local-proxy request with an empty MPEG-TS response.

      Returns:
          ``[status, content_type, action, body]``.  ``action`` used to be an
          undefined name (NameError on every call); it is now an empty action
          descriptor.
      """
      # NOTE(review): the descriptor keys follow the usual spider template;
      # confirm against the host application's proxy contract.
      action = {
          'url': '',
          'header': '',
          'param': '',
          'type': 'string',
          'after': '',
      }
      return [200, "video/MP2T", action, ""]
  245. #-----------------------------------------------自定义函数-----------------------------------------------
  246. #访问网页
  def webReadFile(self, urlStr, header):
      """Fetch ``urlStr`` and return the response body decoded as UTF-8.

      ``header`` is accepted for interface compatibility but is not attached
      to the request.
      """
      request_obj = urllib.request.Request(url=urlStr)
      with urllib.request.urlopen(request_obj) as response:
          body = response.read()
      return body.decode('utf-8')
  253. #判断网络地址是否存在
  def TestWebPage(self, urlStr, header):
      """Probe ``urlStr`` with a HEAD request and return the HTTP status code.

      ``header`` is accepted for interface compatibility but is not attached
      to the request.

      Returns:
          The status code of the response.  Error statuses (4xx/5xx) are
          returned as their code instead of being raised, so callers can
          compare the result with 200.

      Raises:
          urllib.error.URLError: when the server cannot be reached at all.
      """
      probe = urllib.request.Request(url=urlStr, method='HEAD')
      try:
          with urllib.request.urlopen(probe) as response:
              return response.getcode()
      except urllib.error.HTTPError as err:
          # urlopen raises for error statuses; report the code so the
          # "not 200" branch of the callers is reachable.
          return err.code
  260. #正则取文本
  def get_RegexGetText(self, Text, RegexText, Index):
      """Return group ``Index`` of the first match of ``RegexText`` in ``Text``.

      The search uses the MULTILINE and DOTALL flags.  An empty string is
      returned when nothing matches.
      """
      found = re.search(RegexText, Text, re.M | re.S)
      return found.group(Index) if found is not None else ""
  269. #取集数
  def get_EpisodesList(self, jsonList):
      """Turn API episode records into ``"title$guid"`` play entries.

      Each record must provide ``guid`` and ``title``; records whose ``guid``
      is empty are skipped.
      """
      episodes = []
      for record in jsonList:
          guid, name = record['guid'], record['title']
          if len(guid) != 0:
              episodes.append(name + "$" + guid)
      return episodes
  279. #取集数
  def get_EpisodesList_re(self, htmlTxt, patternTxt):
      """Extract ``"title$url"`` play entries from a page with a regex.

      ``patternTxt`` must define the named groups ``url`` and ``title``; it is
      applied with the MULTILINE and DOTALL flags.  Matches whose ``url`` is
      empty are skipped.
      """
      episodes = []
      for hit in re.finditer(patternTxt, htmlTxt, re.M | re.S):
          link, name = hit.group('url'), hit.group('title')
          if len(link) != 0:
              episodes.append(name + "$" + link)
      return episodes
  290. #取剧集区
  def get_lineList(self, Txt, mark, after):
      """Split ``Txt`` into the sections that start at each ``mark``.

      Every section runs from an occurrence of ``mark`` up to (not including)
      the next ``after`` that follows it.  If ``after`` never follows a mark,
      that last section runs to the end of the text.

      Args:
          Txt: Text to scan.
          mark: Start delimiter; it is kept in the returned sections.
          after: End delimiter; it is not included in the sections.

      Returns:
          List of sections in order of appearance; empty when ``mark`` is
          empty or absent.
      """
      sections = []
      if not mark:
          # str.find('') always succeeds at the current position, which would
          # never advance.
          return sections
      start = Txt.find(mark)
      while start != -1:
          # Look for the terminator after the mark itself so a terminator that
          # overlaps the mark cannot produce an empty, non-advancing section.
          stop = Txt.find(after, start + len(mark))
          if stop == -1:
              sections.append(Txt[start:])
              break
          sections.append(Txt[start:stop])
          start = Txt.find(mark, stop)
      return sections
  299. #正则取文本,返回数组
  def get_RegexGetTextLine(self, Text, RegexText, Index):
      """Return every match of ``RegexText`` in ``Text`` as a list.

      The pattern is applied with the MULTILINE and DOTALL flags and the
      result follows ``re.findall`` semantics.  ``Index`` is accepted but not
      used.
      """
      return list(re.findall(RegexText, Text, re.M | re.S))
  309. #删除html标签
  def removeHtml(self, txt):
      """Strip HTML tags from ``txt`` and turn ``&nbsp;`` into a plain space."""
      without_tags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
      return without_tags.replace("&nbsp;", " ")
  314. # 链接替换
  def hookM3u8(self, url):
      """Apply one randomly chosen rewrite to an m3u8 URL.

      Exactly one of three substitutions is applied, each to the first
      occurrence only: ``asp/`` -> ``asp//``, ``hls/`` -> ``hls//``, or the
      ``newcntv.qcloudcdn.com`` origin -> ``hls.cntv.myalicdn.com/``.

      Args:
          url: URL to rewrite; ``None`` is treated as an empty string.

      Returns:
          The rewritten URL (unchanged if the chosen pattern does not occur).
      """
      # Imported locally because the module does not import ``random``.
      import random

      url = url or ''
      rewrites = (
          ('asp/', 'asp//'),
          ('hls/', 'hls//'),
          ('https://newcntv.qcloudcdn.com', 'https://hls.cntv.myalicdn.com/'),
      )
      old, new = random.choice(rewrites)
      return url.replace(old, new, 1)
  323. # 取m3u8
  def get_m3u8(self, urlTxt):
      """Resolve a video id to a playable m3u8 URL.

      The video-info API is queried for ``hls_url``; the playlist behind it is
      fetched through a fixed alternative host, the name of its last entry is
      substituted into the original link, and the resulting URL is probed
      with a HEAD request before being returned.

      Args:
          urlTxt: Video id, sent to the API as the ``pid`` parameter.

      Returns:
          The final URL (after ``hookM3u8``), or ``''`` when no usable URL
          could be derived or the probe did not answer 200.
      """
      api_url = "https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={0}".format(urlTxt)
      info = json.loads(self.webReadFile(urlStr=api_url, header=self.header))
      link = (info.get('hls_url') or '').strip()

      # scheme://host part of the link; the class was written "A-z", which also
      # matches punctuation between 'Z' and 'a'.
      urlPrefix = self.get_RegexGetText(Text=link, RegexText=r'(https?://[A-Za-z0-9.-]+)/', Index=1)
      if not urlPrefix:
          # Without a prefix the replace() calls below would act on an empty
          # pattern and corrupt the URL.
          return ''

      new_link = link.replace(f'{urlPrefix}/asp/hls/', 'https://dh5.cntv.qcloudcdn.com/asp/h5e/hls/').split('?')[0]
      playlist = self.webReadFile(urlStr=new_link, header=self.header).strip().splitlines()
      if not playlist:
          return ''

      # The last playlist entry names the variant to use (presumably the
      # highest quality one - TODO confirm).
      maxVideo = playlist[-1].strip().split('/')[-1].replace('.m3u8', '')
      hdUrl = link.replace('main', maxVideo).replace(urlPrefix, 'https://newcntv.qcloudcdn.com')

      try:
          hdRsp = self.TestWebPage(urlStr=hdUrl, header=self.header)
      except urllib.error.HTTPError:
          # An error status means the variant is not available.
          hdRsp = 0
      if hdRsp != 200:
          return ''

      url = self.hookM3u8(hdUrl.split('?')[0])
      self.log(f'视频链接: {url}')
      return url
  def fixm3u8_url(self, url):
      """Resolve a master playlist URL to the URL of its first variant.

      The playlist at ``url`` (query string removed) is downloaded, its first
      URI line is joined with the playlist URL, and the result is probed with
      a HEAD request.

      Args:
          url: URL of the m3u8 playlist.

      Returns:
          The variant URL without query string, or ``''`` when the playlist
          has no URI line or the probe did not answer 200.
      """
      new_link = url.split('?')[0]
      content = self.webReadFile(urlStr=new_link, header=self.header).strip()

      # The first line that is neither blank nor a '#' tag is the first variant
      # URI.  This replaces fixed line indexes, which fail on short playlists
      # and pick the wrong line when extra tags are present.
      subUrl = ''
      for line in content.splitlines():
          candidate = line.strip()
          if candidate and not candidate.startswith('#'):
              subUrl = candidate
              break
      if not subUrl:
          return ''

      # NOTE(review): urljoin is not defined in this file; presumably it is
      # provided by the base Spider class - confirm.
      hdUrl = self.urljoin(new_link, subUrl).split('?')[0]

      try:
          hdRsp = self.TestWebPage(urlStr=hdUrl, header=self.header)
      except urllib.error.HTTPError:
          # An error status means the variant is not available.
          hdRsp = 0
      if hdRsp != 200:
          return ''

      self.log(f'视频链接: {hdUrl}')
      return hdUrl
  366. #搜索
  def get_list_search(self, html, tid):
      """Convert a search API response into listing entries.

      Args:
          html: JSON text whose top-level ``list`` holds the hits.
          tid: Category id stored as the first field of each composite id.

      Returns:
          One dict per hit with ``vod_id``, ``vod_name``, ``vod_pic`` and
          ``vod_remarks``.  Hits with an empty ``urllink`` are skipped.  The
          ``vod_id`` joins tid, title, url, image, id, upload time, an empty
          actors field and the channel with ``###``.
      """
      hits = json.loads(html).get('list') or []
      videos = []
      for vod in hits:
          url = vod['urllink']
          if len(url) == 0:
              continue
          title = self.removeHtml(txt=vod['title'])
          img = vod['imglink']
          uploaded = vod['uploadtime']
          guid = "{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}".format(
              tid, title, url, img, vod['id'], uploaded, '', vod['channel'])
          videos.append({
              "vod_id": guid,
              "vod_name": title,
              "vod_pic": img,
              "vod_remarks": uploaded
          })
      return videos
  def get_list1(self, html, tid):
      """Convert a column-search response into listing entries.

      ``html`` is JSON text whose ``response.docs`` holds the columns.  A null
      ``response`` yields an empty list and columns with an empty
      ``column_website`` are skipped.  The ``vod_id`` joins tid, name, website,
      logo, the shared code of the last video, play date, an empty actors
      field and the brief with ``###``.
      """
      response = json.loads(html)['response']
      if response is None:
          return []

      entries = []
      for column in response['docs']:
          shared_code = column['lastVIDE']['videoSharedCode']
          name = column['column_name']
          website = column['column_website']
          logo = column['column_logo']
          play_date = column['column_playdate']
          summary = column['column_brief']
          if len(website) == 0:
              continue
          composite = "###".join(
              "{0}".format(part)
              for part in (tid, name, website, logo, shared_code, play_date, '', summary)
          )
          entries.append({
              "vod_id": composite,
              "vod_name": name,
              "vod_pic": logo,
              "vod_remarks": ''
          })
      return entries
  416. #分类取结果
  def get_list(self, html, tid):
      """Convert an album-list response into listing entries.

      ``html`` is JSON text whose ``data.list`` holds the albums.  A null
      ``data`` yields an empty list and albums with an empty ``url`` are
      skipped.  ``brief``, ``year`` and ``actors`` are optional and default to
      an empty string.  The ``vod_id`` joins tid, title, url, image, id, year,
      actors and brief with ``###``.
      """
      data = json.loads(html)['data']
      if data is None:
          return []

      entries = []
      for album in data['list']:
          link = album['url']
          name = album['title']
          picture = album['image']
          album_id = album['id']
          summary = album.get('brief', '')
          released = album.get('year', '')
          cast = album.get('actors', '')
          if len(link) == 0:
              continue
          composite = "###".join(
              "{0}".format(part)
              for part in (tid, name, link, picture, album_id, released, cast, summary)
          )
          entries.append({
              "vod_id": composite,
              "vod_name": name,
              "vod_pic": picture,
              "vod_remarks": ''
          })
      return entries