py_cctv_test_1126.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import json
  7. import time
  8. import base64
  9. import re
  10. from urllib import request, parse
  11. import urllib
  12. import urllib.request
  13. import time
  14. class Spider(Spider): # 元类 默认的元类 type
  def getName(self):
    """Return the display name of this source (the original marks it as searchable)."""
    sourceName = "中央电视台"
    return sourceName
  def init(self,extend=""):
    """Print the ``extend`` argument between two rows of '='; nothing is stored.

    :param extend: value handed over by the host application (only echoed).
    """
    banner = "============{0}============".format(extend)
    print(banner)
  def isVideoFormat(self,url):
    """Placeholder hook: performs no check and always returns ``None``."""
    return None
  def manualVideoCheck(self):
    """Placeholder hook: does nothing and always returns ``None``."""
    return None
  def homeContent(self,filter):
    """Return the category list shown on the home screen.

    :param filter: when truthy, the filter definitions from ``self.config``
        are attached under the ``'filters'`` key.
    :return: ``{'class': [{'type_name', 'type_id'}, ...]}`` plus optional
        ``'filters'``; each category uses its name as its id.
    """
    categoryNames = ("电视剧", "动画片", "纪录片", "特别节目", "节目大全")
    result = {
      'class': [
        {'type_name': name, 'type_id': name}
        for name in categoryNames
      ]
    }
    if filter:
      result['filters'] = self.config['filter']
    return result
  def homeVideoContent(self):
    """Return the home-screen recommendations, which this source leaves empty."""
    return {'list': []}
  48. def categoryContent(self,tid,pg,filter,extend):
  49. result = {}
  50. month = ""#月
  51. year = ""#年
  52. area=''#地区
  53. channel=''#频道
  54. datafl=''#类型
  55. letter=''#字母
  56. pagecount=24
  57. if tid=='动画片':
  58. id=urllib.parse.quote(tid)
  59. if 'datadq-area' in extend.keys():
  60. area=urllib.parse.quote(extend['datadq-area'])
  61. if 'dataszm-letter' in extend.keys():
  62. letter=extend['dataszm-letter']
  63. if 'datafl-sc' in extend.keys():
  64. datafl=urllib.parse.quote(extend['datafl-sc'])
  65. url='https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955899450127&area={0}&sc={4}&fc={1}&letter={2}&p={3}&n=24&serviceId=tvcctv&topv=1&t=json'.format(area,id,letter,pg,datafl)
  66. elif tid=='纪录片':
  67. id=urllib.parse.quote(tid)
  68. if 'datapd-channel' in extend.keys():
  69. channel=urllib.parse.quote(extend['datapd-channel'])
  70. if 'datafl-sc' in extend.keys():
  71. datafl=urllib.parse.quote(extend['datafl-sc'])
  72. if 'datanf-year' in extend.keys():
  73. year=extend['datanf-year']
  74. if 'dataszm-letter' in extend.keys():
  75. letter=extend['dataszm-letter']
  76. url='https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955924871139&fc={0}&channel={1}&sc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(id,channel,datafl,year,letter,pg)
  77. elif tid=='电视剧':
  78. id=urllib.parse.quote(tid)
  79. if 'datafl-sc' in extend.keys():
  80. datafl=urllib.parse.quote(extend['datafl-sc'])
  81. if 'datanf-year' in extend.keys():
  82. year=extend['datanf-year']
  83. if 'dataszm-letter' in extend.keys():
  84. letter=extend['dataszm-letter']
  85. url='https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955853485115&area={0}&sc={1}&fc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(area,datafl,id,year,letter,pg)
  86. elif tid=='特别节目':
  87. id=urllib.parse.quote(tid)
  88. if 'datapd-channel' in extend.keys():
  89. channel=urllib.parse.quote(extend['datapd-channel'])
  90. if 'datafl-sc' in extend.keys():
  91. datafl=urllib.parse.quote(extend['datafl-sc'])
  92. if 'dataszm-letter' in extend.keys():
  93. letter=extend['dataszm-letter']
  94. url='https://api.cntv.cn/list/getVideoAlbumList?channelid=CHAL1460955953877151&channel={0}&sc={1}&fc={2}&bigday=&letter={3}&p={4}&n=24&serviceId=tvcctv&topv=1&t=json'.format(channel,datafl,id,letter,pg)
  95. elif tid=='节目大全':
  96. cid=''#频道
  97. if 'cid' in extend.keys():
  98. cid=extend['cid']
  99. fc=''#分类
  100. if 'fc' in extend.keys():
  101. fc=extend['fc']
  102. fl=''#字母
  103. if 'fl' in extend.keys():
  104. fl=extend['fl']
  105. url = 'https://api.cntv.cn/lanmu/columnSearch?&fl={0}&fc={1}&cid={2}&p={3}&n=20&serviceId=tvcctv&t=json&cb=ko'.format(fl,fc,cid,pg)
  106. pagecount=20
  107. else:
  108. url = 'https://tv.cctv.com/epg/index.shtml'
  109. videos=[]
  110. htmlText =self.webReadFile(urlStr=url,header=self.header)
  111. if tid=='节目大全':
  112. index=htmlText.rfind(');')
  113. if index>-1:
  114. htmlText=htmlText[3:index]
  115. videos =self.get_list1(html=htmlText,tid=tid)
  116. else:
  117. videos =self.get_list(html=htmlText,tid=tid)
  118. #print(videos)
  119. result['list'] = videos
  120. result['page'] = pg
  121. result['pagecount'] = 9999 if len(videos)>=pagecount else pg
  122. result['limit'] = 90
  123. result['total'] = 999999
  124. return result
  125. def detailContent(self,array):
  126. result={}
  127. aid = array[0].split('###')
  128. tid = aid[0]
  129. logo = aid[3]
  130. lastVideo = aid[2]
  131. title = aid[1]
  132. if len(aid)>4:
  133. id= aid[4]
  134. vod_year= aid[5]
  135. actors= aid[6]
  136. brief= aid[7]
  137. fromId='CCTV'
  138. if tid=="节目大全":
  139. lastUrl = 'https://api.cntv.cn/video/videoinfoByGuid?guid={0}&serviceId=tvcctv'.format(id)
  140. htmlTxt = self.webReadFile(urlStr=lastUrl,header=self.header)
  141. topicId=json.loads(htmlTxt)['ctid']
  142. Url = "https://api.cntv.cn/NewVideo/getVideoListByColumn?id={0}&d=&p=1&n=100&sort=desc&mode=0&serviceId=tvcctv&t=json".format(topicId)
  143. htmlTxt = self.webReadFile(urlStr=Url,header=self.header)
  144. elif tid=='搜索栏目':
  145. htmlTxt = self.webReadFile(urlStr=lastVideo,header=self.header)
  146. topicId=self.get_RegexGetText(Text=htmlTxt,RegexText=r"var (column_id|topicID)\s*=\s*(\"|')(.+?)(\"|');",Index=3)
  147. if len(topicId)<4:
  148. return {'list':[]}
  149. Url = "https://api.cntv.cn/NewVideo/getVideoListByColumn?id={0}&d=&p=1&n=100&sort=desc&mode=0&serviceId=tvcctv&t=json".format(topicId)
  150. vod_year=''
  151. actors=''
  152. brief=''
  153. else:
  154. Url='https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew?id={0}&serviceId=tvcctv&p=1&n=100&mode=0&pub=1'.format(id)
  155. jRoot = ''
  156. videoList = []
  157. try:
  158. if tid=="搜索":
  159. fromId='中央台'
  160. videoList=[title+"$"+lastVideo]
  161. else:
  162. htmlTxt=self.webReadFile(urlStr=Url,header=self.header)
  163. if htmlTxt.find('list')>4:
  164. jRoot = json.loads(htmlTxt)
  165. data=jRoot['data']
  166. jsonList=data['list']
  167. videoList=self.get_EpisodesList(jsonList=jsonList)
  168. # return {}
  169. if len(videoList)<1:
  170. htmlTxt=self.webReadFile(urlStr=lastVideo,header=self.header)
  171. patternTxt=[r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'",r'class="tp1"><a\s*href="(?P<url>https://.+?)"\s*target="_blank"\s*title="(?P<title>.+?)"></a></div>',r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'",r'href="(?P<url>.+?)" target="_blank" alt="(?P<title>.+?)" title=".+?">']
  172. fromId='央视'
  173. if tid=="电视剧" or tid=="纪录片" or tid=='搜索栏目':
  174. videoList=self.get_EpisodesList_re(htmlTxt=htmlTxt,patternTxt=patternTxt[0])
  175. elif tid=="特别节目":
  176. videoList=self.get_EpisodesList_re(htmlTxt=htmlTxt,patternTxt=patternTxt[1])
  177. elif tid=="动画片":
  178. videoList=self.get_EpisodesList_re(htmlTxt=htmlTxt,patternTxt=patternTxt[2])
  179. elif tid=="节目大全":
  180. videoList=self.get_EpisodesList_re(htmlTxt=htmlTxt,patternTxt=patternTxt[3])
  181. if len(videoList)<1:
  182. for vod in patternTxt:
  183. videoList=self.get_EpisodesList_re(htmlTxt=htmlTxt,patternTxt=vod)
  184. print(len(videoList))
  185. if len(videoList)>0:
  186. break
  187. except:
  188. pass
  189. if len(videoList) == 0:
  190. return {}
  191. vod = {
  192. "vod_id":array[0],
  193. "vod_name":title,
  194. "vod_pic":logo,
  195. "type_name":tid,
  196. "vod_year":vod_year,
  197. "vod_area":"",
  198. "vod_remarks":'',
  199. "vod_actor":actors,
  200. "vod_director":'',
  201. "vod_content":brief
  202. }
  203. vod['vod_play_from'] = fromId
  204. vod['vod_play_url'] = "#".join(videoList)
  205. result = {
  206. 'list':[
  207. vod
  208. ]
  209. }
  210. return result
  def get_lineList(self,Txt,mark,after):
    """Cut ``Txt`` into pieces that start at ``mark`` and stop before ``after``.

    NOTE: this class defines ``get_lineList`` a second time further down; the
    later definition is the one bound at runtime.

    :param Txt: text to scan.
    :param mark: string that opens each piece (included in the piece).
    :param after: string that closes each piece (excluded from the piece).
    :return: list of pieces; scanning continues only while ``mark`` is found
        at an offset greater than 8.
    """
    pieces = []
    start = Txt.find(mark)
    while True:
      if start <= 8:
        break
      stop = Txt.find(after, start)
      pieces.append(Txt[start:stop])
      start = Txt.find(mark, stop)
    return pieces
  def get_RegexGetTextLine(self,Text,RegexText,Index):
    """Return every match of ``RegexText`` in ``Text`` as a list.

    NOTE: this class defines ``get_RegexGetTextLine`` a second time further
    down; the later definition is the one bound at runtime.

    :param Text: text to search.
    :param RegexText: pattern, applied with ``re.M | re.S``.
    :param Index: unused.
    :return: the ``findall`` results (empty list when nothing matches).
    """
    return list(re.findall(RegexText, Text, re.M | re.S))
  def searchContent(self,key,quick):
    """Search the CCTV site and return the matching programme columns.

    :param key: search phrase; percent-encoded before it is put into the URL.
    :param quick: unused.
    :return: ``{'list': [...]}`` as built by ``get_list_search_column``.
    """
    encodedKey = urllib.parse.quote(key)
    # Only the HTML results page is queried; the JSON endpoint ifsearch.php
    # and get_list_search are not used by this method.
    searchUrl = 'https://search.cctv.com/search.php?qtext={0}&type=video'.format(encodedKey)
    page = self.webReadFile(urlStr=searchUrl, header=self.header)
    return {'list': self.get_list_search_column(html=page, tid='搜索栏目')}
  239. def playerContent(self,flag,id,vipFlags):
  240. result = {}
  241. url=''
  242. parse=0
  243. headers = {
  244. 'User-Agent':'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
  245. }
  246. if flag=='CCTV':
  247. url=self.get_m3u8(urlTxt=id)
  248. else:
  249. try:
  250. html=self.webReadFile(urlStr=id,header=self.header)
  251. if html.find('window.location.href')>5:
  252. url=self.get_RegexGetText(Text=html,RegexText=r"href\s*=\s*(\"|')(.+?)(\"|')",Index=2)
  253. html=self.webReadFile(urlStr=url,header=self.header)
  254. if url!='':
  255. id=url
  256. guid=self.get_RegexGetText(Text=html,RegexText=r'var\sguid\s*=\s*"(.+?)";',Index=1)
  257. url=self.get_m3u8(urlTxt=guid)
  258. except :
  259. url=id
  260. parse=1
  261. if url.find('https:')<0:
  262. url=id
  263. parse=1
  264. result["parse"] = parse#1=嗅探,0=播放
  265. result["playUrl"] = ''
  266. result["url"] = url
  267. result["header"] =headers
  268. return result
  # Filter definitions returned by homeContent(filter=True).
  # Each category maps to a list of {"key", "name", "value"} filters, and each
  # option in "value" is {"n": label, "v": submitted value}. Every option list
  # starts with "全部" (all), which submits an empty value.
  # The two builders and the shared tables are deleted once `config` is built.
  def _same(values):
    return [{"n": "全部", "v": ""}] + [{"n": item, "v": item} for item in values]

  def _pairs(pairs):
    return [{"n": "全部", "v": ""}] + [{"n": label, "v": value} for label, value in pairs]

  # Letter index used by the album categories (has no "B" and no "D").
  _letters = list("ACEFGHIJKLMNOPQRSTUVWXYZ") + ["0-9"]
  # NOTE(review): the "{" in these channel names looks like a mangled "-";
  # the values are reproduced verbatim. Confirm against the API.
  _channels = [
    "CCTV{1 综合", "CCTV{2 财经", "CCTV{3 综艺", "CCTV{4 中文国际",
    "CCTV{5 体育", "CCTV{6 电影", "CCTV{7 国防军事", "CCTV{8 电视剧",
    "CCTV{9 纪录", "CCTV{10 科教", "CCTV{11 戏曲", "CCTV{12 社会与法",
    "CCTV{13 新闻", "CCTV{14 少儿", "CCTV{15 音乐", "CCTV{17 农业农村",
  ]

  config = {
    "player": {},
    "filter": {
      "电视剧": [
        {"key": "datafl-sc", "name": "类型", "value": _same([
          "谍战", "悬疑", "刑侦", "历史", "古装", "武侠", "军旅", "战争", "喜剧",
          "青春", "言情", "偶像", "家庭", "年代", "革命", "农村", "都市", "其他",
        ])},
        {"key": "datadq-area", "name": "地区", "value": _pairs([
          ("中国大陆", "中国大陆"), ("中国香港", "香港"), ("美国", "美国"),
          ("欧洲", "欧洲"), ("泰国", "泰国"),
        ])},
        {"key": "datanf-year", "name": "年份", "value": _same(map(str, range(2023, 1996, -1)))},
        {"key": "dataszm-letter", "name": "字母", "value": _same(_letters)},
      ],
      "动画片": [
        {"key": "datafl-sc", "name": "类型", "value": _same([
          "亲子", "搞笑", "冒险", "动作", "宠物", "体育", "益智", "历史", "教育",
          "校园", "言情", "武侠", "经典", "未来", "古代", "神话", "真人", "励志",
          "热血", "奇幻", "童话", "剧情", "夺宝", "其他",
        ])},
        {"key": "datadq-area", "name": "地区", "value": _same(["中国大陆", "美国", "欧洲"])},
        {"key": "dataszm-letter", "name": "字母", "value": _same(_letters)},
      ],
      "纪录片": [
        {"key": "datapd-channel", "name": "频道", "value": _same(_channels)},
        {"key": "datafl-sc", "name": "类型", "value": _same([
          "人文历史", "人物", "军事", "探索", "社会", "时政", "经济", "科技",
        ])},
        {"key": "datanf-year", "name": "年份", "value": _same(map(str, range(2023, 2007, -1)))},
        {"key": "dataszm-letter", "name": "字母", "value": _same(_letters)},
      ],
      "特别节目": [
        {"key": "datapd-channel", "name": "频道", "value": _same(_channels)},
        # This list really has two "全部" entries: the empty one and a literal one.
        {"key": "datafl-sc", "name": "类型", "value": _same([
          "全部", "新闻", "经济", "综艺", "体育", "军事", "影视", "科教", "戏曲",
          "青少", "音乐", "社会", "公益", "其他",
        ])},
        {"key": "dataszm-letter", "name": "字母", "value": _same(_letters)},
      ],
      "节目大全": [
        {"key": "cid", "name": "频道", "value": _pairs([
          ("CCTV-1综合", "EPGC1386744804340101"),
          ("CCTV-2财经", "EPGC1386744804340102"),
          ("CCTV-3综艺", "EPGC1386744804340103"),
          ("CCTV-4中文国际", "EPGC1386744804340104"),
          ("CCTV-5体育", "EPGC1386744804340107"),
          ("CCTV-6电影", "EPGC1386744804340108"),
          ("CCTV-7国防军事", "EPGC1386744804340109"),
          ("CCTV-8电视剧", "EPGC1386744804340110"),
          ("CCTV-9纪录", "EPGC1386744804340112"),
          ("CCTV-10科教", "EPGC1386744804340113"),
          ("CCTV-11戏曲", "EPGC1386744804340114"),
          ("CCTV-12社会与法", "EPGC1386744804340115"),
          ("CCTV-13新闻", "EPGC1386744804340116"),
          ("CCTV-14少儿", "EPGC1386744804340117"),
          ("CCTV-15音乐", "EPGC1386744804340118"),
          ("CCTV-16奥林匹克", "EPGC1634630207058998"),
          ("CCTV-17农业农村", "EPGC1563932742616872"),
          ("CCTV-5+体育赛事", "EPGC1468294755566101"),
        ])},
        {"key": "fc", "name": "分类", "value": _same([
          "新闻", "体育", "综艺", "健康", "生活", "科教", "经济", "农业", "法治",
          "军事", "少儿", "动画", "纪实", "戏曲", "音乐", "影视",
        ])},
        {"key": "fl", "name": "字母", "value": _same("ABCDEFGHIJKLMNOPQRSTUVWXYZ")},
        {"key": "year", "name": "年份", "value": _same(map(str, range(2022, 1999, -1)))},
        {"key": "month", "name": "月份", "value": _same([
          "12", "11", "10", "09", "08", "07", "06", "05", "04", "03", "02", "01",
        ])},
      ],
    },
  }
  del _same, _pairs, _letters, _channels
  # Desktop-browser headers that the methods of this class pass to
  # webReadFile/TestWebPage. NOTE: both helpers currently build their request
  # without headers, so these values are not actually sent.
  header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
    "Host": "tv.cctv.com",
    "Referer": "https://tv.cctv.com/",
  }
  def localProxy(self,param):
    """Answer a local-proxy request with an empty MPEG-TS response descriptor.

    Fix: the original returned the undefined name ``action`` and so raised
    NameError on every call.

    :param param: request parameters from the host (unused).
    :return: ``[status, content_type, action, body]``.
    """
    # NOTE(review): the field set mirrors the usual spider template for this
    # plugin framework; confirm it against the host's expectations.
    action = {
      'url': '',
      'header': '',
      'param': '',
      'type': 'string',
      'after': ''
    }
    return [200, "video/MP2T", action, ""]
  304. #-----------------------------------------------自定义函数-----------------------------------------------
  def removeHtml(self,txt):
    """Strip HTML tags from ``txt`` and turn ``&nbsp;`` into plain spaces.

    NOTE: this class defines ``removeHtml`` a second time further down; the
    later definition is the one bound at runtime.
    """
    withoutTags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
    return withoutTags.replace("&nbsp;"," ")
  309. #访问网页
  def webReadFile(self,urlStr,header):
    """Download ``urlStr`` and return the body decoded as UTF-8.

    :param urlStr: address to fetch.
    :param header: accepted for interface compatibility but not sent; the
        request is built without custom headers.
    :raises: whatever ``urllib.request`` raises, plus ``UnicodeDecodeError``
        for bodies that are not valid UTF-8.
    """
    plainRequest = urllib.request.Request(url=urlStr)
    with urllib.request.urlopen(plainRequest) as response:
      body = response.read()
    return body.decode('utf-8')
  316. #判断网络地址是否存在
  def TestWebPage(self,urlStr,header):
    """Send a HEAD request to ``urlStr`` and return the response status code.

    :param urlStr: address to probe.
    :param header: accepted for interface compatibility but not sent.
    :raises: whatever ``urllib.request`` raises (HTTP error statuses
        included).
    """
    headRequest = urllib.request.Request(url=urlStr, method='HEAD')
    with urllib.request.urlopen(headRequest) as response:
      statusCode = response.getcode()
    return statusCode
  323. #正则取文本
  def get_RegexGetText(self,Text,RegexText,Index):
    """Return group ``Index`` of the first match of ``RegexText`` in ``Text``.

    The pattern is applied with ``re.M | re.S``. An empty string is returned
    when nothing matches.
    """
    found = re.search(RegexText, Text, re.M | re.S)
    return "" if found is None else found.group(Index)
  332. #正则取文本,返回数组
  def custom_RegexGetTextLine(self,Text,RegexText,Index):
    """Return every match of ``RegexText`` in ``Text`` as a list.

    :param Text: text to search.
    :param RegexText: pattern, applied with ``re.M | re.S``.
    :param Index: unused.
    :return: the ``findall`` results (empty list when nothing matches).
    """
    return list(re.findall(RegexText, Text, re.M | re.S))
  342. #取集数
  def get_EpisodesList(self,jsonList):
    """Turn API episode records into ``"title$guid"`` play entries.

    :param jsonList: iterable of dicts with ``'guid'`` and ``'title'`` keys.
    :return: list of entries; records with an empty guid are left out.
    """
    # Read both fields of every record first, so that a record lacking either
    # key still raises KeyError.
    extracted = [(item['guid'], item['title']) for item in jsonList]
    return [name + "$" + guid for guid, name in extracted if len(guid) != 0]
  352. #取集数
  def get_EpisodesList_re(self,htmlTxt,patternTxt):
    """Scrape ``"title$url"`` play entries out of a page with a regex.

    :param htmlTxt: page source.
    :param patternTxt: pattern with named groups ``url`` and ``title``;
        applied with ``re.M | re.S``.
    :return: list of entries in page order; empty urls are left out.
    """
    entries = []
    matcher = re.compile(patternTxt, re.M | re.S)
    for hit in matcher.finditer(htmlTxt):
      link = hit.group('url')
      name = hit.group('title')
      if len(link) != 0:
        entries.append(name + "$" + link)
    return entries
  363. #取剧集区
  def get_lineList(self,Txt,mark,after):
    """Cut ``Txt`` into pieces that start at ``mark`` and stop before ``after``.

    Fixes over the previous version:
    * a ``mark`` at offsets 0-8 is no longer ignored (``> 8`` was used as the
      "found" test);
    * when ``after`` is missing, the final piece runs to the end of the text
      instead of losing its last character;
    * the closing string is searched for after the mark, so the scan always
      advances.

    :param Txt: text to scan.
    :param mark: string that opens each piece (included in the piece).
    :param after: string that closes each piece (excluded from the piece).
    :return: list of pieces in document order; empty when ``mark`` is absent
        or empty.
    """
    pieces = []
    if not mark:
      return pieces
    start = Txt.find(mark)
    while start >= 0:
      stop = Txt.find(after, start + len(mark))
      if stop < 0:
        pieces.append(Txt[start:])
        break
      pieces.append(Txt[start:stop])
      start = Txt.find(mark, stop)
    return pieces
  372. #正则取文本,返回数组
  def get_RegexGetTextLine(self,Text,RegexText,Index):
    """Return every match of ``RegexText`` in ``Text`` as a list.

    :param Text: text to search.
    :param RegexText: pattern, applied with ``re.M | re.S``.
    :param Index: unused.
    :return: the ``findall`` results (empty list when nothing matches).
    """
    return list(re.findall(RegexText, Text, re.M | re.S))
  382. #删除html标签
  def removeHtml(self,txt):
    """Strip HTML tags from ``txt`` and turn ``&nbsp;`` into plain spaces."""
    withoutTags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
    return withoutTags.replace("&nbsp;"," ")
  387. #取m3u8
  def get_m3u8(self,urlTxt):
    """Resolve a video guid to an HLS playlist URL, preferring the 1200 variant.

    Steps: fetch the video info, fetch the master playlist named by its
    ``hls_url``, take the playlist's last line as the default variant, then
    probe the same path with the bitrate parts replaced by ``1200``.

    Fixes: the host pattern used the class ``A-z`` (a typo for ``A-Z``) and
    rejected hyphenated host names; a variant path with fewer than four
    segments raised IndexError; the probe used a bare ``except``.

    :param urlTxt: video guid (``pid``).
    :return: the 1200 variant URL when the probe answers 200, otherwise the
        default variant URL.
    :raises: network, JSON and key errors from the first two requests
        propagate.
    """
    infoUrl = "https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={0}".format(urlTxt)
    info = json.loads(self.webReadFile(urlStr=infoUrl, header=self.header))
    link = info['hls_url'].strip()
    playlist = self.webReadFile(urlStr=link, header=self.header).strip()
    lastEntry = playlist.split('\n')[-1]
    # "_" stays accepted because the old (typo) class matched it as well.
    urlPrefix = self.get_RegexGetText(Text=link, RegexText=r'(http[s]?://[a-zA-Z0-9._-]+)/', Index=1)
    url = urlPrefix + lastEntry

    segments = lastEntry.split('/')
    if len(segments) < 4:
      # Unexpected path layout: nothing to rewrite, keep the default variant.
      return url
    # presumably segment 3 and the file name carry the bitrate; verify
    # against a live playlist.
    segments[3] = '1200'
    segments[-1] = '1200.m3u8'
    hdUrl = urlPrefix + '/'.join(segments)
    try:
      if self.TestWebPage(urlStr=hdUrl, header=self.header) == 200:
        url = hdUrl
    except Exception:
      # Best effort: an unreachable 1200 variant just means we keep the default.
      pass
    return url
  410. #搜索
  411. def get_list_search(self,html,tid):
  412. jRoot = json.loads(html)
  413. jsonList=jRoot['list']
  414. videos=[]
  415. for vod in jsonList:
  416. url = vod['urllink']
  417. title =self.removeHtml(txt=vod['title'])
  418. img=vod['imglink']
  419. id=vod['id']
  420. brief=vod['channel']
  421. year=vod['uploadtime']
  422. if len(url) == 0:
  423. continue
  424. guid="{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}".format(tid,title,url,img,id,year,'',brief)
  425. videos.append({
  426. "vod_id":guid,
  427. "vod_name":title,
  428. "vod_pic":img,
  429. "vod_remarks":year
  430. })
  431. return videos
  432. return videos
  def get_list1(self,html,tid):
    """Convert a column-search JSON response into list entries.

    :param html: JSON text with ``response.docs``; each doc provides
        ``lastVIDE.videoSharedCode``, ``column_name``, ``column_website``,
        ``column_logo``, ``column_playdate`` and ``column_brief``.
    :param tid: category tag stored as the first field of each ``vod_id``.
    :return: list of ``{'vod_id', 'vod_name', 'vod_pic', 'vod_remarks'}``;
        ``[]`` when ``response`` is null; docs with an empty website are
        left out.
    """
    response = json.loads(html)['response']
    if response is None:
      return []
    template = "{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}"
    entries = []
    for doc in response['docs']:
      sharedCode = doc['lastVIDE']['videoSharedCode']
      name = doc['column_name']
      site = doc['column_website']
      logo = doc['column_logo']
      playDate = doc['column_playdate']
      summary = doc['column_brief']
      if len(site) != 0:
        entries.append({
          "vod_id": template.format(tid, name, site, logo, sharedCode, playDate, '', summary),
          "vod_name": name,
          "vod_pic": logo,
          "vod_remarks": ''
        })
    return entries
  460. #分类取结果
  def get_list(self,html,tid):
    """Convert an album-list JSON response into list entries.

    The optional fields (``brief``, ``year``, ``actors``) are now read with
    ``dict.get`` instead of three bare ``try/except`` blocks.

    :param html: JSON text with ``data.list``; each item must provide ``url``,
        ``title``, ``image`` and ``id``.
    :param tid: category tag stored as the first field of each ``vod_id``.
    :return: list of ``{'vod_id', 'vod_name', 'vod_pic', 'vod_remarks'}``;
        ``[]`` when ``data`` is null; items with an empty url are left out.
    """
    data = json.loads(html)['data']
    if data is None:
      return []
    videos = []
    for item in data['list']:
      url = item['url']
      title = item['title']
      img = item['image']
      albumId = item['id']
      brief = item.get('brief', '')
      year = item.get('year', '')
      actors = item.get('actors', '')
      if len(url) == 0:
        continue
      # Field order must match what detailContent splits on '###'.
      guid = "{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}".format(tid,title,url,img,albumId,year,actors,brief)
      videos.append({
        "vod_id": guid,
        "vod_name": title,
        "vod_pic": img,
        "vod_remarks": ''
      })
    return videos
  496. #分类取结果
  497. def get_list_search_column(self,html,tid):
  498. videos=[]
  499. temporary=self. get_lineList(Txt=html,mark=r'<div class="ind01" id="ind01',after=r'缩略图')
  500. for vod in temporary:
  501. url=self.get_RegexGetText(Text=vod,RegexText=r'lanmu3="video_recomm"><a href="(https{0,1}://tv\..+?\.shtml)"',Index=1)
  502. if url=='':
  503. continue
  504. title=self.removeHtml(txt=self.get_RegexGetText(Text=vod,RegexText=r'id="video_playlist_cont_\d+?">(.+?)</a>',Index=1))
  505. img=self.get_RegexGetText(Text=vod,RegexText=r'src="(.+?)"',Index=1)
  506. if len(url) == 0:
  507. continue
  508. vod_id="{0}###{1}###{2}###{3}".format(tid,title,url,img)
  509. # print(vod_id)
  510. videos.append({
  511. "vod_id":vod_id,
  512. "vod_name":title,
  513. "vod_pic":img,
  514. "vod_remarks":''
  515. })
  516. return videos
  517. # T=Spider()
  518. # # print(T.homeContent(filter=False))
  519. # # l=T.homeVideoContent()
  520. # l=T.searchContent(key='军事科技',quick='')
  521. # # l=T.categoryContent(tid='特别节目',pg='1',filter=False,extend={})
  522. # # for x in l['list']:
  523. # # print(x['vod_id'])
  524. # mubiao= l['list'][1]['vod_id']
  525. # # print(mubiao)
  526. # playTabulation=T.detailContent(array=[mubiao,])
  527. # # print(playTabulation['list'][0]['vod_play_url'])
  528. # vod_play_from=playTabulation['list'][0]['vod_play_from']
  529. # vod_play_url=playTabulation['list'][0]['vod_play_url']
  530. # url=vod_play_url.split('$$$')
  531. # vod_play_from=vod_play_from.split('$$$')[0]
  532. # url=url[0].split('$')
  533. # url=url[1].split('#')[0]
  534. # print(url)
  535. # m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
  536. # print(m3u8)