py_xigua.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. sys.path.append('..')
  5. from base.spider import Spider
  6. import base64
  7. import math
  8. import json
  9. import requests
  10. import urllib
  11. from urllib import request, parse
  12. import urllib.request
  13. import re
  14. class Spider(Spider):
  def getName(self):
      """Return the display name of this spider."""
      display_name = "西瓜视频(个人中心)"
      return display_name
  def init(self, extend=""):
      """Resolve the user id once and store it on the instance.

      ``extend`` is accepted but not used by this method.
      """
      resolved_id = self.get_userid()
      self.userid = resolved_id
  def isVideoFormat(self, url):
      """Placeholder hook: performs no check on ``url`` and returns None."""
      return None
  def manualVideoCheck(self):
      """Placeholder hook: does nothing and returns None."""
      return None
  def homeContent(self, filter):
      """Build the home payload: the category list and, on request, the filters.

      The "关注" (follow) category is only listed when ``self.userid`` is not
      the empty string.  When ``filter`` is truthy, ``self.config['filter']``
      is attached under the ``'filters'`` key.
      """
      categories = [
          ("电视剧", "dianshiju"),
          ("电影", "dianying"),
          ("动漫", "dongman"),
          ("纪录片", "jilupian"),
          ("少儿", "shaoer"),
          ("综艺", "zongyi"),
      ]
      if self.userid != '':
          categories.append(("关注", "follow"))

      payload = {
          'class': [
              {'type_name': label, 'type_id': slug}
              for label, slug in categories
          ]
      }
      if filter:
          payload['filters'] = self.config['filter']
      return payload
  def homeVideoContent(self):
      """Return the home recommendation payload, which is always an empty list."""
      return {'list': []}
  # Class-level default for the user id (empty string). init() overwrites it
  # on the instance with the value returned by get_userid(); homeContent()
  # drops the "follow" category while it is still ''.
  userid=''
  def categoryContent(self, tid, pg, filter, extend):
      """Return one page of entries for the category ``tid``.

      Regular categories are fetched with a JSON POST to the cinema filter
      API.  The special ``'follow'`` category instead lists the accounts
      followed by ``self.userid``.

      Args:
          tid: category slug (``'dianying'``, ``'zongyi'``, ...) or ``'follow'``.
              Unknown slugs are queried with the "电视剧" type, as before.
          pg: 1-based page number, as an int or a numeric string.
          filter: not used by this method.
          extend: not used by this method.

      Returns:
          dict with the keys ``list``, ``page``, ``pagecount``, ``limit`` and
          ``total``.  ``pagecount`` is ``pg`` on what looks like the last
          page and ``int(pg) + 1`` otherwise.
      """
      page = int(pg)

      if tid == 'follow':
          # NOTE(review): the cursor step (20 * page) and the "full page"
          # threshold (12) are unchanged; the cursor semantics of this
          # endpoint are not visible here, so they were not touched - confirm.
          cursor = 0 if page < 2 else 20 * page
          url = 'https://www.ixigua.com/api/userv2/follow/list?authorId={0}&sortType=desc&sortType=desc&cursor={1}'.format(self.userid, cursor)
          full_page = 12
          rsp = self.fetch(url, headers=self.header)
          videos = self.get_list_videoGroup_follow_json(jsonTxt=rsp.text)
      else:
          type_names = {
              'dianying': '电影',
              'zongyi': '综艺',
              'dianshiju': '电视剧',
              'dongman': '动漫',
              'jilupian': '纪录片',
              'shaoer': '少儿',
          }
          url = 'https://www.ixigua.com/api/cinema/filterv2/albums'
          page_size = 18
          # Threshold kept at 17 (one below the requested page size).
          full_page = 17
          # Page N starts at (N - 1) * page_size.  Using N * page_size, as the
          # code did before, skipped a whole page of results from page 2 on.
          offset = page_size * (page - 1) if page > 1 else 0

          # Work on a copy: self.header is a class-level dict shared by every
          # instance and every other request.
          headers = dict(self.header)
          # The placeholder was missing before, so tid never reached the Referer.
          headers['Referer'] = 'https://www.ixigua.com/cinema/filter/{0}/'.format(tid)

          # json.dumps escapes tid properly; these separators keep the same
          # compact body that was previously concatenated by hand.
          payload = json.dumps(
              {
                  "pinyin": tid,
                  "filters": {
                      "type": type_names.get(tid, '电视剧'),
                      "area": "全部地区",
                      "tag": "全部类型",
                      "sort": "综合排序",
                      "paid": "全部资费",
                  },
                  "offset": offset,
                  "limit": page_size,
              },
              ensure_ascii=False,
              separators=(',', ':'),
          )
          req = request.Request(url=url, data=payload.encode('utf8'), headers=headers, method='POST')
          # The context manager closes the HTTP response even if decoding fails.
          with request.urlopen(req) as response:
              body = response.read().decode('utf-8')
          videos = self.get_list_videoGroup_json(jsonTxt=body)

      # Normalize any empty / falsy parser result to a list.
      videos = videos or []
      count = len(videos)
      return {
          'list': videos,
          'page': pg,
          'pagecount': pg if count < full_page else page + 1,
          'limit': count,
          'total': count,
      }
  def get_userid(self):
      """Fetch the ixigua home page and pull the id out of its identity fragment.

      Returns whatever ``get_RegexGetText`` yields for capture group 1 of the
      ``"identity":{"id":"<digits>",`` pattern.
      """
      home_page = self.fetch('https://www.ixigua.com/', headers=self.header)
      return self.get_RegexGetText(
          Text=home_page.text,
          RegexText=r'"identity":{"id":"(\d+?)",',
          Index=1,
      )
  def detailContent(self, array):
      """Build the detail record (metadata plus play list) for one entry.

      ``array[0]`` is a ``###``-separated id: ``title###key###actor###cover``.
      When the id has exactly five fields, ``key`` is treated as a user id and
      that user's uploads are listed.  Otherwise ``key`` is an album id.

      Args:
          array: sequence whose first element is the composite id string.

      Returns:
          ``{'list': [vod]}`` on success.  An empty dict is returned when the
          id is malformed, the API reports a non-200 code, or no playable
          items are found.
      """
      result = {}
      parts = array[0].split('###')
      if len(parts) < 4:
          # Malformed id: bail out the same way as the other failure paths
          # instead of raising IndexError.
          return result

      title, key, cover = parts[0], parts[1], parts[3]
      is_user = len(parts) == 5
      if is_user:
          url = 'https://www.ixigua.com/api/videov2/author/new_video_list?to_user_id={0}'.format(key)
      else:
          url = 'https://www.ixigua.com/api/albumv2/details?albumId={0}'.format(key)
      body = self.fetch(url, headers=self.header).text

      type_name = ''
      area = ''
      director = ''
      intro = ''
      vip = 'true'
      episodes = []

      if is_user:
          root = json.loads(body)
          if root['code'] != 200:
              return result
          # `or []` tolerates a null list in the response.
          for item in root['data']['videoList'] or []:
              episodes.append("{0}${1}_false".format(item['title'], item.get('group_id')))
          director = title
      elif body.find('playlist') > 2:
          root = json.loads(body)
          if root['code'] != 200:
              return result
          data = root['data']
          for item in data['playlist'] or []:
              episodes.append("{0}${1}?id={2}_{3}".format(item['title'], item['albumId'], item['episodeId'], vip))
          info = data['albumInfo']
          type_name = '/'.join(info.get('tagList') or [])
          area = '/'.join(info.get('areaList') or [])
          director = '/'.join(person['name'] for person in info.get('directorList') or [])
          intro = info['intro']

      if len(episodes) < 1:
          return result

      vod = {
          "vod_id": array[0],
          "vod_name": title,
          "vod_pic": cover,
          "type_name": type_name,
          "vod_year": '',
          "vod_area": area,
          "vod_remarks": "",
          "vod_actor": '',
          "vod_director": director,
          "vod_content": intro,
          "vod_play_from": '西瓜',
          # Episodes are "title$target" items joined with '#'.
          "vod_play_url": "#".join(episodes),
      }
      return {'list': [vod]}
  def verifyCode(self):
      """Placeholder hook: does nothing and returns None."""
      return None
  def searchContent(self, key, quick):
      """Search long-form videos by keyword.

      Args:
          key: search keyword typed by the user.
          quick: not used by this method.

      Returns:
          ``{'list': videos}`` where ``videos`` is the parsed result list
          (empty when nothing usable came back).
      """
      # The keyword is embedded as a URL path segment, so '/' has to be
      # percent-encoded as well (quote() leaves it untouched by default).
      keyword = urllib.parse.quote(key, safe='')
      url = 'https://www.ixigua.com/api/searchv2/lvideo/{0}/0'.format(keyword)
      rsp = self.fetch(url, headers=self.header)
      # Normalize any empty / falsy parser result to a list.
      videos = self.get_list(html=rsp.text) or []
      # User search (https://www.ixigua.com/api/searchv2/user/<key>/10, parsed
      # with get_list_user) is deliberately left disabled, as it was before.
      return {'list': videos}
  def playerContent(self, flag, id, vipFlags):
      """Describe how to play one episode.

      ``id`` is ``<path>_<vip>`` where ``<vip>`` is ``'true'`` or ``'false'``.
      The flag is taken from the part after the LAST underscore, so paths
      that contain underscores stay intact.  An id without a suffix is
      treated as non-VIP instead of raising IndexError.

      Args:
          flag: play source name (not used here).
          id: episode id as produced by detailContent.
          vipFlags: not used here.

      Returns:
          dict with ``parse``, ``playUrl``, ``url``, ``jx`` and ``header``.
      """
      path, separator, vip_flag = id.rpartition('_')
      if not separator:
          # No "_<vip>" suffix at all: the whole id is the path.
          path, vip_flag = id, ''

      headers = {
          'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Mobile Safari/537.36'
      }
      result = {}
      result["parse"] = 1  # 0 = play directly, 1 = needs parsing
      result["playUrl"] = ''
      result["url"] = 'https://www.ixigua.com/{0}'.format(path)
      result['jx'] = 1 if vip_flag == 'true' else 0  # VIP parsing
      result["header"] = headers
      return result
  def get_RegexGetText(self, Text, RegexText, Index):
      """Return capture group ``Index`` of the first match of ``RegexText``.

      The search is case-insensitive and multi-line.  An empty string is
      returned when the pattern does not match.
      """
      found = re.search(RegexText, Text, re.I | re.M)
      return found.group(Index) if found is not None else ""
  def get_RegexGetTextLine(self, Text, RegexText, Index):
      """Return every match of ``RegexText`` in ``Text`` as a new list.

      Items follow ``findall`` semantics (strings, or tuples when the pattern
      has several groups).  ``Index`` is accepted but not used.
      """
      return list(re.findall(RegexText, Text))
  def get_playlist(self, Text, headStr, endStr):
      """Cut the span that starts at ``headStr`` and stops before ``endStr``.

      ``headStr`` only counts when it is found beyond index 8; otherwise an
      empty string is returned.  The end marker is searched from the start
      marker onwards and the slice is taken with whatever index ``find``
      reports.
      """
      start = Text.find(headStr)
      if start <= 8:
          return ""
      stop = Text.find(endStr, start)
      return Text[start:stop]
  def removeHtml(self, txt):
      """Strip ``<...>`` tags from ``txt`` and turn ``&nbsp;`` into a space."""
      without_tags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
      return without_tags.replace("&nbsp;", " ")
  def get_webReadFile(self, urlStr):
      """Download ``urlStr`` and return the body decoded as UTF-8.

      The request sends ``urlStr`` as its own Referer together with a desktop
      browser User-Agent.

      Args:
          urlStr: absolute URL to fetch.

      Returns:
          The response body as text.
      """
      headers = {
          'Referer': urlStr,
          'User-Agent': 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
          # No explicit 'Host' header: urllib derives it from urlStr. The
          # previously hard-coded 'www.ikmjw.com' was sent to every host.
      }
      req = urllib.request.Request(url=urlStr, headers=headers)
      # The context manager closes the response even if read/decode fails.
      with urllib.request.urlopen(req) as response:
          return response.read().decode('utf-8')
  def get_list(self, html):
      """Parse a long-video search response into vod summaries.

      Args:
          html: JSON text of the search response.

      Returns:
          A list of dicts with ``vod_id`` (``title###album_id###actor###cover``),
          ``vod_name``, ``vod_pic`` and ``vod_remarks``.  The list is empty
          when the response code is not 0 or no usable entry is found.  A list
          is always returned, never a dict.
      """
      videos = []
      root = json.loads(html)
      if root['code'] != 0:
          return videos
      payload = root.get('data') or {}
      if not payload:
          return videos

      for entry in payload.get('data') or []:
          if entry['type'] != 'lvideo':
              continue
          display = entry['data']['display']
          title = display['name']
          if len(title) == 0:
              continue

          # Prefer the ascending episode list, fall back to the descending one.
          links = display.get('episode_link') or {}
          episodes = links.get('asc_link')
          if episodes is None:
              episodes = links.get('desc_link')
          if not episodes:
              # No episode link at all: nothing to identify the album by.
              continue
          first = episodes[0]
          album_id = first.get('album_id')

          # Entries that carry a scheme URL are only kept when it is an
          # "sslocal:" one.
          scheme = first.get('scheme_url')
          if scheme is not None and scheme.find('sslocal:') < 0:
              continue

          cover_info = display.get('video_cover_info')
          cover = cover_info['url'] if cover_info is not None else '_'
          actor = display.get('actor', '')
          remarks = display.get('rating', '')

          videos.append({
              "vod_id": "{0}###{1}###{2}###{3}".format(title, album_id, actor, cover),
              "vod_name": title,
              "vod_pic": cover,
              "vod_remarks": remarks
          })
      return videos
  def get_list_user(self, html):
      """Parse a user search response into vod-style summaries.

      Args:
          html: JSON text of the search response.

      Returns:
          A list of dicts with ``vod_id``
          (``name###search_result_id###_###avatar###user``), ``vod_name``,
          ``vod_pic`` and ``vod_remarks``.  The list is empty when the
          response code is not 0 or there are no user entries.  A list is
          always returned, never a dict.
      """
      videos = []
      root = json.loads(html)
      if root['code'] != 0:
          return videos
      payload = root.get('data') or {}
      if not payload:
          return videos

      artist = '_'  # no actor information exists for a user entry
      for entry in payload.get('data') or []:
          if entry['type'] != 'user':
              continue
          data = entry['data']
          avatar = data['avatar']
          name = data['name']
          # Both fields are JSON documents embedded as strings; treat a
          # missing or empty one as an empty object.
          log_pb = json.loads(data.get('log_pb') or '{}')
          auth_info = json.loads(data.get('user_auth_info') or '{}')
          remarks = auth_info.get('auth_info', '')
          user_id = log_pb.get('search_result_id')

          videos.append({
              "vod_id": "{0}###{1}###{2}###{3}###{4}".format(name, user_id, artist, avatar, 'user'),
              "vod_name": name,
              "vod_pic": avatar,
              "vod_remarks": remarks
          })
      return videos
  def get_list_videoGroup_json(self, jsonTxt):
      """Parse an album filter response into vod summaries.

      Args:
          jsonTxt: JSON text of the album list response.

      Returns:
          A list of dicts with ``vod_id`` (``title###albumId###actors###cover``),
          ``vod_name``, ``vod_pic`` and ``vod_remarks``.  The list is empty
          when the response code is not 200 or there are no albums.  A list is
          always returned, never a dict.
      """
      videos = []
      root = json.loads(jsonTxt)
      if root['code'] != 200:
          return videos

      for album in root['data']['albumList'] or []:
          title = album['title']
          if len(title) == 0:
              continue
          album_id = album['albumId']
          remarks = album['subTitle']

          # Cover and actors are computed per album with '_' as the fallback,
          # so an album lacking either never shows the previous album's values.
          covers = album.get('coverList') or []
          cover = covers[0]['url'] if covers else '_'
          actors = album.get('actorList')
          # At most four actor names are kept.
          actor = '/'.join(actors[:4]) if actors is not None else '_'

          videos.append({
              # title###address###actor###cover
              "vod_id": "{0}###{1}###{2}###{3}".format(title, album_id, actor, cover),
              "vod_name": title,
              "vod_pic": cover,
              "vod_remarks": remarks
          })
      return videos
  def get_list_videoGroup_follow_json(self, jsonTxt):
      """Parse a follow-list response into vod-style summaries.

      Returns a list of dicts with ``vod_id``
      (``name###user_id###name###avatar###user``), ``vod_name``, ``vod_pic``
      and ``vod_remarks``.  The list is empty when the response code is not 0
      or there are no entries.  Entries with an empty name are skipped.
      """
      parsed = json.loads(jsonTxt)
      if parsed['code'] != 0:
          return []
      entries = parsed['data']['data']
      if len(entries) < 1:
          return []

      followed = []
      for entry in entries:
          user_id = entry.get('user_id')
          name = entry['name']
          avatar = entry.get('avatar_url')
          note = entry['description']
          if not len(name):
              continue
          # title###address###actor###cover (the name doubles as the actor)
          composite_id = "{0}###{1}###{2}###{3}###{4}".format(name, user_id, name, avatar, 'user')
          followed.append({
              "vod_id": composite_id,
              "vod_name": name,
              "vod_pic": avatar,
              "vod_remarks": note
          })
      return followed
  def get_lineList(self, Txt, mark, after):
      """Collect every span that starts at ``mark`` and stops before ``after``.

      Args:
          Txt: text to scan.
          mark: start marker; each returned span begins with it.
          after: end marker; it is searched for past the start marker and is
              not included in the span.

      Returns:
          List of spans in order of appearance.  If the last span has no end
          marker, the remainder of ``Txt`` is returned for it.
      """
      segments = []
      start = Txt.find(mark)
      # NOTE(review): a first marker at index 8 or lower is ignored, exactly
      # as before; the reason for this threshold is not visible here - confirm.
      while start > 8:
          # Look for the terminator after the marker itself. Searching from
          # `start` could match at the marker position (e.g. mark == after),
          # which made no progress and looped forever.
          stop = Txt.find(after, start + len(mark))
          if stop < 0:
              # Unterminated span: keep the rest of the text. Slicing with -1
              # would silently drop the last character.
              segments.append(Txt[start:])
              break
          segments.append(Txt[start:stop])
          start = Txt.find(mark, stop)
      return segments
  def get_EpisodesList(self, jsonList):
      """Turn episode records into ``title$url`` play items.

      Each record must provide ``title`` and ``episodeId``; the URL points at
      ``https://www.ixigua.com/<episodeId>`` with a fixed ``logTag`` query.
      """
      return [
          episode['title'] + "$" + 'https://www.ixigua.com/{0}?logTag=55abe18cfb733871bb04'.format(episode['episodeId'])
          for episode in jsonList
      ]
  # Static configuration table. homeContent() returns config['filter'] under
  # the 'filters' key when filters are requested; 'player' is not read by any
  # code shown in this file.
  config = {
      "player": {},
      "filter": {}
  }
  # Default HTTP headers passed to the requests made by this class
  # (self.fetch(..., headers=self.header) and the POST in categoryContent).
  # This is a class-level dict, so it is shared by all instances; note that
  # categoryContent() writes a new 'Referer' into it.
  # NOTE(review): Cookie, x-secsdk-csrf-token and tt-anti-token are hard-coded
  # values that look session-specific (the cookie embeds a May 2023 date);
  # presumably they expire - confirm, and consider loading them from
  # configuration instead of source.
  # NOTE(review): the User-Agent value begins with a literal "User-Agent: "
  # prefix and is URL-encoded, which looks like a copy/paste slip - confirm
  # before changing it, since the tokens above may be tied to it.
  header = {
      "Cookie":"s_v_web_id=verify_lev3h43l_rrTPrFDG_ztWQ_4ugg_8WBA_yGVYsXlVyoBh; passport_csrf_token=80e0efe90bc8bd6681a896dd90cd08cc; passport_csrf_token_default=80e0efe90bc8bd6681a896dd90cd08cc; __ac_nonce=0643361890096533c765; __ac_signature=_02B4Z6wo00f01JPVVrAAAIDBcisHPfWA66CT91IAAEDK9840CE-PheNOCgA4VtrFG0-K.KkLmAR5KsI-Xx-6dBXxz.ABWU2OpEd22kF7biwGaVmGR7an4S1heLEU9xpv0ObRSHFHDslR7uL8fb; support_webp=true; support_avif=false; MONITOR_WEB_ID=45c3b6ab-7ad4-4805-b971-5962d1d6909a; ttwid=1%7CCueNR-HU9tGVF30WaiFCjXDxh0FUXoXsZr-cIb9Dogg%7C1681089268%7Cf0eeaa2016a602a277055494954f083e9f7fa8121c5dd1162db9195932fa167b; odin_tt=386a42a5740f9859d4670373fc8c70cf320ea5b227aed04bcc53fde26c233e8c952a8b0ba5f80fd3d46f9663fa595d8c; sid_guard=842b56710f55021912487890e7d5bef3%7C1681089337%7C3024001%7CMon%2C+15-May-2023+01%3A15%3A38+GMT; uid_tt=b7eb5b73cd72bed6d3c2e1e1e8aaa9f3; uid_tt_ss=b7eb5b73cd72bed6d3c2e1e1e8aaa9f3; sid_tt=842b56710f55021912487890e7d5bef3; sessionid=842b56710f55021912487890e7d5bef3; sessionid_ss=842b56710f55021912487890e7d5bef3; sid_ucp_v1=1.0.0-KGQ2YzBlMDFiMzIyMjY0YTIwMDg2MjZmZGQzMTE5MmFlYTYzY2EwMTMKFQjL2cnx9AIQucbNoQYYGCAMOAhABRoCaGwiIDg0MmI1NjcxMGY1NTAyMTkxMjQ4Nzg5MGU3ZDViZWYz; ssid_ucp_v1=1.0.0-KGQ2YzBlMDFiMzIyMjY0YTIwMDg2MjZmZGQzMTE5MmFlYTYzY2EwMTMKFQjL2cnx9AIQucbNoQYYGCAMOAhABRoCaGwiIDg0MmI1NjcxMGY1NTAyMTkxMjQ4Nzg5MGU3ZDViZWYz; csrf_session_id=c58adecac1d20d91d8c61e72ce0c6fdb; ixigua-a-s=3; msToken=St3ptsHkwBjPgGvRWhZfVdhHO_K16vTizxxna17draCvt4ekz6DQXO6c2Ctrp6gOnn9_Abm83-a9URQp5rKb-JIPs4mQPn2fpKlQRY6jLDBWUdLYTePIAACU6cY2fk40; tt_scid=nN3WPItw72gnB5PDiGcHveirXDZ8oDl5n.ihKN583mmZhoe.uLhFpy3JOL8wrsQO0ed1",
      "Referer": 'https://www.ixigua.com/cinema/filter/dianshiju/',
      'User-Agent':'User-Agent: Mozilla%2F5.0+(Windows+NT+10.0%3B+WOW64)+AppleWebKit%2F537.36+(KHTML%2C+like+Gecko)+Chrome%2F63.0.3239.132+Safari%2F537.36',
      'Host': 'www.ixigua.com',
      'Accept': 'application/json, text/plain, */*',
      'x-secsdk-csrf-token': '0001000000017b593ba6251b18bd7bce2753042917bb36e534867b9606317584c00b0ae836c61754314b7365128e',
      'tt-anti-token': 'oDr7A3PDDFq4pWzk-707faf92a9e3040f5c6ed4284d53b05b7091221852c0e9d32bca9fcfe5035225',
      'content-type': 'application/json'
  }
  def localProxy(self, param):
      """Return the local-proxy response descriptor.

      Args:
          param: proxy request parameters (not used here).

      Returns:
          ``[200, "video/MP2T", action, ""]`` where ``action`` is a dict of
          empty placeholder values.
      """
      # `action` used to be referenced without ever being defined, so every
      # call raised NameError.
      # NOTE(review): assumes the host application expects the usual
      # url/header/param/type/after keys - TODO confirm against the loader.
      action = {
          'url': '',
          'header': '',
          'param': '',
          'type': 'string',
          'after': ''
      }
      return [200, "video/MP2T", action, ""]