py_bilibilivd.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. import json
  5. import time
  6. from datetime import datetime
  7. from urllib.parse import quote, unquote
  8. sys.path.append('..')
  9. from base.spider import Spider
  10. class Spider(Spider): # 元类 默认的元类 type
  def getName(self):
      """Return the fixed name string of this spider ("B站视频")."""
      return "B站视频"
  def init(self, extend):
      """Parse the JSON configuration text into ``self.extendDict``.

      Args:
          extend: JSON text expected to decode to an object. Input that is
              not valid JSON, is not text at all (e.g. ``None``), or decodes
              to something other than an object yields an empty
              configuration.
      """
      try:
          parsed = json.loads(extend)
      except (TypeError, ValueError, RecursionError):
          # json.JSONDecodeError subclasses ValueError; TypeError covers None
          # and other non-text input.
          parsed = {}
      # The other methods of this class do key lookups on the configuration
      # (``'cookie' in self.extendDict``), so only a mapping is usable.
      self.extendDict = parsed if isinstance(parsed, dict) else {}
  def isVideoFormat(self, url):
      """Placeholder: performs no check on ``url`` and returns ``None``."""
      pass
  def manualVideoCheck(self):
      """Placeholder: does nothing and returns ``None``."""
      pass
  def homeContent(self, filter):
      """Build the home-page category list and, optionally, filter definitions.

      The cookie is taken from ``self.extendDict['cookie']`` and overridden by
      a ``cookie`` entry of the remote JSON configuration named by
      ``self.extendDict['json']`` when present. A cookie value that is an
      ``http...`` string is treated as a URL and downloaded; a dict is
      serialized to JSON text before being handed to ``self.getCookie``.

      Args:
          filter: When truthy, the ``filter`` entry of the remote JSON
              configuration (if any) is returned under ``'filters'``.

      Returns:
          A dict with ``'filters'`` (dict) and ``'class'`` (list of
          ``{'type_name', 'type_id'}`` dicts). When no category comes from
          the login state or the configuration, a single default category
          is returned so the list is never empty.
      """
      config = self.extendDict

      # Fetch the remote configuration once and reuse it for the cookie,
      # the classes and the filters.
      remote = None
      if 'json' in config:
          remote = self.fetch(config['json'], timeout=10).json()

      cookie = ''
      if 'cookie' in config:
          cookie = config['cookie']
      if remote is not None and 'cookie' in remote:
          cookie = remote['cookie']

      if cookie == '':
          cookie = '{}'
      elif isinstance(cookie, str) and cookie.startswith('http'):
          cookie = self.fetch(cookie, timeout=10).text.strip()
      if isinstance(cookie, dict):
          try:
              cookie = json.dumps(cookie, ensure_ascii=False)
          except (TypeError, ValueError):
              # Best effort: keep the dict as-is if it cannot be serialized.
              pass

      # The return values are not needed here.
      # NOTE(review): presumably getCookie refreshes the cached 'bblogin'
      # flag read below - confirm against getCookie.
      self.getCookie(cookie)

      if self.getCache('bblogin'):
          classes = [
              {"type_name": "动态", "type_id": "动态"},
              {"type_name": "收藏夹", "type_id": "收藏夹"},
              {"type_name": "历史记录", "type_id": "历史记录"},
          ]
      else:
          classes = []

      filters = {}
      if remote is not None:
          if 'classes' in remote:
              classes = classes + remote['classes']
          if filter and 'filter' in remote:
              filters = remote['filter']
      elif 'categories' in config or 'type' in config:
          raw = config['categories'] if 'categories' in config else config['type']
          classes.extend(
              {'type_name': cate, 'type_id': cate} for cate in raw.split('#')
          )

      if not classes:
          # Fallback category; must be a list like every other code path.
          classes = [{"type_name": "沙雕动漫", "type_id": "沙雕动漫"}]

      return {'filters': filters, 'class': classes}
  def homeVideoContent(self):
      """Fetch the recommended-feed videos shown on the home screen.

      The cookie is resolved the same way as in the other content methods:
      ``self.extendDict['cookie']``, overridden by the ``cookie`` entry of
      the remote JSON configuration named by ``self.extendDict['json']``;
      an ``http...`` string is downloaded and a dict is serialized to JSON
      text before being passed to ``self.getCookie``.

      Returns:
          ``{'list': [...]}`` where each entry has ``vod_id``, ``vod_name``,
          ``vod_pic`` and ``vod_remarks`` (duration as ``MM:SS`` or
          ``HH:MM:SS``). Items with zero duration or missing/malformed
          fields are skipped; the list is empty when the response carries
          no items.
      """
      config = self.extendDict

      cookie = ''
      if 'cookie' in config:
          cookie = config['cookie']
      if 'json' in config:
          remote = self.fetch(config['json'], timeout=10).json()
          if 'cookie' in remote:
              cookie = remote['cookie']

      if cookie == '':
          cookie = '{}'
      elif isinstance(cookie, str) and cookie.startswith('http'):
          cookie = self.fetch(cookie, timeout=10).text.strip()
      if isinstance(cookie, dict):
          try:
              cookie = json.dumps(cookie, ensure_ascii=False)
          except (TypeError, ValueError):
              # Best effort: keep the dict as-is if it cannot be serialized.
              pass

      # Only the cookie is needed here; the two keys returned alongside it
      # are unused by this request.
      cookie, _, _ = self.getCookie(cookie)

      url = 'https://api.bilibili.com/x/web-interface/index/top/feed/rcmd?&y_num=1&fresh_type=3&feed_version=SEO_VIDEO&fresh_idx_1h=1&fetch_row=1&fresh_idx=1&brush=0&homepage_ver=1&ps=20'
      r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
      data = json.loads(self.cleanText(r.text))

      try:
          items = data['data']['item']
      except (KeyError, TypeError):
          items = []
      if not isinstance(items, list):
          items = []

      videos = []
      for vod in items:
          try:
              aid = str(vod['id']).strip()
              title = self.removeHtmlTags(vod['title']).strip()
              img = vod['pic'].strip()
              remark = time.strftime('%H:%M:%S', time.gmtime(vod['duration']))
          except (KeyError, TypeError, AttributeError, ValueError,
                  OverflowError, OSError):
              # Skip a malformed entry instead of discarding the whole feed.
              continue
          # Drop the hour field for videos shorter than one hour.
          if remark.startswith('00:'):
              remark = remark[3:]
          # A zero duration is not a playable video entry.
          if remark == '00:00':
              continue
          videos.append({
              'vod_id': aid,
              'vod_name': title,
              'vod_pic': img,
              'vod_remarks': remark,
          })

      return {'list': videos}
  107. def categoryContent(self, cid, page, filter, ext):
  108. page = int(page)
  109. result = {}
  110. videos = []
  111. cookie = ''
  112. pagecount = page
  113. if 'cookie' in self.extendDict:
  114. cookie = self.extendDict['cookie']
  115. if 'json' in self.extendDict:
  116. r = self.fetch(self.extendDict['json'], timeout=10)
  117. if 'cookie' in r.json():
  118. cookie = r.json()['cookie']
  119. if cookie == '':
  120. cookie = '{}'
  121. elif type(cookie) == str and cookie.startswith('http'):
  122. cookie = self.fetch(cookie, timeout=10).text.strip()
  123. try:
  124. if type(cookie) == dict:
  125. cookie = json.dumps(cookie, ensure_ascii=False)
  126. except:
  127. pass
  128. cookie, imgKey, subKey = self.getCookie(cookie)
  129. if cid == '动态':
  130. if page > 1:
  131. offset = self.getCache('offset')
  132. if not offset:
  133. offset = ''
  134. url = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/all?timezone_offset=-480&type=all&offset={offset}&page={page}'
  135. else:
  136. url = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/all?timezone_offset=-480&type=all&page={page}'
  137. r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
  138. data = json.loads(self.cleanText(r.text))
  139. self.setCache('offset', data['data']['offset'])
  140. vodList = data['data']['items']
  141. if data['data']['has_more']:
  142. pagecount = page + 1
  143. for vod in vodList:
  144. if vod['type'] != 'DYNAMIC_TYPE_AV':
  145. continue
  146. vid = str(vod['modules']['module_dynamic']['major']['archive']['aid']).strip()
  147. remark = vod['modules']['module_dynamic']['major']['archive']['duration_text'].strip()
  148. title = self.removeHtmlTags(vod['modules']['module_dynamic']['major']['archive']['title']).strip()
  149. img = vod['modules']['module_dynamic']['major']['archive']['cover']
  150. videos.append({
  151. "vod_id": vid,
  152. "vod_name": title,
  153. "vod_pic": img,
  154. "vod_remarks": remark
  155. })
  156. elif cid == "收藏夹":
  157. userid = self.getUserid(cookie)
  158. if userid is None:
  159. return {}, 1
  160. url = f'http://api.bilibili.com/x/v3/fav/folder/created/list-all?up_mid={userid}&jsonp=jsonp'
  161. r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
  162. data = json.loads(self.cleanText(r.text))
  163. vodList = data['data']['list']
  164. pagecount = page
  165. for vod in vodList:
  166. vid = vod['id']
  167. title = vod['title'].strip()
  168. remark = vod['media_count']
  169. img = 'https://api-lmteam.koyeb.app/files/shoucang.png'
  170. videos.append({
  171. "vod_id": f'fav&&&{vid}',
  172. "vod_name": title,
  173. "vod_pic": img,
  174. "vod_tag": 'folder',
  175. "vod_remarks": remark
  176. })
  177. elif cid.startswith('fav&&&'):
  178. cid = cid[6:]
  179. url = f'http://api.bilibili.com/x/v3/fav/resource/list?media_id={cid}&pn={page}&ps=20&platform=web&type=0'
  180. r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
  181. data = json.loads(self.cleanText(r.text))
  182. if data['data']['has_more']:
  183. pagecount = page + 1
  184. else:
  185. pagecount = page
  186. vodList = data['data']['medias']
  187. for vod in vodList:
  188. vid = str(vod['id']).strip()
  189. title = self.removeHtmlTags(vod['title']).replace(""", '"')
  190. img = vod['cover'].strip()
  191. remark = time.strftime('%H:%M:%S', time.gmtime(vod['duration']))
  192. if remark.startswith('00:'):
  193. remark = remark[3:]
  194. videos.append({
  195. "vod_id": vid,
  196. "vod_name": title,
  197. "vod_pic": img,
  198. "vod_remarks": remark
  199. })
  200. elif cid.startswith('UP主&&&'):
  201. cid = cid[6:]
  202. params = {'mid': cid, 'ps': 30, 'pn': page}
  203. # params = WBI().encWbi(params, imgKey, subKey)
  204. url = 'https://api.bilibili.com/x/space/wbi/arc/search?'
  205. for key in params:
  206. url += f'&{key}={quote(params[key])}'
  207. r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
  208. data = json.loads(self.cleanText(r.text))
  209. if page < data['data']['page']['count']:
  210. pagecount = page + 1
  211. else:
  212. pagecount = page
  213. if page == 1:
  214. bizId = self.regStr(reg='play/(.*?)\?', src=data['data']['episodic_button']['uri'])
  215. videos = [{"vod_id": f'UP主&&&{bizId}', "vod_name": '播放列表'}]
  216. vodList = data['data']['list']['vlist']
  217. for vod in vodList:
  218. vid = str(vod['aid']).strip()
  219. title = self.removeHtmlTags(vod['title']).replace("&quot;", '"')
  220. img = vod['pic'].strip()
  221. remarkinfos = vod['length'].split(':')
  222. minutes = int(remarkinfos[0])
  223. if minutes >= 60:
  224. hours = str(minutes // 60)
  225. minutes = str(minutes % 60)
  226. if len(hours) == 1:
  227. hours = '0' + hours
  228. if len(minutes) == 1:
  229. minutes = '0' + minutes
  230. remark = hours + ':' + minutes + ':' + remarkinfos[1]
  231. else:
  232. remark = vod['length']
  233. videos.append({
  234. "vod_id": vid,
  235. "vod_name": title,
  236. "vod_pic": img,
  237. "vod_remarks": remark
  238. })
  239. elif cid == '历史记录':
  240. url = f'http://api.bilibili.com/x/v2/history?pn={page}'
  241. r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
  242. data = json.loads(self.cleanText(r.text))
  243. if len(data['data']) == 300:
  244. pagecount = page + 1
  245. else:
  246. pagecount = page
  247. vodList = data['data']
  248. for vod in vodList:
  249. if vod['duration'] <= 0:
  250. continue
  251. vid = str(vod["aid"]).strip()
  252. img = vod["pic"].strip()
  253. title = self.removeHtmlTags(vod["title"]).replace("&quot;", '"')
  254. if vod['progress'] != -1:
  255. process = time.strftime('%H:%M:%S', time.gmtime(vod['progress']))
  256. totalTime = time.strftime('%H:%M:%S', time.gmtime(vod['duration']))
  257. if process.startswith('00:'):
  258. process = process[3:]
  259. if totalTime.startswith('00:'):
  260. totalTime = totalTime[3:]
  261. remark = process + '|' + totalTime
  262. videos.append({
  263. "vod_id": vid,
  264. "vod_name": title,
  265. "vod_pic": img,
  266. "vod_remarks": remark
  267. })
  268. else:
  269. url = 'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={}&page={}'
  270. for key in ext:
  271. if key == 'tid':
  272. cid = ext[key]
  273. continue
  274. url += f'&{key}={ext[key]}'
  275. url = url.format(cid, page)
  276. r = self.fetch(url, cookies=cookie, headers=self.header, timeout=5)
  277. data = json.loads(self.cleanText(r.text))
  278. pagecount = data['data']['numPages']
  279. vodList = data['data']['result']
  280. for vod in vodList:
  281. if vod['type'] != 'video':
  282. continue
  283. vid = str(vod['aid']).strip()
  284. title = self.removeHtmlTags(self.cleanText(vod['title']))
  285. img = 'https:' + vod['pic'].strip()
  286. remarkinfo = vod['duration'].split(':')
  287. minutes = int(remarkinfo[0])
  288. seconds = remarkinfo[1]
  289. if len(seconds) == 1:
  290. seconds = '0' + seconds
  291. if minutes >= 60:
  292. hour = str(minutes // 60)
  293. minutes = str(minutes % 60)
  294. if len(hour) == 1:
  295. hour = '0' + hour
  296. if len(minutes) == 1:
  297. minutes = '0' + minutes
  298. remark = f'{hour}:{m