channel.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. import base64
  2. from youtube import (util, yt_data_extract, local_playlist, subscriptions,
  3. playlist)
  4. from youtube import yt_app
  5. import settings
  6. import urllib
  7. import json
  8. from string import Template
  9. import youtube.proto as proto
  10. import html
  11. import math
  12. import gevent
  13. import re
  14. import cachetools.func
  15. import traceback
  16. import flask
  17. from flask import request
  18. headers_desktop = (
  19. ('Accept', '*/*'),
  20. ('Accept-Language', 'en-US,en;q=0.5'),
  21. ('X-YouTube-Client-Name', '1'),
  22. ('X-YouTube-Client-Version', '2.20180830'),
  23. ) + util.desktop_ua
  24. headers_mobile = (
  25. ('Accept', '*/*'),
  26. ('Accept-Language', 'en-US,en;q=0.5'),
  27. ('X-YouTube-Client-Name', '2'),
  28. ('X-YouTube-Client-Version', '2.20180830'),
  29. ) + util.mobile_ua
  30. real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
  31. generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
  32. # added an extra nesting under the 2nd base64 compared to v4
  33. # added tab support
  34. # changed offset field to uint id 1
  35. def channel_ctoken_v5(channel_id, page, sort, tab, view=1):
  36. new_sort = (2 if int(sort) == 1 else 1)
  37. offset = 30*(int(page) - 1)
  38. if tab == 'videos':
  39. tab = 15
  40. elif tab == 'shorts':
  41. tab = 10
  42. elif tab == 'streams':
  43. tab = 14
  44. pointless_nest = proto.string(80226972,
  45. proto.string(2, channel_id)
  46. + proto.string(3,
  47. proto.percent_b64encode(
  48. proto.string(110,
  49. proto.string(3,
  50. proto.string(tab,
  51. proto.string(1,
  52. proto.string(1,
  53. proto.unpadded_b64encode(
  54. proto.string(1,
  55. proto.string(1,
  56. proto.unpadded_b64encode(
  57. proto.string(2,
  58. b"ST:"
  59. + proto.unpadded_b64encode(
  60. proto.uint(1, offset)
  61. )
  62. )
  63. )
  64. )
  65. )
  66. )
  67. )
  68. # targetId, just needs to be present but
  69. # doesn't need to be correct
  70. + proto.string(2, "63faaff0-0000-23fe-80f0-582429d11c38")
  71. )
  72. # 1 - newest, 2 - popular
  73. + proto.uint(3, new_sort)
  74. )
  75. )
  76. )
  77. )
  78. )
  79. )
  80. return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
  81. def channel_about_ctoken(channel_id):
  82. return proto.make_protobuf(
  83. ('base64p',
  84. [
  85. [2, 80226972,
  86. [
  87. [2, 2, channel_id],
  88. [2, 3,
  89. ('base64p',
  90. [
  91. [2, 110,
  92. [
  93. [2, 3,
  94. [
  95. [2, 19,
  96. [
  97. [2, 1, b'66b0e9e9-0000-2820-9589-582429a83980'],
  98. ]
  99. ],
  100. ]
  101. ],
  102. ]
  103. ],
  104. ]
  105. )
  106. ],
  107. ]
  108. ],
  109. ]
  110. )
  111. )
  112. # https://github.com/user234683/youtube-local/issues/151
  113. def channel_ctoken_v4(channel_id, page, sort, tab, view=1):
  114. new_sort = (2 if int(sort) == 1 else 1)
  115. offset = str(30*(int(page) - 1))
  116. pointless_nest = proto.string(80226972,
  117. proto.string(2, channel_id)
  118. + proto.string(3,
  119. proto.percent_b64encode(
  120. proto.string(110,
  121. proto.string(3,
  122. proto.string(15,
  123. proto.string(1,
  124. proto.string(1,
  125. proto.unpadded_b64encode(
  126. proto.string(1,
  127. proto.unpadded_b64encode(
  128. proto.string(2,
  129. b"ST:"
  130. + proto.unpadded_b64encode(
  131. proto.string(2, offset)
  132. )
  133. )
  134. )
  135. )
  136. )
  137. )
  138. # targetId, just needs to be present but
  139. # doesn't need to be correct
  140. + proto.string(2, "63faaff0-0000-23fe-80f0-582429d11c38")
  141. )
  142. # 1 - newest, 2 - popular
  143. + proto.uint(3, new_sort)
  144. )
  145. )
  146. )
  147. )
  148. )
  149. )
  150. return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
  151. # SORT:
  152. # videos:
  153. # Popular - 1
  154. # Oldest - 2
  155. # Newest - 3
  156. # playlists:
  157. # Oldest - 2
  158. # Newest - 3
  159. # Last video added - 4
  160. # view:
  161. # grid: 0 or 1
  162. # list: 2
  163. def channel_ctoken_v3(channel_id, page, sort, tab, view=1):
  164. # page > 1 doesn't work when sorting by oldest
  165. offset = 30*(int(page) - 1)
  166. page_token = proto.string(61, proto.unpadded_b64encode(
  167. proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
  168. ))
  169. tab = proto.string(2, tab)
  170. sort = proto.uint(3, int(sort))
  171. shelf_view = proto.uint(4, 0)
  172. view = proto.uint(6, int(view))
  173. continuation_info = proto.string(3,
  174. proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
  175. )
  176. channel_id = proto.string(2, channel_id)
  177. pointless_nest = proto.string(80226972, channel_id + continuation_info)
  178. return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
  179. def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
  180. # see https://github.com/iv-org/invidious/issues/1319#issuecomment-671732646
  181. # page > 1 doesn't work when sorting by oldest
  182. offset = 30*(int(page) - 1)
  183. schema_number = {
  184. 3: 6307666885028338688,
  185. 2: 17254859483345278706,
  186. 1: 16570086088270825023,
  187. }[int(sort)]
  188. page_token = proto.string(61, proto.unpadded_b64encode(proto.string(1,
  189. proto.uint(1, schema_number) + proto.string(2,
  190. proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
  191. )
  192. )))
  193. tab = proto.string(2, tab)
  194. sort = proto.uint(3, int(sort))
  195. #page = proto.string(15, str(page))
  196. shelf_view = proto.uint(4, 0)
  197. view = proto.uint(6, int(view))
  198. continuation_info = proto.string(
  199. 3,
  200. proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
  201. )
  202. channel_id = proto.string(2, channel_id)
  203. pointless_nest = proto.string(80226972, channel_id + continuation_info)
  204. return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
  205. def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
  206. tab = proto.string(2, tab)
  207. sort = proto.uint(3, int(sort))
  208. page = proto.string(15, str(page))
  209. # example with shelves in videos tab: https://www.youtube.com/channel/UCNL1ZadSjHpjm4q9j2sVtOA/videos
  210. shelf_view = proto.uint(4, 0)
  211. view = proto.uint(6, int(view))
  212. continuation_info = proto.string(3, proto.percent_b64encode(tab + view + sort + shelf_view + page + proto.uint(23, 0)) )
  213. channel_id = proto.string(2, channel_id)
  214. pointless_nest = proto.string(80226972, channel_id + continuation_info)
  215. return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
  216. def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
  217. ctoken=None, print_status=True):
  218. message = 'Got channel tab' if print_status else None
  219. if not ctoken:
  220. if tab in ('videos', 'shorts', 'streams'):
  221. ctoken = channel_ctoken_v5(channel_id, page, sort, tab, view)
  222. else:
  223. ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
  224. ctoken = ctoken.replace('=', '%3D')
  225. # Not sure what the purpose of the key is or whether it will change
  226. # For now it seems to be constant for the API endpoint, not dependent
  227. # on the browsing session or channel
  228. key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
  229. url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key
  230. data = {
  231. 'context': {
  232. 'client': {
  233. 'hl': 'en',
  234. 'gl': 'US',
  235. 'clientName': 'WEB',
  236. 'clientVersion': '2.20240327.00.00',
  237. },
  238. },
  239. 'continuation': ctoken,
  240. }
  241. content_type_header = (('Content-Type', 'application/json'),)
  242. content = util.fetch_url(
  243. url, headers_desktop + content_type_header,
  244. data=json.dumps(data), debug_name='channel_tab', report_text=message)
  245. return content
  246. # cache entries expire after 30 minutes
  247. number_of_videos_cache = cachetools.TTLCache(128, 30*60)
  248. @cachetools.cached(number_of_videos_cache)
  249. def get_number_of_videos_channel(channel_id):
  250. if channel_id is None:
  251. return 1000
  252. # Uploads playlist
  253. playlist_id = 'UU' + channel_id[2:]
  254. url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
  255. try:
  256. response = util.fetch_url(url, headers_mobile,
  257. debug_name='number_of_videos', report_text='Got number of videos')
  258. except urllib.error.HTTPError as e:
  259. traceback.print_exc()
  260. print("Couldn't retrieve number of videos")
  261. return 1000
  262. response = response.decode('utf-8')
  263. # match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response)
  264. match = re.search(r'"numVideosText".*?([,\d]+)', response)
  265. if match:
  266. return int(match.group(1).replace(',',''))
  267. else:
  268. return 0
  269. def set_cached_number_of_videos(channel_id, num_videos):
  270. @cachetools.cached(number_of_videos_cache)
  271. def dummy_func_using_same_cache(channel_id):
  272. return num_videos
  273. dummy_func_using_same_cache(channel_id)
  274. channel_id_re = re.compile(r'videos\.xml\?channel_id=([a-zA-Z0-9_-]{24})"')
  275. @cachetools.func.lru_cache(maxsize=128)
  276. def get_channel_id(base_url):
  277. # method that gives the smallest possible response at ~4 kb
  278. # needs to be as fast as possible
  279. base_url = base_url.replace('https://www', 'https://m') # avoid redirect
  280. response = util.fetch_url(base_url + '/about?pbj=1', headers_mobile,
  281. debug_name='get_channel_id', report_text='Got channel id').decode('utf-8')
  282. match = channel_id_re.search(response)
  283. if match:
  284. return match.group(1)
  285. return None
  286. metadata_cache = cachetools.LRUCache(128)
  287. @cachetools.cached(metadata_cache)
  288. def get_metadata(channel_id):
  289. base_url = 'https://www.youtube.com/channel/' + channel_id
  290. polymer_json = util.fetch_url(base_url + '/about?pbj=1',
  291. headers_desktop,
  292. debug_name='gen_channel_about',
  293. report_text='Retrieved channel metadata')
  294. info = yt_data_extract.extract_channel_info(json.loads(polymer_json),
  295. 'about',
  296. continuation=False)
  297. return extract_metadata_for_caching(info)
  298. def set_cached_metadata(channel_id, metadata):
  299. @cachetools.cached(metadata_cache)
  300. def dummy_func_using_same_cache(channel_id):
  301. return metadata
  302. dummy_func_using_same_cache(channel_id)
  303. def extract_metadata_for_caching(channel_info):
  304. metadata = {}
  305. for key in ('approx_subscriber_count', 'short_description', 'channel_name',
  306. 'avatar'):
  307. metadata[key] = channel_info[key]
  308. return metadata
  309. def get_number_of_videos_general(base_url):
  310. return get_number_of_videos_channel(get_channel_id(base_url))
  311. def get_channel_search_json(channel_id, query, page):
  312. offset = proto.unpadded_b64encode(proto.uint(3, (page-1)*30))
  313. params = proto.string(2, 'search') + proto.string(15, offset)
  314. params = proto.percent_b64encode(params)
  315. ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
  316. ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')
  317. key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
  318. url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key
  319. data = {
  320. 'context': {
  321. 'client': {
  322. 'hl': 'en',
  323. 'gl': 'US',
  324. 'clientName': 'WEB',
  325. 'clientVersion': '2.20240327.00.00',
  326. },
  327. },
  328. 'continuation': ctoken,
  329. }
  330. content_type_header = (('Content-Type', 'application/json'),)
  331. polymer_json = util.fetch_url(
  332. url, headers_desktop + content_type_header,
  333. data=json.dumps(data), debug_name='channel_search')
  334. return polymer_json
  335. def post_process_channel_info(info):
  336. info['avatar'] = util.prefix_url(info['avatar'])
  337. info['channel_url'] = util.prefix_url(info['channel_url'])
  338. for item in info['items']:
  339. item['thumbnail'] = "https://i.ytimg.com/vi/{}/hqdefault.jpg".format(item['id'])
  340. util.prefix_urls(item)
  341. util.add_extra_html_info(item)
  342. if info['current_tab'] == 'about':
  343. for i, (text, url) in enumerate(info['links']):
  344. if isinstance(url, str) and util.YOUTUBE_URL_RE.fullmatch(url):
  345. info['links'][i] = (text, util.prefix_url(url))
  346. def get_channel_first_page(base_url=None, tab='videos', channel_id=None):
  347. if channel_id:
  348. base_url = 'https://www.youtube.com/channel/' + channel_id
  349. return util.fetch_url(base_url + '/' + tab + '?pbj=1&view=0',
  350. headers_desktop, debug_name='gen_channel_' + tab)
  351. playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}
  352. # youtube.com/[channel_id]/[tab]
  353. # youtube.com/user/[username]/[tab]
  354. # youtube.com/c/[custom]/[tab]
  355. # youtube.com/[custom]/[tab]
  356. def get_channel_page_general_url(base_url, tab, request, channel_id=None):
  357. page_number = int(request.args.get('page', 1))
  358. # sort 1: views
  359. # sort 2: oldest
  360. # sort 3: newest
  361. # sort 4: newest - no shorts (Just a kludge on our end, not internal to yt)
  362. default_sort = '3' if settings.include_shorts_in_channel else '4'
  363. sort = request.args.get('sort', default_sort)
  364. view = request.args.get('view', '1')
  365. query = request.args.get('query', '')
  366. ctoken = request.args.get('ctoken', '')
  367. include_shorts = (sort != '4')
  368. default_params = (page_number == 1 and sort in ('3', '4') and view == '1')
  369. continuation = bool(ctoken) # whether or not we're using a continuation
  370. page_size = 30
  371. try_channel_api = True
  372. polymer_json = None
  373. # Use the special UU playlist which contains all the channel's uploads
  374. if tab == 'videos' and sort in ('3', '4'):
  375. if not channel_id:
  376. channel_id = get_channel_id(base_url)
  377. if page_number == 1 and include_shorts:
  378. tasks = (
  379. gevent.spawn(playlist.playlist_first_page,
  380. 'UU' + channel_id[2:],
  381. report_text='Retrieved channel videos'),
  382. gevent.spawn(get_metadata, channel_id),
  383. )
  384. gevent.joinall(tasks)
  385. util.check_gevent_exceptions(*tasks)
  386. # Ignore the metadata for now, it is cached and will be
  387. # recalled later
  388. pl_json = tasks[0].value
  389. pl_info = yt_data_extract.extract_playlist_info(pl_json)
  390. number_of_videos = pl_info['metadata']['video_count']
  391. if number_of_videos is None:
  392. number_of_videos = 1000
  393. else:
  394. set_cached_number_of_videos(channel_id, number_of_videos)
  395. else:
  396. tasks = (
  397. gevent.spawn(playlist.get_videos, 'UU' + channel_id[2:],
  398. page_number, include_shorts=include_shorts),
  399. gevent.spawn(get_metadata, channel_id),
  400. gevent.spawn(get_number_of_videos_channel, channel_id),
  401. )
  402. gevent.joinall(tasks)
  403. util.check_gevent_exceptions(*tasks)
  404. pl_json = tasks[0].value
  405. pl_info = yt_data_extract.extract_playlist_info(pl_json)
  406. number_of_videos = tasks[2].value
  407. info = pl_info
  408. info['channel_id'] = channel_id
  409. info['current_tab'] = 'videos'
  410. if info['items']: # Success
  411. page_size = 100
  412. try_channel_api = False
  413. else: # Try the first-page method next
  414. try_channel_api = True
  415. # Use the regular channel API
  416. if tab in ('shorts', 'streams') or (tab=='videos' and try_channel_api):
  417. if channel_id:
  418. num_videos_call = (get_number_of_videos_channel, channel_id)
  419. else:
  420. num_videos_call = (get_number_of_videos_general, base_url)
  421. # Use ctoken method, which YouTube changes all the time
  422. if channel_id and not default_params:
  423. if sort == 4:
  424. _sort = 3
  425. else:
  426. _sort = sort
  427. page_call = (get_channel_tab, channel_id, page_number, _sort,
  428. tab, view, ctoken)
  429. # Use the first-page method, which won't break
  430. else:
  431. page_call = (get_channel_first_page, base_url, tab)
  432. tasks = (
  433. gevent.spawn(*num_videos_call),
  434. gevent.spawn(*page_call),
  435. )
  436. gevent.joinall(tasks)
  437. util.check_gevent_exceptions(*tasks)
  438. number_of_videos, polymer_json = tasks[0].value, tasks[1].value
  439. elif tab == 'about':
  440. # polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
  441. channel_id = get_channel_id(base_url)
  442. ctoken = channel_about_ctoken(channel_id)
  443. polymer_json = util.call_youtube_api('web', 'browse', {
  444. 'continuation': ctoken,
  445. })
  446. continuation=True
  447. elif tab == 'playlists' and page_number == 1:
  448. polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], headers_desktop, debug_name='gen_channel_playlists')
  449. elif tab == 'playlists':
  450. polymer_json = get_channel_tab(channel_id, page_number, sort,
  451. 'playlists', view)
  452. continuation = True
  453. elif tab == 'search' and channel_id:
  454. polymer_json = get_channel_search_json(channel_id, query, page_number)
  455. elif tab == 'search':
  456. url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
  457. polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
  458. elif tab == 'videos':
  459. pass
  460. else:
  461. flask.abort(404, 'Unknown channel tab: ' + tab)
  462. if polymer_json is not None:
  463. info = yt_data_extract.extract_channel_info(
  464. json.loads(polymer_json), tab, continuation=continuation
  465. )
  466. if info['error'] is not None:
  467. return flask.render_template('error.html', error_message=info['error'])
  468. if channel_id:
  469. info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id
  470. info['channel_id'] = channel_id
  471. else:
  472. channel_id = info['channel_id']
  473. # Will have microformat present, cache metadata while we have it
  474. if channel_id and default_params and tab not in ('videos', 'about'):
  475. metadata = extract_metadata_for_caching(info)
  476. set_cached_metadata(channel_id, metadata)
  477. # Otherwise, populate with our (hopefully cached) metadata
  478. elif channel_id and info.get('channel_name') is None:
  479. metadata = get_metadata(channel_id)
  480. for key, value in metadata.items():
  481. yt_data_extract.conservative_update(info, key, value)
  482. # need to add this metadata to the videos/playlists
  483. additional_info = {
  484. 'author': info['channel_name'],
  485. 'author_id': info['channel_id'],
  486. 'author_url': info['channel_url'],
  487. }
  488. for item in info['items']:
  489. item.update(additional_info)
  490. if tab in ('videos', 'shorts', 'streams'):
  491. info['number_of_videos'] = number_of_videos
  492. info['number_of_pages'] = math.ceil(number_of_videos/page_size)
  493. info['header_playlist_names'] = local_playlist.get_playlist_names()
  494. if tab in ('videos', 'shorts', 'streams', 'playlists'):
  495. info['current_sort'] = sort
  496. elif tab == 'search':
  497. info['search_box_value'] = query
  498. info['header_playlist_names'] = local_playlist.get_playlist_names()
  499. if tab in ('search', 'playlists'):
  500. info['page_number'] = page_number
  501. info['subscribed'] = subscriptions.is_subscribed(info['channel_id'])
  502. post_process_channel_info(info)
  503. return flask.render_template('channel.html',
  504. parameters_dictionary = request.args,
  505. **info
  506. )
  507. @yt_app.route('/channel/<channel_id>/')
  508. @yt_app.route('/channel/<channel_id>/<tab>')
  509. def get_channel_page(channel_id, tab='videos'):
  510. return get_channel_page_general_url('https://www.youtube.com/channel/' + channel_id, tab, request, channel_id)
  511. @yt_app.route('/user/<username>/')
  512. @yt_app.route('/user/<username>/<tab>')
  513. def get_user_page(username, tab='videos'):
  514. return get_channel_page_general_url('https://www.youtube.com/user/' + username, tab, request)
  515. @yt_app.route('/c/<custom>/')
  516. @yt_app.route('/c/<custom>/<tab>')
  517. def get_custom_c_page(custom, tab='videos'):
  518. return get_channel_page_general_url('https://www.youtube.com/c/' + custom, tab, request)
  519. @yt_app.route('/<custom>')
  520. @yt_app.route('/<custom>/<tab>')
  521. def get_toplevel_custom_page(custom, tab='videos'):
  522. return get_channel_page_general_url('https://www.youtube.com/' + custom, tab, request)