# sponsorblock.py (4.2 KB)
  1. import hashlib
  2. import json
  3. import re
  4. import urllib.parse
  5. from .ffmpeg import FFmpegPostProcessor
  6. class SponsorBlockPP(FFmpegPostProcessor):
  7. # https://wiki.sponsor.ajay.app/w/Types
  8. EXTRACTORS = {
  9. 'Youtube': 'YouTube',
  10. }
  11. POI_CATEGORIES = {
  12. 'poi_highlight': 'Highlight',
  13. }
  14. NON_SKIPPABLE_CATEGORIES = {
  15. **POI_CATEGORIES,
  16. 'chapter': 'Chapter',
  17. }
  18. CATEGORIES = {
  19. 'sponsor': 'Sponsor',
  20. 'intro': 'Intermission/Intro Animation',
  21. 'outro': 'Endcards/Credits',
  22. 'selfpromo': 'Unpaid/Self Promotion',
  23. 'preview': 'Preview/Recap',
  24. 'filler': 'Filler Tangent',
  25. 'interaction': 'Interaction Reminder',
  26. 'music_offtopic': 'Non-Music Section',
  27. **NON_SKIPPABLE_CATEGORIES,
  28. }
  29. def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
  30. FFmpegPostProcessor.__init__(self, downloader)
  31. self._categories = tuple(categories or self.CATEGORIES.keys())
  32. self._API_URL = api if re.match('https?://', api) else 'https://' + api
  33. def run(self, info):
  34. extractor = info['extractor_key']
  35. if extractor not in self.EXTRACTORS:
  36. self.to_screen(f'SponsorBlock is not supported for {extractor}')
  37. return [], info
  38. self.to_screen('Fetching SponsorBlock segments')
  39. info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
  40. return [], info
  41. def _get_sponsor_chapters(self, info, duration):
  42. segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])
  43. def duration_filter(s):
  44. start_end = s['segment']
  45. # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
  46. if start_end == (0, 0):
  47. return False
  48. # Ignore milliseconds difference at the start.
  49. if start_end[0] <= 1:
  50. start_end[0] = 0
  51. # Make POI chapters 1 sec so that we can properly mark them
  52. if s['category'] in self.POI_CATEGORIES:
  53. start_end[1] += 1
  54. # Ignore milliseconds difference at the end.
  55. # Never allow the segment to exceed the video.
  56. if duration and duration - start_end[1] <= 1:
  57. start_end[1] = duration
  58. # SponsorBlock duration may be absent or it may deviate from the real one.
  59. diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0
  60. return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05)
  61. duration_match = [s for s in segments if duration_filter(s)]
  62. if len(duration_match) != len(segments):
  63. self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')
  64. def to_chapter(s):
  65. (start, end), cat = s['segment'], s['category']
  66. title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
  67. return {
  68. 'start_time': start,
  69. 'end_time': end,
  70. 'category': cat,
  71. 'title': title,
  72. 'type': s['actionType'],
  73. '_categories': [(cat, start, end, title)],
  74. }
  75. sponsor_chapters = [to_chapter(s) for s in duration_match]
  76. if not sponsor_chapters:
  77. self.to_screen('No matching segments were found in the SponsorBlock database')
  78. else:
  79. self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
  80. return sponsor_chapters
  81. def _get_sponsor_segments(self, video_id, service):
  82. video_hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
  83. # SponsorBlock API recommends using first 4 hash characters.
  84. url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + urllib.parse.urlencode({
  85. 'service': service,
  86. 'categories': json.dumps(self._categories),
  87. 'actionTypes': json.dumps(['skip', 'poi', 'chapter']),
  88. })
  89. for d in self._download_json(url) or []:
  90. if d['videoID'] == video_id:
  91. return d['segments']
  92. return []