youtube.lua 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116
  1. --[[
  2. Copyright © 2007-2023 the VideoLAN team
  3. This program is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program; if not, write to the Free Software
  13. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  14. --]]
  -- Look up the value of a query parameter in a URL.
  -- url: string to search; name: parameter name, inserted as-is into
  -- a Lua pattern.
  -- Returns the text between "name=" and the next "&" (possibly an
  -- empty string), or nil when the parameter is not present.
  function get_url_param( url, name )
      return ( string.match( url, "[&?]"..name.."=([^&]*)" ) )
  end
  -- Build the "&name=value" fragment needed to carry a parameter over
  -- from an existing URL into a new one.
  -- Returns an empty string when the parameter is absent from url.
  function copy_url_param( url, name )
      local found = get_url_param( url, name )
      if found then
          return "&"..name.."="..found
      end
      return ""
  end
  -- Work out an artwork URL for the current item (vlc.path).
  -- An explicit "iurl" parameter wins; otherwise the default thumbnail
  -- address is built from the "v" video ID. Returns nil when neither
  -- parameter is present.
  function get_arturl()
      local art = get_url_param( vlc.path, "iurl" )
      if not art then
          local id = get_url_param( vlc.path, "v" )
          if id then
              art = vlc.access.."://img.youtube.com/vi/"..id.."/default.jpg"
          end
      end
      return art
  end
  -- Choose an itag from a legacy "fmt_list" string, whose entries look
  -- like "itag/WIDTHxHEIGHT/..." separated by commas.
  -- Returns nil when the "preferred-resolution" setting is negative
  -- (no limit requested), otherwise the itag as a string, or nil when
  -- the list contains no parsable entry.
  function get_fmt( fmt_list )
      local limit = vlc.var.inherit(nil, "preferred-resolution")
      if limit < 0 then
          return nil
      end

      -- Entries are apparently listed in decreasing quality order: the
      -- first one within the limit is returned, and if none fits the
      -- last (lowest quality) entry seen is the fallback.
      local chosen = nil
      for itag, height in string.gmatch( fmt_list, "(%d+)/%d+x(%d+)[^,]*" ) do
          chosen = itag
          if tonumber(height) <= limit then
              return chosen
          end
      end
      return chosen
  end
  -- Stand-in for vlc.readline(), which fails on very long lines
  -- (see #24957).
  -- Peeks at a window that doubles in size (starting at 64 KiB) until a
  -- newline shows up or the window reaches 1 MiB, then reads up to and
  -- including the newline, or the whole window if none was found.
  -- Returns nil as soon as vlc.peek() yields nothing.
  function read_long_line()
      local limit = 1024 * 1024
      local size = 32768
      local searched = 0
      local newline

      -- There is no EOF detection: the loop only ends on a newline or
      -- when the size limit is reached.
      while true do
          size = size * 2
          local chunk = vlc.peek( size )
          if not chunk then
              return nil
          end
          -- Only scan the part not covered by the previous window
          newline = string.find( chunk, "\n", searched + 1 )
          searched = size
          if newline or size >= limit then
              break
          end
      end

      return vlc.read( newline or size )
  end
  -- Caching line iterator, so the same HTTP stream can be walked several
  -- times without issuing several HTTP requests.
  -- s is the iterator state: s.i is the current position, s.lines the
  -- cache of lines already read, s.stream the object read from.
  -- Returns the next (possibly re-assembled) line, or nil at the end.
  function buf_iter( s )
      s.i = s.i + 1

      local cached = s.lines[s.i]
      if cached then
          return cached
      end

      -- Statements split across several physical lines are glued back
      -- together until the text ends with "};", otherwise they could not
      -- be parsed.
      local joined = nil
      while true do
          local piece = s.stream:readline()
          if not piece then
              break
          end
          if joined then
              joined = joined..piece
          else
              joined = piece
          end
          if string.match( joined, "};$" ) then
              break
          end
      end

      if joined then
          s.lines[s.i] = joined
      end
      return joined
  end
  -- Scan the buffered javascript stream from its first line and return
  -- what the first line matching the Lua pattern yields (its first
  -- capture, or the whole match if the pattern has no capture).
  -- Returns nil when no line matches.
  function js_extract( js, pattern )
      js.i = 0 -- Rewind to the beginning
      local line = buf_iter( js )
      while line do
          local found = string.match( line, pattern )
          if found then
              return found
          end
          line = buf_iter( js )
      end
      return nil
  end
  -- Descramble the "n" parameter using the javascript code that does that
  -- in the web page.
  --
  -- nparam: scrambled value of the "n" URL parameter (string)
  -- js:     shared javascript resource table ( url, stream, lines, i );
  --         the stream is opened lazily from js.url on first use
  --
  -- Returns nil when the javascript code can't be fetched or parsed.
  -- Otherwise returns the content of the "n" array as a string; if the
  -- transformation chain aborts midway, this is the partially
  -- transformed value reached at that point.
  function n_descramble( nparam, js )
      if not js.stream then
          if not js.url then
              return nil
          end
          -- Open the player code from the URL carried by the shared js
          -- table, the same field that was just checked above
          js.stream = vlc.stream( js.url )
          if not js.stream then
              -- Retry once for transient errors
              js.stream = vlc.stream( js.url )
              if not js.stream then
                  return nil
              end
          end
      end

      -- Look for the descrambler function's name
      -- a.C&&(b=a.get("n"))&&(b=Bpa[0](b),a.set("n",b),Bpa.length||iha(""))}};
      -- var Bpa=[iha];
      local callsite = js_extract( js, '[^;]*%.set%("n",[^};]*' )
      if not callsite then
          vlc.msg.dbg( "Couldn't extract YouTube video throttling parameter descrambling function name" )
          return nil
      end

      -- Try direct function name from following clause
      local descrambler = string.match( callsite, '%.set%("n",.%),...?%.length||(...?)%(' )
      local itm = nil
      if not descrambler then
          -- Try from main call site
          descrambler = string.match( callsite, '[=%(,&|]([a-zA-Z0-9_$%[%]]+)%(.%),.%.set%("n",' )
          if descrambler then
              -- Check if this is only an intermediate variable
              itm = string.match( descrambler, '^([^%[%]]+)%[' )
              if itm then
                  descrambler = nil
              end
          else
              -- Last chance: intermediate variable in following clause
              itm = string.match( callsite, '%.set%("n",.%),(...?)%.length' )
          end
      end

      if not descrambler and itm then
          -- Resolve intermediate variable
          descrambler = js_extract( js, 'var '..itm..'=%[(...?)[%],]' )
      end

      if not descrambler then
          vlc.msg.dbg( "Couldn't extract YouTube video throttling parameter descrambling function name" )
          return nil
      end

      -- Fetch the code of the descrambler function
      -- lha=function(a){var b=a.split(""),c=[310282131,"KLf3",b,null,function(d,e){d.push(e)},-45817231, [data and transformations...] ,1248130556];c[3]=c;c[15]=c;c[18]=c;try{c[40](c[14],c[2]),c[25](c[48]),c[21](c[32],c[23]), [scripted calls...] ,c[25](c[33],c[3])}catch(d){return"enhanced_except_4ZMBnuz-_w8_"+a}return b.join("")};
      local code = js_extract( js, "^"..descrambler.."=function%([^)]*%){(.-)};" )
      if not code then
          vlc.msg.dbg( "Couldn't extract YouTube video throttling parameter descrambling code" )
          return nil
      end

      -- Split code into two main sections: 1/ data and transformations,
      -- and 2/ a script of calls
      local datac, script = string.match( code, "c=%[(.*)%];.-;try{(.*)}catch%(" )
      if ( not datac ) or ( not script ) then
          vlc.msg.dbg( "Couldn't extract YouTube video throttling parameter descrambling rules" )
          return nil
      end

      -- Split "n" parameter into a table as descrambling operates on it
      -- as one of several arrays
      local n = {}
      for c in string.gmatch( nparam, "." ) do
          table.insert( n, c )
      end

      -- Helper: count the elements of the array part of a table
      local table_len = function( tab )
          local len = 0
          for i, val in ipairs( tab ) do
              len = len + 1
          end
          return len
      end

      -- Shared core section of compound transformations: it compounds
      -- the "n" parameter with an input string, character by character,
      -- using a Base64 alphabet as algebraic modulo group.
      -- var h=f.length;d.forEach(function(l,m,n){this.push(n[m]=f[(f.indexOf(l)-f.indexOf(this[m])+m+h--)%f.length])},e.split(""))
      -- Like every transformation below, it returns true on failure and
      -- nothing on success.
      local compound = function( ntab, str, alphabet )
          if ntab ~= n or
             type( str ) ~= "string" or
             type( alphabet ) ~= "string" then
              return true
          end
          local input = {}
          for c in string.gmatch( str, "." ) do
              table.insert( input, c )
          end

          local len = string.len( alphabet )
          for i, c in ipairs( ntab ) do
              if type( c ) ~= "string" then
                  return true
              end
              local pos1 = string.find( alphabet, c, 1, true )
              local pos2 = string.find( alphabet, input[i], 1, true )
              if ( not pos1 ) or ( not pos2 ) then
                  return true
              end
              local pos = ( pos1 - pos2 ) % len
              local newc = string.sub( alphabet, pos + 1, pos + 1 )
              ntab[i] = newc
              table.insert( input, newc )
          end
      end

      -- The data section contains among others function code for a number
      -- of transformations, most of which are basic array operations.
      -- We can match these functions' code to identify them, and emulate
      -- the corresponding transformations.
      local trans = {
          reverse = {
              func = function( tab )
                  local len = table_len( tab )
                  local tmp = {}
                  for i, val in ipairs( tab ) do
                      tmp[len - i + 1] = val
                  end
                  for i, val in ipairs( tmp ) do
                      tab[i] = val
                  end
              end,
              match = {
                  -- function(d){d.reverse()}
                  -- function(d){for(var e=d.length;e;)d.push(d.splice(--e,1)[0])}
                  "^function%(d%)",
              }
          },
          append = {
              func = function( tab, val )
                  table.insert( tab, val )
              end,
              match = {
                  -- function(d,e){d.push(e)}
                  "^function%(d,e%){d%.push%(e%)},",
              }
          },
          remove = {
              func = function( tab, i )
                  if type( i ) ~= "number" then
                      return true
                  end
                  i = i % table_len( tab )
                  table.remove( tab, i + 1 )
              end,
              match = {
                  -- function(d,e){e=(e%d.length+d.length)%d.length;d.splice(e,1)}
                  "^[^}]-;d%.splice%(e,1%)},",
              }
          },
          swap = {
              func = function( tab, i )
                  if type( i ) ~= "number" then
                      return true
                  end
                  i = i % table_len( tab )
                  local tmp = tab[1]
                  tab[1] = tab[i + 1]
                  tab[i + 1] = tmp
              end,
              match = {
                  -- function(d,e){e=(e%d.length+d.length)%d.length;var f=d[0];d[0]=d[e];d[e]=f}
                  -- function(d,e){e=(e%d.length+d.length)%d.length;d.splice(0,1,d.splice(e,1,d[0])[0])}
                  "^[^}]-;var f=d%[0%];d%[0%]=d%[e%];d%[e%]=f},",
                  "^[^}]-;d%.splice%(0,1,d%.splice%(e,1,d%[0%]%)%[0%]%)},",
              }
          },
          rotate = {
              func = function( tab, shift )
                  if type( shift ) ~= "number" then
                      return true
                  end
                  local len = table_len( tab )
                  shift = shift % len
                  local tmp = {}
                  for i, val in ipairs( tab ) do
                      tmp[( i - 1 + shift ) % len + 1] = val
                  end
                  for i, val in ipairs( tmp ) do
                      tab[i] = val
                  end
              end,
              match = {
                  -- function(d,e){for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())}
                  -- function(d,e){e=(e%d.length+d.length)%d.length;d.splice(-e).reverse().forEach(function(f){d.unshift(f)})}
                  "^[^}]-d%.unshift%(d.pop%(%)%)},",
                  "^[^}]-d%.unshift%(f%)}%)},",
              }
          },
          -- Here functions with no arguments are not really functions,
          -- they're constants: treat them as such. These alphabets are
          -- passed to and used by the compound transformations.
          alphabet1 = {
              func = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_",
              match = {
                  -- function(){for(var d=64,e=[];++d-e.length-32;){switch(d){case 91:d=44;continue;case 123:d=65;break;case 65:d-=18;continue;case 58:d=96;continue;case 46:d=95}e.push(String.fromCharCode(d))}return e}
                  "^function%(%){[^}]-case 58:d=96;",
              }
          },
          alphabet2 = {
              func = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
              match = {
                  -- function(){for(var d=64,e=[];++d-e.length-32;){switch(d){case 58:d-=14;case 91:case 92:case 93:continue;case 123:d=47;case 94:case 95:case 96:continue;case 46:d=95}e.push(String.fromCharCode(d))}return e}
                  -- function(){for(var d=64,e=[];++d-e.length-32;)switch(d){case 46:d=95;default:e.push(String.fromCharCode(d));case 94:case 95:case 96:break;case 123:d-=76;case 92:case 93:continue;case 58:d=44;case 91:}return e}
                  "^function%(%){[^}]-case 58:d%-=14;",
                  "^function%(%){[^}]-case 58:d=44;",
              }
          },
          -- Compound transformations are based on a shared core section
          -- that compounds the "n" parameter with an input string,
          -- character by character, using a variation of a Base64
          -- alphabet as algebraic modulo group.
          compound = {
              func = compound,
              match = {
                  -- function(d,e,f){var h=f.length;d.forEach(function(l,m,n){this.push(n[m]=f[(f.indexOf(l)-f.indexOf(this[m])+m+h--)%f.length])},e.split(""))}
                  "^function%(d,e,f%)",
              }
          },
          -- These compound transformation variants first build their
          -- Base64 alphabet themselves, before using it.
          compound1 = {
              func = function( ntab, str )
                  return compound( ntab, str, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_" )
              end,
              match = {
                  -- function(d,e){for(var f=64,h=[];++f-h.length-32;)switch(f){case 58:f=96;continue;case 91:f=44;break;case 65:f=47;continue;case 46:f=153;case 123:f-=58;default:h.push(String.fromCharCode(f))} [ compound... ] }
                  "^function%(d,e%){[^}]-case 58:f=96;",
              }
          },
          compound2 = {
              func = function( ntab, str )
                  return compound( ntab, str,"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" )
              end,
              match = {
                  -- function(d,e){for(var f=64,h=[];++f-h.length-32;){switch(f){case 58:f-=14;case 91:case 92:case 93:continue;case 123:f=47;case 94:case 95:case 96:continue;case 46:f=95}h.push(String.fromCharCode(f))} [ compound... ] }
                  -- function(d,e){for(var f=64,h=[];++f-h.length-32;)switch(f){case 46:f=95;default:h.push(String.fromCharCode(f));case 94:case 95:case 96:break;case 123:f-=76;case 92:case 93:continue;case 58:f=44;case 91:} [ compound... ] }
                  "^function%(d,e%){[^}]-case 58:f%-=14;",
                  "^function%(d,e%){[^}]-case 58:f=44;",
              }
          },
          -- Fallback
          unid = {
              func = function( )
                  vlc.msg.dbg( "Couldn't apply unidentified YouTube video throttling parameter transformation, aborting descrambling" )
                  return true
              end,
              match = {
              }
          },
      }

      -- The data section actually mixes input data, reference to the
      -- "n" parameter array, and self-reference to its own array, with
      -- transformation functions used to modify itself. We parse it
      -- as such into a table.
      local data = {}
      datac = datac..","
      while datac and datac ~= "" do
          local el = nil
          -- Transformation functions
          if string.match( datac, "^function%(" ) then
              for name, tr in pairs( trans ) do
                  for i, match in ipairs( tr.match ) do
                      if string.match( datac, match ) then
                          el = tr.func
                          break
                      end
                  end
                  if el then
                      break
                  end
              end
              if not el then
                  el = trans.unid.func
                  vlc.msg.warn( "Couldn't parse unidentified YouTube video throttling parameter transformation" )
              end

              -- Compounding functions use a subfunction, so we need to be
              -- more specific in how much parsed data we consume.
              if el == trans.compound.func or
                 el == trans.compound1.func or
                 el == trans.compound2.func then
                  datac = string.match( datac, '^.-},e%.split%(""%)%)},(.*)$' )
                          or string.match( datac, "^.-},(.*)$" )
              else
                  datac = string.match( datac, "^.-},(.*)$" )
              end

          -- String input data
          elseif string.match( datac, '^"[^"]*",' ) then
              el, datac = string.match( datac, '^"([^"]*)",(.*)$' )
          -- Integer input data
          -- 1818016376,-648890305,-1200559E3, ...
          elseif string.match( datac, '^%-?%d+,' ) or
                 string.match( datac, '^%-?%d+[eE]%-?%d+,' ) then
              el, datac = string.match( datac, "^(.-),(.*)$" )
              el = tonumber( el )
          -- Reference to "n" parameter array
          elseif string.match( datac, '^b,' ) then
              el = n
              datac = string.match( datac, "^b,(.*)$" )
          -- Replaced by self-reference to data array after its declaration
          elseif string.match( datac, '^null,' ) then
              el = data
              datac = string.match( datac, "^null,(.*)$" )
          else
              vlc.msg.warn( "Couldn't parse unidentified YouTube video throttling parameter descrambling data" )
              el = false -- Lua tables can't contain nil values
              datac = string.match( datac, "^[^,]-,(.*)$" )
          end

          table.insert( data, el )
      end

      -- Debugging helper to print data array elements
      local prd = function( el, tab )
          if not el then
              return "???"
          elseif el == n then
              return "n"
          elseif el == data then
              return "data"
          elseif type( el ) == "string" then
              return '"'..el..'"'
          elseif type( el ) == "number" then
              el = tostring( el )
              if type( tab ) == "table" then
                  el = el.." -> "..( el % table_len( tab ) )
              end
              return el
          else
              for name, tr in pairs( trans ) do
                  if el == tr.func then
                      return name
                  end
              end
              return tostring( el )
          end
      end

      -- The script section contains a series of calls to elements of
      -- the data section array onto other elements of it: calls to
      -- transformations, with a reference to the data array itself or
      -- the "n" parameter array as first argument, and often input data
      -- as a second argument. We parse and emulate those calls to follow
      -- the descrambling script.
      -- c[40](c[14],c[2]),c[25](c[48]),c[14](c[1],c[24],c[42]()), [...]
      if not string.match( script, "c%[(%d+)%]%(c%[(%d+)%]([^)]-)%)" ) then
          vlc.msg.dbg( "Couldn't parse and execute YouTube video throttling parameter descrambling rules" )
          return nil
      end
      for ifunc, itab, args in string.gmatch( script, "c%[(%d+)%]%(c%[(%d+)%]([^)]-)%)" ) do
          local iarg1 = string.match( args, "^,c%[(%d+)%]" )
          local iarg2 = string.match( args, "^,[^,]-,c%[(%d+)%]" )

          -- Javascript array indices are 0-based, Lua tables 1-based
          local func = data[tonumber( ifunc ) + 1]
          local tab = data[tonumber( itab ) + 1]
          local arg1 = iarg1 and data[tonumber( iarg1 ) + 1]
          local arg2 = iarg2 and data[tonumber( iarg2 ) + 1]

          -- Uncomment to debug transformation chain
          --vlc.msg.err( '"n" parameter transformation: '..prd( func ).."("..prd( tab )..( arg1 ~= nil and ( ", "..prd( arg1, tab ) ) or "" )..( arg2 ~= nil and ( ", "..prd( arg2, tab ) ) or "" )..") "..ifunc.."("..itab..( iarg1 and ( ", "..iarg1 ) or "" )..( iarg2 and ( ", "..iarg2 ) or "" )..")" )
          --local nprev = table.concat( n )

          -- Transformations return true to signal a failure
          if type( func ) ~= "function" or type( tab ) ~= "table"
              or func( tab, arg1, arg2 ) then
              vlc.msg.dbg( "Invalid data type encountered during YouTube video throttling parameter descrambling transformation chain, aborting" )
              vlc.msg.dbg( "Couldn't descramble YouTube throttling URL parameter: data transfer will get throttled" )
              vlc.msg.err( "Couldn't process youtube video URL, please check for updates to this script" )
              break
          end

          -- Uncomment to debug transformation chain
          --local nnew = table.concat( n )
          --if nprev ~= nnew then
          --    vlc.msg.dbg( '"n" parameter transformation: '..nprev.." -> "..nnew )
          --end
      end

      return table.concat( n )
  end
  -- Descramble the URL signature by following the rules found in the web
  -- page's javascript player code.
  --
  -- sig: scrambled signature (string)
  -- js:  shared javascript resource table; its stream is opened lazily
  --      from js.url on first use
  --
  -- Returns nil when the javascript code can't be fetched or parsed,
  -- otherwise the signature after applying every recognized rule.
  function sig_descramble( sig, js )
      if not js.stream then
          if not js.url then
              return nil
          end
          -- The second attempt is a single retry for transient errors
          js.stream = vlc.stream( js.url ) or vlc.stream( js.url )
          if not js.stream then
              return nil
          end
      end

      -- Name of the descrambler function, taken from its call site:
      -- if(h.s){var l=h.sp,m=wja(decodeURIComponent(h.s));f.set(l,encodeURIComponent(m))}
      -- k.s (from stream map field "s") holds the input scrambled signature
      -- k.sp (from stream map field "sp") holds a parameter name (normally
      -- "signature" or "sig") to set with the output, descrambled signature
      local fname = js_extract( js, "[=%(,&|](...?)%(decodeURIComponent%(.%.s%)%)" )
      if not fname then
          vlc.msg.dbg( "Couldn't extract youtube video URL signature descrambling function name" )
          return nil
      end

      -- Body of the descrambler function, i.e. the list of rules:
      -- Go=function(a){a=a.split("");Fo.sH(a,2);Fo.TU(a,28);Fo.TU(a,44);Fo.TU(a,26);Fo.TU(a,40);Fo.TU(a,64);Fo.TR(a,26);Fo.sH(a,1);return a.join("")};
      local body = js_extract( js, "^"..fname.."=function%([^)]*%){(.-)};" )
      if not body then
          vlc.msg.dbg( "Couldn't extract youtube video URL signature descrambling rules" )
          return nil
      end

      -- Name of the helper object whose methods implement the rules
      local object = string.match( body, ";(..)%...%(" )
      if not object then
          vlc.msg.dbg( "Couldn't extract youtube video URL signature transformation helper name" )
          return nil
      end

      -- Code of the helper object:
      -- var Fo={TR:function(a){a.reverse()},TU:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c},sH:function(a,b){a.splice(0,b)}};
      local defs = js_extract( js, "[ ,]"..object.."={(.-)};" )
      if not defs then
          vlc.msg.dbg( "Couldn't extract youtube video URL signature transformation code" )
          return nil
      end

      -- Telltale code fragments, probed in this order, and the
      -- transformation each one identifies
      local probes = {
          { "%.reverse%(", "reverse" }, -- a=a.reverse()
          { "%.splice%(", "slice" },    -- a.splice(0,b)
          { "var c=", "swap" },         -- var c=a[0];a[0]=a[b%a.length];a[b]=c
      }

      -- Map each helper method name to the transformation it performs
      local known = {}
      for meth, code in string.gmatch( defs, "(..):function%([^)]*%){([^}]*)}" ) do
          local kind = nil
          for _, probe in ipairs( probes ) do
              if string.match( code, probe[1] ) then
                  kind = probe[2]
                  break
              end
          end
          if kind then
              known[meth] = kind
          else
              vlc.msg.warn("Couldn't parse unknown youtube video URL signature transformation")
          end
      end

      -- Walk through the rules in order and apply each one to the signature
      local incomplete = false
      for meth, arg in string.gmatch( body, "..%.(..)%([^,]+,(%d+)%)" ) do
          local count = tonumber( arg )
          local kind = known[meth]
          if kind == "reverse" then
              sig = string.reverse( sig )
          elseif kind == "slice" then
              sig = string.sub( sig, count + 1 )
          elseif kind == "swap" then
              -- Exchange the first character with the one at offset count
              if count > 1 then
                  sig = string.gsub( sig, "^(.)("..string.rep( ".", count - 1 )..")(.)(.*)$", "%3%2%1%4" )
              elseif count == 1 then
                  sig = string.gsub( sig, "^(.)(.)", "%2%1" )
              end
          else
              vlc.msg.dbg("Couldn't apply unknown youtube video URL signature transformation")
              incomplete = true
          end
      end
      if incomplete then
          vlc.msg.err( "Couldn't process youtube video URL, please check for updates to this script" )
      end

      return sig
  end
  -- Assemble a playable stream URL out of a "key=value&..." parameter
  -- string.
  -- The "url" field is URI-decoded; when an "s" field (scrambled
  -- signature) is present, it is descrambled and appended under the
  -- parameter name given by the "sp" field.
  -- Returns nil when params holds no "url" field.
  function stream_url( params, js )
      local encoded = string.match( params, "url=([^&]+)" )
      if not encoded then
          return nil
      end
      local url = vlc.strings.decode_uri( encoded )

      local scrambled = string.match( params, "s=([^&]+)" )
      if not scrambled then
          -- No signature to deal with
          return url
      end

      scrambled = vlc.strings.decode_uri( scrambled )
      vlc.msg.dbg( "Found "..string.len( scrambled ).."-character scrambled signature for youtube video URL, attempting to descramble... " )
      local signature = sig_descramble( scrambled, js )
      if not signature then
          vlc.msg.dbg( "Couldn't descramble YouTube video URL signature" )
          vlc.msg.err( "Couldn't process youtube video URL, please check for updates to this script" )
          -- Fall back to the signature as received
          signature = scrambled
      end

      local sp = string.match( params, "sp=([^&]+)" )
      if not sp then
          vlc.msg.warn( "Couldn't extract signature parameters for youtube video URL, guessing" )
          sp = "signature"
      end

      return url.."&"..sp.."="..vlc.strings.encode_uri_component( signature )
  end
  -- Parse and pick our video stream URL (classic parameters, out of use).
  -- The comma-separated stream map is still walked, but the call that
  -- assembled a URL is commented out, so nil is returned on every path.
  function pick_url( url_map, fmt, js_url )
      for entry in string.gmatch( url_map, "[^,]+" ) do
          local tag = string.match( entry, "itag=(%d+)" )
          local wanted = ( not fmt ) or ( not tag )
                         or tonumber( tag ) == tonumber( fmt )
          if wanted then
              return nil -- stream_url( stream, js_url )
          end
      end
      return nil
  end
  -- Select one stream description from a serialized array of stream
  -- objects.
  -- formats: text holding "{...}" stream objects, or nil
  -- fmt:     a numeric itag for an exact match, "audio" or "video" to
  --          restrict the choice, or anything else for the default
  --          resolution-based selection
  -- Returns the content of the selected object (text between its
  -- braces), or nil when nothing suitable was found.
  function pick_stream( formats, fmt )
      if not formats then
          return nil
      end

      -- Strip subobject fields so that each stream object can be
      -- captured by a plain '{(.-)}' match
      formats = string.gsub( formats, '"[^"]-":{[^{}]-},?', '' )

      local wanted_itag = tonumber( fmt )
      if wanted_itag then
          -- Legacy match from URL parameter
          for stream in string.gmatch( formats, '{(.-)}' ) do
              if wanted_itag == tonumber( string.match( stream, '"itag":(%d+)' ) ) then
                  return stream
              end
          end
          return nil
      end

      -- Weigh the formats listed against our quality target
      local prefres = vlc.var.inherit( nil, "preferred-resolution" )
      local bestres, pick
      for stream in string.gmatch( formats, '{(.-)}' ) do
          local height = tonumber( string.match( stream, '"height":(%d+)' ) )

          -- There is no preference mechanism for audio formats,
          -- so the first one is picked
          if fmt == "audio" and not height then
              return stream
          end

          local take = false
          if not pick and fmt ~= "video" then
              -- Better than nothing
              take = true
          elseif height then
              if not bestres then
                  take = true
              elseif ( prefres < 0 or height <= prefres ) and height > bestres then
                  -- Better quality within limits
                  take = true
              elseif prefres > -1 and bestres > prefres and height < bestres then
                  -- Lower quality more suited to limits
                  take = true
              end
          end

          if take then
              bestres = height
              pick = stream
          end
      end
      return pick
  end
  -- Parse and pick our video stream URL (new-style parameters).
  -- muxed, adaptive: serialized stream object arrays to choose from
  -- js_url:          URL of the javascript player code, possibly nil
  -- fmt:             numeric itag, "audio", "video", "hd", or anything
  --                  else for the default selection
  -- Returns the assembled URL, or nil if no stream could be picked.
  -- With "hd", two values (video URL, audio URL) are returned when both
  -- adaptive streams could be assembled.
  function pick_stream_url( muxed, adaptive, js_url, fmt )
      -- Shared JavaScript resources - lazy initialization
      local js = { url = js_url, stream = nil, lines = {}, i = 0 }
      if not js.url then
          vlc.msg.warn( "Couldn't extract YouTube JavaScript player code URL, descrambling functions unavailable" )
      end

      local chosen = nil
      if tonumber( fmt ) then
          -- Specific numeric itag: look in the muxed list first, then
          -- in the adaptive one
          chosen = pick_stream( muxed, fmt ) or pick_stream( adaptive, fmt )
      elseif fmt == "audio" or fmt == "video" then
          -- Audio only or video only was asked for: no fallback
          chosen = pick_stream( adaptive, fmt )
      else
          if fmt == "hd" then
              -- Try and leverage the full array of adaptive formats
              local audio = pick_stream( adaptive, "audio" )
              local video = pick_stream( adaptive, "video" )
              if audio and video then
                  local audio_url = assemble_stream_url( audio, js )
                  local video_url = assemble_stream_url( video, js )
                  if audio_url and video_url then
                      return video_url, audio_url
                  end
              end
          end
          -- Default or fallback: safe old multiplexed streams,
          -- but reduced to a single, low-definition format
          -- available in some cases
          chosen = pick_stream( muxed, fmt )
      end

      if not chosen then
          return nil
      end
      return assemble_stream_url( chosen, js )
  end
  -- Parse, descramble and assemble the elements of a video stream URL.
  -- pick: content of one stream object, as returned by pick_stream()
  -- js:   shared javascript resource table handed to the descramblers
  -- Returns the stream URL, or nil when pick carries neither a usable
  -- cipher field nor a "url" field.
  function assemble_stream_url( pick, js )
      -- 1/ URL signature
      -- Either the "url" or the "signatureCipher" parameter is present,
      -- depending on whether the URL signature is scrambled.
      local cipher = string.match( pick, '"signatureCipher":"(.-)"' )
                     or string.match( pick, '"[a-zA-Z]*[Cc]ipher":"(.-)"' )

      -- Scrambled signature: some assembly required
      local url = cipher and stream_url( cipher, js )
      if not url then
          -- Unscrambled signature, already included in ready-to-use URL
          url = string.match( pick, '"url":"(.-)"' )
          if not url then
              return nil
          end
      end

      -- 2/ Data transfer throttling
      -- The "n" parameter is scrambled too, and needs to be descrambled
      -- and replaced in place, otherwise the data transfer gets throttled
      -- down to between 40 and 80 kB/s, below real-time playability level.
      local n = string.match( url, "[?&]n=([^&]+)" )
      if not n then
          return url
      end

      n = vlc.strings.decode_uri( n )
      -- The descrambling call is commented out here, so dn is always nil
      -- and the replacement branch below is currently never taken.
      local dn = nil -- n_descramble( n, js )
      if dn then
          url = string.gsub( url, "([?&])n=[^&]+", "%1n="..vlc.strings.encode_uri_component( dn ), 1 )
      else
          vlc.msg.err( "Couldn't descramble YouTube throttling URL parameter: data transfer will get throttled" )
          --vlc.msg.err( "Couldn't process youtube video URL, please check for updates to this script" )
      end
      return url
  end
  -- Probe function.
  -- Tells whether this script handles the current input: an http(s)
  -- access on a known YouTube host with a recognised page path, or any
  -- path on the cookie consent host. The result is truthy (the matched
  -- substring) when handled, and false or nil otherwise.
  function probe()
      local access = vlc.access
      if access ~= "http" and access ~= "https" then
          return false
      end

      local location = vlc.path
      local known_host = string.match( location, "^www%.youtube%.com/" )
          or string.match( location, "^music%.youtube%.com/" )
          or string.match( location, "^gaming%.youtube%.com/" ) -- out of use
      if known_host then
          local known_page = string.match( location, "/watch%?" ) -- the html page
              or string.match( location, "/live$" ) -- user live stream html page
              or string.match( location, "/live%?" ) -- user live stream html page
              or string.match( location, "/shorts/" ) -- YouTube Shorts HTML page
              or string.match( location, "/get_video_info%?" ) -- info API
              or string.match( location, "/v/" ) -- video in swf player
              or string.match( location, "/embed/" ) -- embedded player iframe
          if known_page then
              return known_page
          end
      end

      -- Cookie consent redirection host
      return ( string.match( location, "^consent%.youtube%.com/" ) )
  end
  -- Parse function.
  -- Builds the playlist for the current input (vlc.access / vlc.path).
  -- Depending on the URL it either:
  --   * follows a cookie consent redirection,
  --   * rewrites a non-www subdomain to www.youtube.com,
  --   * scans the HTML watch page for metadata and stream URLs,
  --   * parses the output of the retired get_video_info API, or
  --   * rewrites any other supported URL to the /watch?v= form.
  -- Returns a list of playlist item tables (path, and where available
  -- name, description, artist, arturl, options); the list is empty on
  -- failure.
  function parse()
      if string.match( vlc.path, "^consent%.youtube%.com/" ) then
          -- Cookie consent redirection
          -- Location: https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DXXXXXXXXXXX&gl=FR&m=0&pc=yt&uxe=23983172&hl=fr&src=1
          -- Set-Cookie: CONSENT=PENDING+355; expires=Fri, 01-Jan-2038 00:00:00 GMT; path=/; domain=.youtube.com
          local url = get_url_param( vlc.path, "continue" )
          if not url then
              vlc.msg.err( "Couldn't handle YouTube cookie consent redirection, please check for updates to this script or try disabling HTTP cookie forwarding" )
              return { }
          end
          return { { path = vlc.strings.decode_uri( url ), options = { ":no-http-forward-cookies" } } }
      elseif not string.match( vlc.path, "^www%.youtube%.com/" ) then
          -- Skin subdomain
          return { { path = vlc.access.."://"..string.gsub( vlc.path, "^([^/]*)/", "www.youtube.com/" ) } }

      elseif string.match( vlc.path, "/watch%?" )
          or string.match( vlc.path, "/live$" )
          or string.match( vlc.path, "/live%?" )
          or string.match( vlc.path, "/shorts/" )
      then -- This is the HTML page's URL
          local path, path2, title, description, artist, arturl, js_url

          -- Retired YouTube API for video format itag parameter,
          -- still supported and extended as youtube.lua API
          -- https://en.wikipedia.org/w/index.php?title=YouTube&oldid=716878321#Quality_and_formats
          local fmt = get_url_param( vlc.path, "fmt" )

          while true do
              -- The new HTML code layout has fewer and longer lines; always
              -- use the long line workaround until we get more visibility.
              -- NOTE(review): new_layout is not declared in this function;
              -- presumably it is state defined elsewhere in the file, so
              -- it is deliberately left as is - confirm.
              local line = new_layout and read_long_line() or vlc.readline()
              if not line then break end

              -- The next line is the major configuration line that we need.
              -- It is very long so we need this workaround (see #24957).
              if string.match( line, '^ *<div id="player%-api">' ) then
                  line = read_long_line()
                  if not line then break end
              end

              if not title then
                  local meta = string.match( line, '<meta property="og:title"( .-)>' )
                  if meta then
                      title = string.match( meta, ' content="(.-)"' )
                      if title then
                          title = vlc.strings.resolve_xml_special_chars( title )
                      end
                  end
              end

              if not description then
                  -- FIXME: there is another version of this available,
                  -- without the double JSON string encoding, but we're
                  -- unlikely to access it due to #24957
                  description = string.match( line, '\\"shortDescription\\":\\"(.-[^\\])\\"')
                  if description then
                      -- FIXME: do this properly (see #24958)
                      description = string.gsub( description, '\\(["\\/])', '%1' )
                  else
                      description = string.match( line, '"shortDescription":"(.-[^\\])"')
                  end
                  if description then
                      -- A capture starting with a double quote is
                      -- replaced by an empty description
                      if string.match( description, '^"' ) then
                          description = ""
                      end
                      -- FIXME: do this properly (see #24958)
                      -- This way of unescaping is technically wrong
                      -- so as little as possible of it should be done
                      description = string.gsub( description, '\\(["\\/])', '%1' )
                      description = string.gsub( description, '\\n', '\n' )
                      description = string.gsub( description, '\\r', '\r' )
                      description = string.gsub( description, "\\u0026", "&" )
                  end
              end

              if not arturl then
                  local meta = string.match( line, '<meta property="og:image"( .-)>' )
                  if meta then
                      arturl = string.match( meta, ' content="(.-)"' )
                      if arturl then
                          arturl = vlc.strings.resolve_xml_special_chars( arturl )
                      end
                  end
              end

              if not artist then
                  artist = string.match(line, '\\"author\\":\\"(.-)\\"')
                  if artist then
                      -- FIXME: do this properly (see #24958)
                      artist = string.gsub( artist, '\\(["\\/])', '%1' )
                  else
                      artist = string.match( line, '"author":"(.-)"' )
                  end
                  if artist then
                      -- FIXME: do this properly (see #24958)
                      artist = string.gsub( artist, "\\u0026", "&" )
                  end
              end

              if not new_layout then
                  if string.match( line, '<script nonce="' ) then
                      vlc.msg.dbg( "Detected new YouTube HTML code layout" )
                      new_layout = true
                  end
              end

              -- We need this when parsing the main stream configuration;
              -- it can indeed be found on that same line (among others).
              if not js_url then
                  js_url = string.match( line, '"jsUrl":"(.-)"' )
                      or string.match( line, "\"js\": *\"(.-)\"" )
                  if js_url then
                      js_url = string.gsub( js_url, "\\/", "/" )
                      -- Resolve URL
                      if string.match( js_url, "^/[^/]" ) then
                          local authority = string.match( vlc.path, "^([^/]*)/" )
                          js_url = "//"..authority..js_url
                      end
                      js_url = string.gsub( js_url, "^//", vlc.access.."://" )
                  end
              end

              -- JSON parameters, also formerly known as "swfConfig",
              -- "SWF_ARGS", "swfArgs", "PLAYER_CONFIG", "playerConfig",
              -- "ytplayer.config" ...
              if string.match( line, "ytInitialPlayerResponse ?= ?{" )
                  or string.match( line, "ytplayer%.config" ) then

                  -- Classic parameters - out of use since early 2020
                  if not fmt then
                      -- Kept local so that nothing leaks into the
                      -- global environment
                      local fmt_list = string.match( line, "\"fmt_list\": *\"(.-)\"" )
                      if fmt_list then
                          fmt_list = string.gsub( fmt_list, "\\/", "/" )
                          fmt = get_fmt( fmt_list )
                      end
                  end

                  local url_map = string.match( line, "\"url_encoded_fmt_stream_map\": *\"(.-)\"" )
                  if url_map then
                      vlc.msg.dbg( "Found classic parameters for youtube video stream, parsing..." )
                      -- FIXME: do this properly (see #24958)
                      url_map = string.gsub( url_map, "\\u0026", "&" )
                      path = pick_url( url_map, fmt, js_url )
                  end

                  -- New-style parameters
                  if not path then
                      local stream_map = string.match( line, '\\"formats\\":%[(.-)%]' )
                      if stream_map then
                          -- FIXME: do this properly (see #24958)
                          stream_map = string.gsub( stream_map, '\\(["\\/])', '%1' )
                      else
                          stream_map = string.match( line, '"formats":%[(.-)%]' )
                      end
                      if stream_map then
                          -- FIXME: do this properly (see #24958)
                          stream_map = string.gsub( stream_map, "\\u0026", "&" )
                      end

                      local adaptive_map = string.match( line, '"adaptiveFormats":%[(.-)%]' )
                      if adaptive_map then
                          -- FIXME: do this properly (see #24958)
                          adaptive_map = string.gsub( adaptive_map, "\\u0026", "&" )
                      end

                      if stream_map or adaptive_map then
                          vlc.msg.dbg( "Found new-style parameters for youtube video stream, parsing..." )
                          -- The second result, when present, is added
                          -- below as an input slave
                          path, path2 = pick_stream_url( stream_map, adaptive_map, js_url, fmt )
                      end
                  end

                  if not path then
                      -- If this is a live stream, the URL map will be empty
                      -- and we get the URL from this field instead
                      local hlsvp = string.match( line, '\\"hlsManifestUrl\\": *\\"(.-)\\"' )
                          or string.match( line, '"hlsManifestUrl":"(.-)"' )
                      if hlsvp then
                          hlsvp = string.gsub( hlsvp, "\\/", "/" )
                          path = hlsvp
                      end
                  end
              end
          end

          if not path then
              vlc.msg.err( "Couldn't extract youtube video URL, please check for updates to this script" )
              return { }
          end

          if not arturl then
              arturl = get_arturl()
          end

          local options = { }
          if path2 then
              table.insert( options, ":input-slave="..path2 )
          end

          return { { path = path; name = title; description = description; artist = artist; arturl = arturl; options = options } }

      elseif string.match( vlc.path, "/get_video_info%?" ) then
          -- video info API, retired since summer 2021
          -- Replacement Innertube API requires HTTP POST requests
          -- and so remains for now unworkable from lua parser scripts
          -- (see #26185)

          -- Declared local so that the result never leaks into, nor is
          -- read back from, the global environment
          local path

          local line = vlc.read( 1024*1024 ) -- data is on one line only
          if not line then
              vlc.msg.err( "YouTube API output missing" )
              return { }
          end

          local js_url = get_url_param( vlc.path, "jsurl" )
          if js_url then
              js_url = vlc.strings.decode_uri( js_url )
          end

          -- Classic parameters - out of use since early 2020
          local fmt = get_url_param( vlc.path, "fmt" )
          if not fmt then
              local fmt_list = string.match( line, "&fmt_list=([^&]*)" )
              if fmt_list then
                  fmt_list = vlc.strings.decode_uri( fmt_list )
                  fmt = get_fmt( fmt_list )
              end
          end

          local url_map = string.match( line, "&url_encoded_fmt_stream_map=([^&]*)" )
          if url_map then
              vlc.msg.dbg( "Found classic parameters for youtube video stream, parsing..." )
              url_map = vlc.strings.decode_uri( url_map )
              path = pick_url( url_map, fmt, js_url )
          end

          -- New-style parameters
          if not path then
              local stream_map = string.match( line, '%%22formats%%22%%3A%%5B(.-)%%5D' )
              if stream_map then
                  vlc.msg.dbg( "Found new-style parameters for youtube video stream, parsing..." )
                  stream_map = vlc.strings.decode_uri( stream_map )
                  -- FIXME: do this properly (see #24958)
                  stream_map = string.gsub( stream_map, "\\u0026", "&" )
                  path = pick_stream_url( stream_map, nil, js_url, fmt )
              end
          end

          if not path then
              -- If this is a live stream, the URL map will be empty
              -- and we get the URL from this field instead
              local hlsvp = string.match( line, "%%22hlsManifestUrl%%22%%3A%%22(.-)%%22" )
              if hlsvp then
                  hlsvp = vlc.strings.decode_uri( hlsvp )
                  path = hlsvp
              end
          end

          if not path and get_url_param( vlc.path, "el" ) ~= "detailpage" then
              -- Retry with the other known value for the "el" parameter;
              -- either value has historically been wrong and failed for
              -- some videos but not others.
              local video_id = get_url_param( vlc.path, "video_id" )
              if video_id then
                  path = vlc.access.."://www.youtube.com/get_video_info?video_id="..video_id.."&el=detailpage"..copy_url_param( vlc.path, "fmt" )..copy_url_param( vlc.path, "jsurl" )
                  vlc.msg.warn( "Couldn't extract video URL, retrying with alternate YouTube API parameters" )
              end
          end

          if not path then
              vlc.msg.err( "Couldn't extract youtube video URL, please check for updates to this script" )
              return { }
          end

          local title = string.match( line, "%%22title%%22%%3A%%22(.-)%%22" )
          if title then
              -- "+" stands for a space in the encoded data; it is
              -- escaped here because it is a magic pattern character
              title = string.gsub( title, "%+", " " )
              title = vlc.strings.decode_uri( title )
              -- FIXME: do this properly (see #24958)
              title = string.gsub( title, "\\u0026", "&" )
          end
          -- FIXME: description gets truncated if it contains a double quote
          local description = string.match( line, "%%22shortDescription%%22%%3A%%22(.-)%%22" )
          if description then
              description = string.gsub( description, "%+", " " )
              description = vlc.strings.decode_uri( description )
              -- FIXME: do this properly (see #24958)
              description = string.gsub( description, '\\(["\\/])', '%1' )
              description = string.gsub( description, '\\n', '\n' )
              description = string.gsub( description, '\\r', '\r' )
              description = string.gsub( description, "\\u0026", "&" )
          end
          local artist = string.match( line, "%%22author%%22%%3A%%22(.-)%%22" )
          if artist then
              artist = string.gsub( artist, "%+", " " )
              artist = vlc.strings.decode_uri( artist )
              -- FIXME: do this properly (see #24958)
              artist = string.gsub( artist, "\\u0026", "&" )
          end
          local arturl = string.match( line, "%%22playerMicroformatRenderer%%22%%3A%%7B%%22thumbnail%%22%%3A%%7B%%22thumbnails%%22%%3A%%5B%%7B%%22url%%22%%3A%%22(.-)%%22" )
          if arturl then
              arturl = vlc.strings.decode_uri( arturl )
          end

          return { { path = path, name = title, description = description, artist = artist, arturl = arturl } }

      else -- Other supported URL formats
          local video_id = string.match( vlc.path, "/[^/]+/([^?]*)" )
          if not video_id then
              vlc.msg.err( "Couldn't extract youtube video URL" )
              return { }
          end
          return { { path = vlc.access.."://www.youtube.com/watch?v="..video_id..copy_url_param( vlc.path, "fmt" ) } }
      end
  end