test_networking.py 59 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. import pytest
  6. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  7. import gzip
  8. import http.client
  9. import http.cookiejar
  10. import http.server
  11. import io
  12. import pathlib
  13. import random
  14. import ssl
  15. import tempfile
  16. import threading
  17. import time
  18. import urllib.error
  19. import urllib.request
  20. import warnings
  21. import zlib
  22. from email.message import Message
  23. from http.cookiejar import CookieJar
  24. from test.helper import FakeYDL, http_server_port
  25. from hypervideo_dl.cookies import YoutubeDLCookieJar
  26. from hypervideo_dl.dependencies import brotli
  27. from hypervideo_dl.networking import (
  28. HEADRequest,
  29. PUTRequest,
  30. Request,
  31. RequestDirector,
  32. RequestHandler,
  33. Response,
  34. )
  35. from hypervideo_dl.networking._urllib import UrllibRH
  36. from hypervideo_dl.networking.exceptions import (
  37. CertificateVerifyError,
  38. HTTPError,
  39. IncompleteRead,
  40. NoSupportingHandlers,
  41. RequestError,
  42. SSLError,
  43. TransportError,
  44. UnsupportedRequest,
  45. )
  46. from hypervideo_dl.utils._utils import _YDLLogger as FakeLogger
  47. from hypervideo_dl.utils.networking import HTTPHeaderDict
  48. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  49. def _build_proxy_handler(name):
  50. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  51. proxy_name = name
  52. def log_message(self, format, *args):
  53. pass
  54. def do_GET(self):
  55. self.send_response(200)
  56. self.send_header('Content-Type', 'text/plain; charset=utf-8')
  57. self.end_headers()
  58. self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode())
  59. return HTTPTestRequestHandler
  60. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  61. protocol_version = 'HTTP/1.1'
  62. def log_message(self, format, *args):
  63. pass
  64. def _headers(self):
  65. payload = str(self.headers).encode()
  66. self.send_response(200)
  67. self.send_header('Content-Type', 'application/json')
  68. self.send_header('Content-Length', str(len(payload)))
  69. self.end_headers()
  70. self.wfile.write(payload)
  71. def _redirect(self):
  72. self.send_response(int(self.path[len('/redirect_'):]))
  73. self.send_header('Location', '/method')
  74. self.send_header('Content-Length', '0')
  75. self.end_headers()
  76. def _method(self, method, payload=None):
  77. self.send_response(200)
  78. self.send_header('Content-Length', str(len(payload or '')))
  79. self.send_header('Method', method)
  80. self.end_headers()
  81. if payload:
  82. self.wfile.write(payload)
  83. def _status(self, status):
  84. payload = f'<html>{status} NOT FOUND</html>'.encode()
  85. self.send_response(int(status))
  86. self.send_header('Content-Type', 'text/html; charset=utf-8')
  87. self.send_header('Content-Length', str(len(payload)))
  88. self.end_headers()
  89. self.wfile.write(payload)
  90. def _read_data(self):
  91. if 'Content-Length' in self.headers:
  92. return self.rfile.read(int(self.headers['Content-Length']))
  93. def do_POST(self):
  94. data = self._read_data() + str(self.headers).encode()
  95. if self.path.startswith('/redirect_'):
  96. self._redirect()
  97. elif self.path.startswith('/method'):
  98. self._method('POST', data)
  99. elif self.path.startswith('/headers'):
  100. self._headers()
  101. else:
  102. self._status(404)
  103. def do_HEAD(self):
  104. if self.path.startswith('/redirect_'):
  105. self._redirect()
  106. elif self.path.startswith('/method'):
  107. self._method('HEAD')
  108. else:
  109. self._status(404)
  110. def do_PUT(self):
  111. data = self._read_data() + str(self.headers).encode()
  112. if self.path.startswith('/redirect_'):
  113. self._redirect()
  114. elif self.path.startswith('/method'):
  115. self._method('PUT', data)
  116. else:
  117. self._status(404)
  118. def do_GET(self):
  119. if self.path == '/video.html':
  120. payload = b'<html><video src="/vid.mp4" /></html>'
  121. self.send_response(200)
  122. self.send_header('Content-Type', 'text/html; charset=utf-8')
  123. self.send_header('Content-Length', str(len(payload)))
  124. self.end_headers()
  125. self.wfile.write(payload)
  126. elif self.path == '/vid.mp4':
  127. payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
  128. self.send_response(200)
  129. self.send_header('Content-Type', 'video/mp4')
  130. self.send_header('Content-Length', str(len(payload)))
  131. self.end_headers()
  132. self.wfile.write(payload)
  133. elif self.path == '/%E4%B8%AD%E6%96%87.html':
  134. payload = b'<html><video src="/vid.mp4" /></html>'
  135. self.send_response(200)
  136. self.send_header('Content-Type', 'text/html; charset=utf-8')
  137. self.send_header('Content-Length', str(len(payload)))
  138. self.end_headers()
  139. self.wfile.write(payload)
  140. elif self.path == '/%c7%9f':
  141. payload = b'<html><video src="/vid.mp4" /></html>'
  142. self.send_response(200)
  143. self.send_header('Content-Type', 'text/html; charset=utf-8')
  144. self.send_header('Content-Length', str(len(payload)))
  145. self.end_headers()
  146. self.wfile.write(payload)
  147. elif self.path.startswith('/redirect_loop'):
  148. self.send_response(301)
  149. self.send_header('Location', self.path)
  150. self.send_header('Content-Length', '0')
  151. self.end_headers()
  152. elif self.path == '/redirect_dotsegments':
  153. self.send_response(301)
  154. # redirect to /headers but with dot segments before
  155. self.send_header('Location', '/a/b/./../../headers')
  156. self.send_header('Content-Length', '0')
  157. self.end_headers()
  158. elif self.path.startswith('/redirect_'):
  159. self._redirect()
  160. elif self.path.startswith('/method'):
  161. self._method('GET', str(self.headers).encode())
  162. elif self.path.startswith('/headers'):
  163. self._headers()
  164. elif self.path.startswith('/308-to-headers'):
  165. self.send_response(308)
  166. self.send_header('Location', '/headers')
  167. self.send_header('Content-Length', '0')
  168. self.end_headers()
  169. elif self.path == '/trailing_garbage':
  170. payload = b'<html><video src="/vid.mp4" /></html>'
  171. self.send_response(200)
  172. self.send_header('Content-Type', 'text/html; charset=utf-8')
  173. self.send_header('Content-Encoding', 'gzip')
  174. buf = io.BytesIO()
  175. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  176. f.write(payload)
  177. compressed = buf.getvalue() + b'trailing garbage'
  178. self.send_header('Content-Length', str(len(compressed)))
  179. self.end_headers()
  180. self.wfile.write(compressed)
  181. elif self.path == '/302-non-ascii-redirect':
  182. new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
  183. self.send_response(301)
  184. self.send_header('Location', new_url)
  185. self.send_header('Content-Length', '0')
  186. self.end_headers()
  187. elif self.path == '/content-encoding':
  188. encodings = self.headers.get('ytdl-encoding', '')
  189. payload = b'<html><video src="/vid.mp4" /></html>'
  190. for encoding in filter(None, (e.strip() for e in encodings.split(','))):
  191. if encoding == 'br' and brotli:
  192. payload = brotli.compress(payload)
  193. elif encoding == 'gzip':
  194. buf = io.BytesIO()
  195. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  196. f.write(payload)
  197. payload = buf.getvalue()
  198. elif encoding == 'deflate':
  199. payload = zlib.compress(payload)
  200. elif encoding == 'unsupported':
  201. payload = b'raw'
  202. break
  203. else:
  204. self._status(415)
  205. return
  206. self.send_response(200)
  207. self.send_header('Content-Encoding', encodings)
  208. self.send_header('Content-Length', str(len(payload)))
  209. self.end_headers()
  210. self.wfile.write(payload)
  211. elif self.path.startswith('/gen_'):
  212. payload = b'<html></html>'
  213. self.send_response(int(self.path[len('/gen_'):]))
  214. self.send_header('Content-Type', 'text/html; charset=utf-8')
  215. self.send_header('Content-Length', str(len(payload)))
  216. self.end_headers()
  217. self.wfile.write(payload)
  218. elif self.path.startswith('/incompleteread'):
  219. payload = b'<html></html>'
  220. self.send_response(200)
  221. self.send_header('Content-Type', 'text/html; charset=utf-8')
  222. self.send_header('Content-Length', '234234')
  223. self.end_headers()
  224. self.wfile.write(payload)
  225. self.finish()
  226. elif self.path.startswith('/timeout_'):
  227. time.sleep(int(self.path[len('/timeout_'):]))
  228. self._headers()
  229. elif self.path == '/source_address':
  230. payload = str(self.client_address[0]).encode()
  231. self.send_response(200)
  232. self.send_header('Content-Type', 'text/html; charset=utf-8')
  233. self.send_header('Content-Length', str(len(payload)))
  234. self.end_headers()
  235. self.wfile.write(payload)
  236. self.finish()
  237. else:
  238. self._status(404)
  239. def send_header(self, keyword, value):
  240. """
  241. Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
  242. This is against what is defined in RFC 3986, however we need to test we support this
  243. since some sites incorrectly do this.
  244. """
  245. if keyword.lower() == 'connection':
  246. return super().send_header(keyword, value)
  247. if not hasattr(self, '_headers_buffer'):
  248. self._headers_buffer = []
  249. self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
  250. def validate_and_send(rh, req):
  251. rh.validate(req)
  252. return rh.send(req)
  253. class TestRequestHandlerBase:
  254. @classmethod
  255. def setup_class(cls):
  256. cls.http_httpd = http.server.ThreadingHTTPServer(
  257. ('127.0.0.1', 0), HTTPTestRequestHandler)
  258. cls.http_port = http_server_port(cls.http_httpd)
  259. cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
  260. # FIXME: we should probably stop the http server thread after each test
  261. # See: https://github.com/hypervideo/hypervideo/pull/7094#discussion_r1199746041
  262. cls.http_server_thread.daemon = True
  263. cls.http_server_thread.start()
  264. # HTTPS server
  265. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  266. cls.https_httpd = http.server.ThreadingHTTPServer(
  267. ('127.0.0.1', 0), HTTPTestRequestHandler)
  268. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  269. sslctx.load_cert_chain(certfn, None)
  270. cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
  271. cls.https_port = http_server_port(cls.https_httpd)
  272. cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
  273. cls.https_server_thread.daemon = True
  274. cls.https_server_thread.start()
  275. class TestHTTPRequestHandler(TestRequestHandlerBase):
  276. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  277. def test_verify_cert(self, handler):
  278. with handler() as rh:
  279. with pytest.raises(CertificateVerifyError):
  280. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  281. with handler(verify=False) as rh:
  282. r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  283. assert r.status == 200
  284. r.close()
  285. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  286. def test_ssl_error(self, handler):
  287. # HTTPS server with too old TLS version
  288. # XXX: is there a better way to test this than to create a new server?
  289. https_httpd = http.server.ThreadingHTTPServer(
  290. ('127.0.0.1', 0), HTTPTestRequestHandler)
  291. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  292. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  293. https_port = http_server_port(https_httpd)
  294. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  295. https_server_thread.daemon = True
  296. https_server_thread.start()
  297. with handler(verify=False) as rh:
  298. with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
  299. validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  300. assert not issubclass(exc_info.type, CertificateVerifyError)
  301. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  302. def test_percent_encode(self, handler):
  303. with handler() as rh:
  304. # Unicode characters should be encoded with uppercase percent-encoding
  305. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
  306. assert res.status == 200
  307. res.close()
  308. # don't normalize existing percent encodings
  309. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
  310. assert res.status == 200
  311. res.close()
  312. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  313. def test_remove_dot_segments(self, handler):
  314. with handler() as rh:
  315. # This isn't a comprehensive test,
  316. # but it should be enough to check whether the handler is removing dot segments
  317. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
  318. assert res.status == 200
  319. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  320. res.close()
  321. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
  322. assert res.status == 200
  323. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  324. res.close()
  325. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  326. def test_unicode_path_redirection(self, handler):
  327. with handler() as rh:
  328. r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
  329. assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
  330. r.close()
  331. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  332. def test_raise_http_error(self, handler):
  333. with handler() as rh:
  334. for bad_status in (400, 500, 599, 302):
  335. with pytest.raises(HTTPError):
  336. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status)))
  337. # Should not raise an error
  338. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
  339. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  340. def test_response_url(self, handler):
  341. with handler() as rh:
  342. # Response url should be that of the last url in redirect chain
  343. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
  344. assert res.url == f'http://127.0.0.1:{self.http_port}/method'
  345. res.close()
  346. res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
  347. assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
  348. res2.close()
  349. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  350. def test_redirect(self, handler):
  351. with handler() as rh:
  352. def do_req(redirect_status, method, assert_no_content=False):
  353. data = b'testdata' if method in ('POST', 'PUT') else None
  354. res = validate_and_send(
  355. rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
  356. headers = b''
  357. data_sent = b''
  358. if data is not None:
  359. data_sent += res.read(len(data))
  360. if data_sent != data:
  361. headers += data_sent
  362. data_sent = b''
  363. headers += res.read()
  364. if assert_no_content or data is None:
  365. assert b'Content-Type' not in headers
  366. assert b'Content-Length' not in headers
  367. else:
  368. assert b'Content-Type' in headers
  369. assert b'Content-Length' in headers
  370. return data_sent.decode(), res.headers.get('method', '')
  371. # A 303 must either use GET or HEAD for subsequent request
  372. assert do_req(303, 'POST', True) == ('', 'GET')
  373. assert do_req(303, 'HEAD') == ('', 'HEAD')
  374. assert do_req(303, 'PUT', True) == ('', 'GET')
  375. # 301 and 302 turn POST only into a GET
  376. assert do_req(301, 'POST', True) == ('', 'GET')
  377. assert do_req(301, 'HEAD') == ('', 'HEAD')
  378. assert do_req(302, 'POST', True) == ('', 'GET')
  379. assert do_req(302, 'HEAD') == ('', 'HEAD')
  380. assert do_req(301, 'PUT') == ('testdata', 'PUT')
  381. assert do_req(302, 'PUT') == ('testdata', 'PUT')
  382. # 307 and 308 should not change method
  383. for m in ('POST', 'PUT'):
  384. assert do_req(307, m) == ('testdata', m)
  385. assert do_req(308, m) == ('testdata', m)
  386. assert do_req(307, 'HEAD') == ('', 'HEAD')
  387. assert do_req(308, 'HEAD') == ('', 'HEAD')
  388. # These should not redirect and instead raise an HTTPError
  389. for code in (300, 304, 305, 306):
  390. with pytest.raises(HTTPError):
  391. do_req(code, 'GET')
  392. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  393. def test_request_cookie_header(self, handler):
  394. # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
  395. with handler() as rh:
  396. # Specified Cookie header should be used
  397. res = validate_and_send(
  398. rh, Request(
  399. f'http://127.0.0.1:{self.http_port}/headers',
  400. headers={'Cookie': 'test=test'})).read().decode()
  401. assert 'Cookie: test=test' in res
  402. # Specified Cookie header should be removed on any redirect
  403. res = validate_and_send(
  404. rh, Request(
  405. f'http://127.0.0.1:{self.http_port}/308-to-headers',
  406. headers={'Cookie': 'test=test'})).read().decode()
  407. assert 'Cookie: test=test' not in res
  408. # Specified Cookie header should override global cookiejar for that request
  409. cookiejar = YoutubeDLCookieJar()
  410. cookiejar.set_cookie(http.cookiejar.Cookie(
  411. version=0, name='test', value='ytdlp', port=None, port_specified=False,
  412. domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
  413. path_specified=True, secure=False, expires=None, discard=False, comment=None,
  414. comment_url=None, rest={}))
  415. with handler(cookiejar=cookiejar) as rh:
  416. data = validate_and_send(
  417. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
  418. assert b'Cookie: test=ytdlp' not in data
  419. assert b'Cookie: test=test' in data
  420. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  421. def test_redirect_loop(self, handler):
  422. with handler() as rh:
  423. with pytest.raises(HTTPError, match='redirect loop'):
  424. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
  425. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  426. def test_incompleteread(self, handler):
  427. with handler(timeout=2) as rh:
  428. with pytest.raises(IncompleteRead):
  429. validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
  430. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  431. def test_cookies(self, handler):
  432. cookiejar = YoutubeDLCookieJar()
  433. cookiejar.set_cookie(http.cookiejar.Cookie(
  434. 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
  435. False, '/headers', True, False, None, False, None, None, {}))
  436. with handler(cookiejar=cookiejar) as rh:
  437. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  438. assert b'Cookie: test=ytdlp' in data
  439. # Per request
  440. with handler() as rh:
  441. data = validate_and_send(
  442. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
  443. assert b'Cookie: test=ytdlp' in data
  444. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  445. def test_headers(self, handler):
  446. with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
  447. # Global Headers
  448. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  449. assert b'Test1: test' in data
  450. # Per request headers, merged with global
  451. data = validate_and_send(rh, Request(
  452. f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
  453. assert b'Test1: test' in data
  454. assert b'Test2: changed' in data
  455. assert b'Test2: test2' not in data
  456. assert b'Test3: test3' in data
  457. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  458. def test_timeout(self, handler):
  459. with handler() as rh:
  460. # Default timeout is 20 seconds, so this should go through
  461. validate_and_send(
  462. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))
  463. with handler(timeout=0.5) as rh:
  464. with pytest.raises(TransportError):
  465. validate_and_send(
  466. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
  467. # Per request timeout, should override handler timeout
  468. validate_and_send(
  469. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
  470. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  471. def test_source_address(self, handler):
  472. source_address = f'127.0.0.{random.randint(5, 255)}'
  473. with handler(source_address=source_address) as rh:
  474. data = validate_and_send(
  475. rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
  476. assert source_address == data
  477. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  478. def test_gzip_trailing_garbage(self, handler):
  479. with handler() as rh:
  480. data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
  481. assert data == '<html><video src="/vid.mp4" /></html>'
  482. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  483. @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
  484. def test_brotli(self, handler):
  485. with handler() as rh:
  486. res = validate_and_send(
  487. rh, Request(
  488. f'http://127.0.0.1:{self.http_port}/content-encoding',
  489. headers={'ytdl-encoding': 'br'}))
  490. assert res.headers.get('Content-Encoding') == 'br'
  491. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  492. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  493. def test_deflate(self, handler):
  494. with handler() as rh:
  495. res = validate_and_send(
  496. rh, Request(
  497. f'http://127.0.0.1:{self.http_port}/content-encoding',
  498. headers={'ytdl-encoding': 'deflate'}))
  499. assert res.headers.get('Content-Encoding') == 'deflate'
  500. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  501. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  502. def test_gzip(self, handler):
  503. with handler() as rh:
  504. res = validate_and_send(
  505. rh, Request(
  506. f'http://127.0.0.1:{self.http_port}/content-encoding',
  507. headers={'ytdl-encoding': 'gzip'}))
  508. assert res.headers.get('Content-Encoding') == 'gzip'
  509. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  510. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  511. def test_multiple_encodings(self, handler):
  512. with handler() as rh:
  513. for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
  514. res = validate_and_send(
  515. rh, Request(
  516. f'http://127.0.0.1:{self.http_port}/content-encoding',
  517. headers={'ytdl-encoding': pair}))
  518. assert res.headers.get('Content-Encoding') == pair
  519. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  520. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  521. def test_unsupported_encoding(self, handler):
  522. with handler() as rh:
  523. res = validate_and_send(
  524. rh, Request(
  525. f'http://127.0.0.1:{self.http_port}/content-encoding',
  526. headers={'ytdl-encoding': 'unsupported'}))
  527. assert res.headers.get('Content-Encoding') == 'unsupported'
  528. assert res.read() == b'raw'
  529. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  530. def test_read(self, handler):
  531. with handler() as rh:
  532. res = validate_and_send(
  533. rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
  534. assert res.readable()
  535. assert res.read(1) == b'H'
  536. assert res.read(3) == b'ost'
  537. class TestHTTPProxy(TestRequestHandlerBase):
  538. @classmethod
  539. def setup_class(cls):
  540. super().setup_class()
  541. # HTTP Proxy server
  542. cls.proxy = http.server.ThreadingHTTPServer(
  543. ('127.0.0.1', 0), _build_proxy_handler('normal'))
  544. cls.proxy_port = http_server_port(cls.proxy)
  545. cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
  546. cls.proxy_thread.daemon = True
  547. cls.proxy_thread.start()
  548. # Geo proxy server
  549. cls.geo_proxy = http.server.ThreadingHTTPServer(
  550. ('127.0.0.1', 0), _build_proxy_handler('geo'))
  551. cls.geo_port = http_server_port(cls.geo_proxy)
  552. cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
  553. cls.geo_proxy_thread.daemon = True
  554. cls.geo_proxy_thread.start()
  555. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  556. def test_http_proxy(self, handler):
  557. http_proxy = f'http://127.0.0.1:{self.proxy_port}'
  558. geo_proxy = f'http://127.0.0.1:{self.geo_port}'
  559. # Test global http proxy
  560. # Test per request http proxy
  561. # Test per request http proxy disables proxy
  562. url = 'http://foo.com/bar'
  563. # Global HTTP proxy
  564. with handler(proxies={'http': http_proxy}) as rh:
  565. res = validate_and_send(rh, Request(url)).read().decode()
  566. assert res == f'normal: {url}'
  567. # Per request proxy overrides global
  568. res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
  569. assert res == f'geo: {url}'
  570. # and setting to None disables all proxies for that request
  571. real_url = f'http://127.0.0.1:{self.http_port}/headers'
  572. res = validate_and_send(
  573. rh, Request(real_url, proxies={'http': None})).read().decode()
  574. assert res != f'normal: {real_url}'
  575. assert 'Accept' in res
  576. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  577. def test_noproxy(self, handler):
  578. with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
  579. # NO_PROXY
  580. for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
  581. nop_response = validate_and_send(
  582. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
  583. 'utf-8')
  584. assert 'Accept' in nop_response
  585. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  586. def test_allproxy(self, handler):
  587. url = 'http://foo.com/bar'
  588. with handler() as rh:
  589. response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
  590. 'utf-8')
  591. assert response == f'normal: {url}'
  592. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  593. def test_http_proxy_with_idn(self, handler):
  594. with handler(proxies={
  595. 'http': f'http://127.0.0.1:{self.proxy_port}',
  596. }) as rh:
  597. url = 'http://中文.tw/'
  598. response = rh.send(Request(url)).read().decode()
  599. # b'xn--fiq228c' is '中文'.encode('idna')
  600. assert response == 'normal: http://xn--fiq228c.tw/'
  601. class TestClientCertificate:
  602. @classmethod
  603. def setup_class(cls):
  604. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  605. cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
  606. cacertfn = os.path.join(cls.certdir, 'ca.crt')
  607. cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
  608. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  609. sslctx.verify_mode = ssl.CERT_REQUIRED
  610. sslctx.load_verify_locations(cafile=cacertfn)
  611. sslctx.load_cert_chain(certfn, None)
  612. cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
  613. cls.port = http_server_port(cls.httpd)
  614. cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
  615. cls.server_thread.daemon = True
  616. cls.server_thread.start()
  617. def _run_test(self, handler, **handler_kwargs):
  618. with handler(
  619. # Disable client-side validation of unacceptable self-signed testcert.pem
  620. # The test is of a check on the server side, so unaffected
  621. verify=False,
  622. **handler_kwargs,
  623. ) as rh:
  624. validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
  625. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  626. def test_certificate_combined_nopass(self, handler):
  627. self._run_test(handler, client_cert={
  628. 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
  629. })
  630. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  631. def test_certificate_nocombined_nopass(self, handler):
  632. self._run_test(handler, client_cert={
  633. 'client_certificate': os.path.join(self.certdir, 'client.crt'),
  634. 'client_certificate_key': os.path.join(self.certdir, 'client.key'),
  635. })
  636. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  637. def test_certificate_combined_pass(self, handler):
  638. self._run_test(handler, client_cert={
  639. 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
  640. 'client_certificate_password': 'foobar',
  641. })
  642. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  643. def test_certificate_nocombined_pass(self, handler):
  644. self._run_test(handler, client_cert={
  645. 'client_certificate': os.path.join(self.certdir, 'client.crt'),
  646. 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
  647. 'client_certificate_password': 'foobar',
  648. })
  649. class TestUrllibRequestHandler(TestRequestHandlerBase):
  650. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  651. def test_file_urls(self, handler):
  652. # See https://github.com/ytdl-org/youtube-dl/issues/8227
  653. tf = tempfile.NamedTemporaryFile(delete=False)
  654. tf.write(b'foobar')
  655. tf.close()
  656. req = Request(pathlib.Path(tf.name).as_uri())
  657. with handler() as rh:
  658. with pytest.raises(UnsupportedRequest):
  659. rh.validate(req)
  660. # Test that urllib never loaded FileHandler
  661. with pytest.raises(TransportError):
  662. rh.send(req)
  663. with handler(enable_file_urls=True) as rh:
  664. res = validate_and_send(rh, req)
  665. assert res.read() == b'foobar'
  666. res.close()
  667. os.unlink(tf.name)
  668. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  669. def test_http_error_returns_content(self, handler):
  670. # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
  671. def get_response():
  672. with handler() as rh:
  673. # headers url
  674. try:
  675. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
  676. except HTTPError as e:
  677. return e.response
  678. assert get_response().read() == b'<html></html>'
  679. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  680. def test_verify_cert_error_text(self, handler):
  681. # Check the output of the error message
  682. with handler() as rh:
  683. with pytest.raises(
  684. CertificateVerifyError,
  685. match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
  686. ):
  687. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  688. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  689. @pytest.mark.parametrize('req,match,version_check', [
  690. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
  691. # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
  692. (
  693. Request('http://127.0.0.1', method='GET\n'),
  694. 'method can\'t contain control characters',
  695. lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
  696. ),
  697. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
  698. # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
  699. (
  700. Request('http://127.0.0. 1', method='GET'),
  701. 'URL can\'t contain control characters',
  702. lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
  703. ),
  704. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
  705. (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
  706. ])
  707. def test_httplib_validation_errors(self, handler, req, match, version_check):
  708. if version_check and version_check(sys.version_info):
  709. pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')
  710. with handler() as rh:
  711. with pytest.raises(RequestError, match=match) as exc_info:
  712. validate_and_send(rh, req)
  713. assert not isinstance(exc_info.value, TransportError)
  714. def run_validation(handler, error, req, **handler_kwargs):
  715. with handler(**handler_kwargs) as rh:
  716. if error:
  717. with pytest.raises(error):
  718. rh.validate(req)
  719. else:
  720. rh.validate(req)
  721. class TestRequestHandlerValidation:
  722. class ValidationRH(RequestHandler):
  723. def _send(self, request):
  724. raise RequestError('test')
  725. class NoCheckRH(ValidationRH):
  726. _SUPPORTED_FEATURES = None
  727. _SUPPORTED_PROXY_SCHEMES = None
  728. _SUPPORTED_URL_SCHEMES = None
  729. def _check_extensions(self, extensions):
  730. extensions.clear()
  731. class HTTPSupportedRH(ValidationRH):
  732. _SUPPORTED_URL_SCHEMES = ('http',)
  733. URL_SCHEME_TESTS = [
  734. # scheme, expected to fail, handler kwargs
  735. ('Urllib', [
  736. ('http', False, {}),
  737. ('https', False, {}),
  738. ('data', False, {}),
  739. ('ftp', False, {}),
  740. ('file', UnsupportedRequest, {}),
  741. ('file', False, {'enable_file_urls': True}),
  742. ]),
  743. (NoCheckRH, [('http', False, {})]),
  744. (ValidationRH, [('http', UnsupportedRequest, {})])
  745. ]
  746. PROXY_SCHEME_TESTS = [
  747. # scheme, expected to fail
  748. ('Urllib', [
  749. ('http', False),
  750. ('https', UnsupportedRequest),
  751. ('socks4', False),
  752. ('socks4a', False),
  753. ('socks5', False),
  754. ('socks5h', False),
  755. ('socks', UnsupportedRequest),
  756. ]),
  757. (NoCheckRH, [('http', False)]),
  758. (HTTPSupportedRH, [('http', UnsupportedRequest)]),
  759. ]
  760. PROXY_KEY_TESTS = [
  761. # key, expected to fail
  762. ('Urllib', [
  763. ('all', False),
  764. ('unrelated', False),
  765. ]),
  766. (NoCheckRH, [('all', False)]),
  767. (HTTPSupportedRH, [('all', UnsupportedRequest)]),
  768. (HTTPSupportedRH, [('no', UnsupportedRequest)]),
  769. ]
  770. EXTENSION_TESTS = [
  771. ('Urllib', [
  772. ({'cookiejar': 'notacookiejar'}, AssertionError),
  773. ({'cookiejar': YoutubeDLCookieJar()}, False),
  774. ({'cookiejar': CookieJar()}, AssertionError),
  775. ({'timeout': 1}, False),
  776. ({'timeout': 'notatimeout'}, AssertionError),
  777. ({'unsupported': 'value'}, UnsupportedRequest),
  778. ]),
  779. (NoCheckRH, [
  780. ({'cookiejar': 'notacookiejar'}, False),
  781. ({'somerandom': 'test'}, False), # but any extension is allowed through
  782. ]),
  783. ]
  784. @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
  785. (handler_tests[0], scheme, fail, handler_kwargs)
  786. for handler_tests in URL_SCHEME_TESTS
  787. for scheme, fail, handler_kwargs in handler_tests[1]
  788. ], indirect=['handler'])
  789. def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
  790. run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
  791. @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler'])
  792. def test_no_proxy(self, handler, fail):
  793. run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
  794. run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
  795. @pytest.mark.parametrize('handler,proxy_key,fail', [
  796. (handler_tests[0], proxy_key, fail)
  797. for handler_tests in PROXY_KEY_TESTS
  798. for proxy_key, fail in handler_tests[1]
  799. ], indirect=['handler'])
  800. def test_proxy_key(self, handler, proxy_key, fail):
  801. run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
  802. run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
  803. @pytest.mark.parametrize('handler,scheme,fail', [
  804. (handler_tests[0], scheme, fail)
  805. for handler_tests in PROXY_SCHEME_TESTS
  806. for scheme, fail in handler_tests[1]
  807. ], indirect=['handler'])
  808. def test_proxy_scheme(self, handler, scheme, fail):
  809. run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'}))
  810. run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'})
  811. @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True)
  812. def test_empty_proxy(self, handler):
  813. run_validation(handler, False, Request('http://', proxies={'http': None}))
  814. run_validation(handler, False, Request('http://'), proxies={'http': None})
  815. @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
  816. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  817. def test_invalid_proxy_url(self, handler, proxy_url):
  818. run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
  819. @pytest.mark.parametrize('handler,extensions,fail', [
  820. (handler_tests[0], extensions, fail)
  821. for handler_tests in EXTENSION_TESTS
  822. for extensions, fail in handler_tests[1]
  823. ], indirect=['handler'])
  824. def test_extension(self, handler, extensions, fail):
  825. run_validation(
  826. handler, fail, Request('http://', extensions=extensions))
  827. def test_invalid_request_type(self):
  828. rh = self.ValidationRH(logger=FakeLogger())
  829. for method in (rh.validate, rh.send):
  830. with pytest.raises(TypeError, match='Expected an instance of Request'):
  831. method('not a request')
  832. class FakeResponse(Response):
  833. def __init__(self, request):
  834. # XXX: we could make request part of standard response interface
  835. self.request = request
  836. super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url)
  837. class FakeRH(RequestHandler):
  838. def _validate(self, request):
  839. return
  840. def _send(self, request: Request):
  841. if request.url.startswith('ssl://'):
  842. raise SSLError(request.url[len('ssl://'):])
  843. return FakeResponse(request)
  844. class FakeRHYDL(FakeYDL):
  845. def __init__(self, *args, **kwargs):
  846. super().__init__(*args, **kwargs)
  847. self._request_director = self.build_request_director([FakeRH])
  848. class TestRequestDirector:
  849. def test_handler_operations(self):
  850. director = RequestDirector(logger=FakeLogger())
  851. handler = FakeRH(logger=FakeLogger())
  852. director.add_handler(handler)
  853. assert director.handlers.get(FakeRH.RH_KEY) is handler
  854. # Handler should overwrite
  855. handler2 = FakeRH(logger=FakeLogger())
  856. director.add_handler(handler2)
  857. assert director.handlers.get(FakeRH.RH_KEY) is not handler
  858. assert director.handlers.get(FakeRH.RH_KEY) is handler2
  859. assert len(director.handlers) == 1
  860. class AnotherFakeRH(FakeRH):
  861. pass
  862. director.add_handler(AnotherFakeRH(logger=FakeLogger()))
  863. assert len(director.handlers) == 2
  864. assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY
  865. director.handlers.pop(FakeRH.RH_KEY, None)
  866. assert director.handlers.get(FakeRH.RH_KEY) is None
  867. assert len(director.handlers) == 1
  868. # RequestErrors should passthrough
  869. with pytest.raises(SSLError):
  870. director.send(Request('ssl://something'))
  871. def test_send(self):
  872. director = RequestDirector(logger=FakeLogger())
  873. with pytest.raises(RequestError):
  874. director.send(Request('any://'))
  875. director.add_handler(FakeRH(logger=FakeLogger()))
  876. assert isinstance(director.send(Request('http://')), FakeResponse)
  877. def test_unsupported_handlers(self):
  878. class SupportedRH(RequestHandler):
  879. _SUPPORTED_URL_SCHEMES = ['http']
  880. def _send(self, request: Request):
  881. return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
  882. director = RequestDirector(logger=FakeLogger())
  883. director.add_handler(SupportedRH(logger=FakeLogger()))
  884. director.add_handler(FakeRH(logger=FakeLogger()))
  885. # First should take preference
  886. assert director.send(Request('http://')).read() == b'supported'
  887. assert director.send(Request('any://')).read() == b''
  888. director.handlers.pop(FakeRH.RH_KEY)
  889. with pytest.raises(NoSupportingHandlers):
  890. director.send(Request('any://'))
  891. def test_unexpected_error(self):
  892. director = RequestDirector(logger=FakeLogger())
  893. class UnexpectedRH(FakeRH):
  894. def _send(self, request: Request):
  895. raise TypeError('something')
  896. director.add_handler(UnexpectedRH(logger=FakeLogger))
  897. with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
  898. director.send(Request('any://'))
  899. director.handlers.clear()
  900. assert len(director.handlers) == 0
  901. # Should not be fatal
  902. director.add_handler(FakeRH(logger=FakeLogger()))
  903. director.add_handler(UnexpectedRH(logger=FakeLogger))
  904. assert director.send(Request('any://'))
  905. def test_preference(self):
  906. director = RequestDirector(logger=FakeLogger())
  907. director.add_handler(FakeRH(logger=FakeLogger()))
  908. class SomeRH(RequestHandler):
  909. _SUPPORTED_URL_SCHEMES = ['http']
  910. def _send(self, request: Request):
  911. return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
  912. def some_preference(rh, request):
  913. return (0 if not isinstance(rh, SomeRH)
  914. else 100 if 'prefer' in request.headers
  915. else -1)
  916. director.add_handler(SomeRH(logger=FakeLogger()))
  917. director.preferences.add(some_preference)
  918. assert director.send(Request('http://')).read() == b''
  919. assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
  920. # XXX: do we want to move this to test_YoutubeDL.py?
  921. class TestYoutubeDLNetworking:
  922. @staticmethod
  923. def build_handler(ydl, handler: RequestHandler = FakeRH):
  924. return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)
  925. def test_compat_opener(self):
  926. with FakeYDL() as ydl:
  927. with warnings.catch_warnings():
  928. warnings.simplefilter('ignore', category=DeprecationWarning)
  929. assert isinstance(ydl._opener, urllib.request.OpenerDirector)
  930. @pytest.mark.parametrize('proxy,expected', [
  931. ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
  932. ('', {'all': '__noproxy__'}),
  933. (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # env, set https
  934. ])
  935. def test_proxy(self, proxy, expected):
  936. old_http_proxy = os.environ.get('HTTP_PROXY')
  937. try:
  938. os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081' # ensure that provided proxies override env
  939. with FakeYDL({'proxy': proxy}) as ydl:
  940. assert ydl.proxies == expected
  941. finally:
  942. if old_http_proxy:
  943. os.environ['HTTP_PROXY'] = old_http_proxy
  944. def test_compat_request(self):
  945. with FakeRHYDL() as ydl:
  946. assert ydl.urlopen('test://')
  947. urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
  948. urllib_req.add_unredirected_header('Cookie', 'bob=bob')
  949. urllib_req.timeout = 2
  950. with warnings.catch_warnings():
  951. warnings.simplefilter('ignore', category=DeprecationWarning)
  952. req = ydl.urlopen(urllib_req).request
  953. assert req.url == urllib_req.get_full_url()
  954. assert req.data == urllib_req.data
  955. assert req.method == urllib_req.get_method()
  956. assert 'X-Test' in req.headers
  957. assert 'Cookie' in req.headers
  958. assert req.extensions.get('timeout') == 2
  959. with pytest.raises(AssertionError):
  960. ydl.urlopen(None)
  961. def test_extract_basic_auth(self):
  962. with FakeRHYDL() as ydl:
  963. res = ydl.urlopen(Request('http://user:pass@foo.bar'))
  964. assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'
  965. def test_sanitize_url(self):
  966. with FakeRHYDL() as ydl:
  967. res = ydl.urlopen(Request('httpss://foo.bar'))
  968. assert res.request.url == 'https://foo.bar'
  969. def test_file_urls_error(self):
  970. # use urllib handler
  971. with FakeYDL() as ydl:
  972. with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
  973. ydl.urlopen('file://')
  974. def test_legacy_server_connect_error(self):
  975. with FakeRHYDL() as ydl:
  976. for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
  977. with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
  978. ydl.urlopen(f'ssl://{error}')
  979. with pytest.raises(SSLError, match='testerror'):
  980. ydl.urlopen('ssl://testerror')
  981. @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
  982. ('http', '__noproxy__', None),
  983. ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
  984. ('https', 'example.com', 'http://example.com'),
  985. ('https', '//example.com', 'http://example.com'),
  986. ('https', 'socks5://example.com', 'socks5h://example.com'),
  987. ('http', 'socks://example.com', 'socks4://example.com'),
  988. ('http', 'socks4://example.com', 'socks4://example.com'),
  989. ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
  990. ])
  991. def test_clean_proxy(self, proxy_key, proxy_url, expected):
  992. # proxies should be cleaned in urlopen()
  993. with FakeRHYDL() as ydl:
  994. req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
  995. assert req.proxies[proxy_key] == expected
  996. # and should also be cleaned when building the handler
  997. env_key = f'{proxy_key.upper()}_PROXY'
  998. old_env_proxy = os.environ.get(env_key)
  999. try:
  1000. os.environ[env_key] = proxy_url # ensure that provided proxies override env
  1001. with FakeYDL() as ydl:
  1002. rh = self.build_handler(ydl)
  1003. assert rh.proxies[proxy_key] == expected
  1004. finally:
  1005. if old_env_proxy:
  1006. os.environ[env_key] = old_env_proxy
  1007. def test_clean_proxy_header(self):
  1008. with FakeRHYDL() as ydl:
  1009. req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
  1010. assert 'ytdl-request-proxy' not in req.headers
  1011. assert req.proxies == {'all': 'http://foo.bar'}
  1012. with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
  1013. rh = self.build_handler(ydl)
  1014. assert 'ytdl-request-proxy' not in rh.headers
  1015. assert rh.proxies == {'all': 'http://foo.bar'}
  1016. def test_clean_header(self):
  1017. with FakeRHYDL() as ydl:
  1018. res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
  1019. assert 'Youtubedl-no-compression' not in res.request.headers
  1020. assert res.request.headers.get('Accept-Encoding') == 'identity'
  1021. with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
  1022. rh = self.build_handler(ydl)
  1023. assert 'Youtubedl-no-compression' not in rh.headers
  1024. assert rh.headers.get('Accept-Encoding') == 'identity'
  1025. def test_build_handler_params(self):
  1026. with FakeYDL({
  1027. 'http_headers': {'test': 'testtest'},
  1028. 'socket_timeout': 2,
  1029. 'proxy': 'http://127.0.0.1:8080',
  1030. 'source_address': '127.0.0.45',
  1031. 'debug_printtraffic': True,
  1032. 'compat_opts': ['no-certifi'],
  1033. 'nocheckcertificate': True,
  1034. 'legacyserverconnect': True,
  1035. }) as ydl:
  1036. rh = self.build_handler(ydl)
  1037. assert rh.headers.get('test') == 'testtest'
  1038. assert 'Accept' in rh.headers # ensure std_headers are still there
  1039. assert rh.timeout == 2
  1040. assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
  1041. assert rh.source_address == '127.0.0.45'
  1042. assert rh.verbose is True
  1043. assert rh.prefer_system_certs is True
  1044. assert rh.verify is False
  1045. assert rh.legacy_ssl_support is True
  1046. @pytest.mark.parametrize('ydl_params', [
  1047. {'client_certificate': 'fakecert.crt'},
  1048. {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
  1049. {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
  1050. {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
  1051. ])
  1052. def test_client_certificate(self, ydl_params):
  1053. with FakeYDL(ydl_params) as ydl:
  1054. rh = self.build_handler(ydl)
  1055. assert rh._client_cert == ydl_params # XXX: Too bound to implementation
  1056. def test_urllib_file_urls(self):
  1057. with FakeYDL({'enable_file_urls': False}) as ydl:
  1058. rh = self.build_handler(ydl, UrllibRH)
  1059. assert rh.enable_file_urls is False
  1060. with FakeYDL({'enable_file_urls': True}) as ydl:
  1061. rh = self.build_handler(ydl, UrllibRH)
  1062. assert rh.enable_file_urls is True
  1063. class TestRequest:
  1064. def test_query(self):
  1065. req = Request('http://example.com?q=something', query={'v': 'xyz'})
  1066. assert req.url == 'http://example.com?q=something&v=xyz'
  1067. req.update(query={'v': '123'})
  1068. assert req.url == 'http://example.com?q=something&v=123'
  1069. req.update(url='http://example.com', query={'v': 'xyz'})
  1070. assert req.url == 'http://example.com?v=xyz'
  1071. def test_method(self):
  1072. req = Request('http://example.com')
  1073. assert req.method == 'GET'
  1074. req.data = b'test'
  1075. assert req.method == 'POST'
  1076. req.data = None
  1077. assert req.method == 'GET'
  1078. req.data = b'test2'
  1079. req.method = 'PUT'
  1080. assert req.method == 'PUT'
  1081. req.data = None
  1082. assert req.method == 'PUT'
  1083. with pytest.raises(TypeError):
  1084. req.method = 1
  1085. def test_request_helpers(self):
  1086. assert HEADRequest('http://example.com').method == 'HEAD'
  1087. assert PUTRequest('http://example.com').method == 'PUT'
  1088. def test_headers(self):
  1089. req = Request('http://example.com', headers={'tesT': 'test'})
  1090. assert req.headers == HTTPHeaderDict({'test': 'test'})
  1091. req.update(headers={'teSt2': 'test2'})
  1092. assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})
  1093. req.headers = new_headers = HTTPHeaderDict({'test': 'test'})
  1094. assert req.headers == HTTPHeaderDict({'test': 'test'})
  1095. assert req.headers is new_headers
  1096. # test converts dict to case insensitive dict
  1097. req.headers = new_headers = {'test2': 'test2'}
  1098. assert isinstance(req.headers, HTTPHeaderDict)
  1099. assert req.headers is not new_headers
  1100. with pytest.raises(TypeError):
  1101. req.headers = None
  1102. def test_data_type(self):
  1103. req = Request('http://example.com')
  1104. assert req.data is None
  1105. # test bytes is allowed
  1106. req.data = b'test'
  1107. assert req.data == b'test'
  1108. # test iterable of bytes is allowed
  1109. i = [b'test', b'test2']
  1110. req.data = i
  1111. assert req.data == i
  1112. # test file-like object is allowed
  1113. f = io.BytesIO(b'test')
  1114. req.data = f
  1115. assert req.data == f
  1116. # common mistake: test str not allowed
  1117. with pytest.raises(TypeError):
  1118. req.data = 'test'
  1119. assert req.data != 'test'
  1120. # common mistake: test dict is not allowed
  1121. with pytest.raises(TypeError):
  1122. req.data = {'test': 'test'}
  1123. assert req.data != {'test': 'test'}
  1124. def test_content_length_header(self):
  1125. req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
  1126. assert req.headers.get('Content-Length') == '0'
  1127. req.data = b'test'
  1128. assert 'Content-Length' not in req.headers
  1129. req = Request('http://example.com', headers={'Content-Length': '10'})
  1130. assert 'Content-Length' not in req.headers
  1131. def test_content_type_header(self):
  1132. req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
  1133. assert req.headers.get('Content-Type') == 'test'
  1134. req.data = b'test2'
  1135. assert req.headers.get('Content-Type') == 'test'
  1136. req.data = None
  1137. assert 'Content-Type' not in req.headers
  1138. req.data = b'test3'
  1139. assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
  1140. def test_update_req(self):
  1141. req = Request('http://example.com')
  1142. assert req.data is None
  1143. assert req.method == 'GET'
  1144. assert 'Content-Type' not in req.headers
  1145. # Test that zero-byte payloads will be sent
  1146. req.update(data=b'')
  1147. assert req.data == b''
  1148. assert req.method == 'POST'
  1149. assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
  1150. def test_proxies(self):
  1151. req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
  1152. assert req.proxies == {'http': 'http://127.0.0.1:8080'}
  1153. def test_extensions(self):
  1154. req = Request(url='http://example.com', extensions={'timeout': 2})
  1155. assert req.extensions == {'timeout': 2}
  1156. def test_copy(self):
  1157. req = Request(
  1158. url='http://example.com',
  1159. extensions={'cookiejar': CookieJar()},
  1160. headers={'Accept-Encoding': 'br'},
  1161. proxies={'http': 'http://127.0.0.1'},
  1162. data=[b'123']
  1163. )
  1164. req_copy = req.copy()
  1165. assert req_copy is not req
  1166. assert req_copy.url == req.url
  1167. assert req_copy.headers == req.headers
  1168. assert req_copy.headers is not req.headers
  1169. assert req_copy.proxies == req.proxies
  1170. assert req_copy.proxies is not req.proxies
  1171. # Data is not able to be copied
  1172. assert req_copy.data == req.data
  1173. assert req_copy.data is req.data
  1174. # Shallow copy extensions
  1175. assert req_copy.extensions is not req.extensions
  1176. assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar']
  1177. # Subclasses are copied by default
  1178. class AnotherRequest(Request):
  1179. pass
  1180. req = AnotherRequest(url='http://127.0.0.1')
  1181. assert isinstance(req.copy(), AnotherRequest)
  1182. def test_url(self):
  1183. req = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
  1184. assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'
  1185. assert Request(url='//example.com').url == 'http://example.com'
  1186. with pytest.raises(TypeError):
  1187. Request(url='https://').url = None
  1188. class TestResponse:
  1189. @pytest.mark.parametrize('reason,status,expected', [
  1190. ('custom', 200, 'custom'),
  1191. (None, 404, 'Not Found'), # fallback status
  1192. ('', 403, 'Forbidden'),
  1193. (None, 999, None)
  1194. ])
  1195. def test_reason(self, reason, status, expected):
  1196. res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
  1197. assert res.reason == expected
  1198. def test_headers(self):
  1199. headers = Message()
  1200. headers.add_header('Test', 'test')
  1201. headers.add_header('Test', 'test2')
  1202. headers.add_header('content-encoding', 'br')
  1203. res = Response(io.BytesIO(b''), headers=headers, url='test://')
  1204. assert res.headers.get_all('test') == ['test', 'test2']
  1205. assert 'Content-Encoding' in res.headers
  1206. def test_get_header(self):
  1207. headers = Message()
  1208. headers.add_header('Set-Cookie', 'cookie1')
  1209. headers.add_header('Set-cookie', 'cookie2')
  1210. headers.add_header('Test', 'test')
  1211. headers.add_header('Test', 'test2')
  1212. res = Response(io.BytesIO(b''), headers=headers, url='test://')
  1213. assert res.get_header('test') == 'test, test2'
  1214. assert res.get_header('set-Cookie') == 'cookie1'
  1215. assert res.get_header('notexist', 'default') == 'default'
  1216. def test_compat(self):
  1217. res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
  1218. with warnings.catch_warnings():
  1219. warnings.simplefilter('ignore', category=DeprecationWarning)
  1220. assert res.code == res.getcode() == res.status
  1221. assert res.geturl() == res.url
  1222. assert res.info() is res.headers
  1223. assert res.getheader('test') == res.get_header('test')