test_http_proxy.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. import abc
  2. import base64
  3. import contextlib
  4. import functools
  5. import json
  6. import os
  7. import random
  8. import ssl
  9. import threading
  10. from http.server import BaseHTTPRequestHandler
  11. from socketserver import ThreadingTCPServer
  12. import pytest
  13. from test.helper import http_server_port, verify_address_availability
  14. from test.test_networking import TEST_DIR
  15. from test.test_socks import IPv6ThreadingTCPServer
  16. from yt_dlp.dependencies import urllib3
  17. from yt_dlp.networking import Request
  18. from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError
  19. class HTTPProxyAuthMixin:
  20. def proxy_auth_error(self):
  21. self.send_response(407)
  22. self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
  23. self.end_headers()
  24. return False
  25. def do_proxy_auth(self, username, password):
  26. if username is None and password is None:
  27. return True
  28. proxy_auth_header = self.headers.get('Proxy-Authorization', None)
  29. if proxy_auth_header is None:
  30. return self.proxy_auth_error()
  31. if not proxy_auth_header.startswith('Basic '):
  32. return self.proxy_auth_error()
  33. auth = proxy_auth_header[6:]
  34. try:
  35. auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1)
  36. except Exception:
  37. return self.proxy_auth_error()
  38. if auth_username != (username or '') or auth_password != (password or ''):
  39. return self.proxy_auth_error()
  40. return True
  41. class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
  42. def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
  43. self.username = username
  44. self.password = password
  45. self.proxy_info = proxy_info
  46. super().__init__(*args, **kwargs)
  47. def do_GET(self):
  48. if not self.do_proxy_auth(self.username, self.password):
  49. self.server.close_request(self.request)
  50. return
  51. if self.path.endswith('/proxy_info'):
  52. payload = json.dumps(self.proxy_info or {
  53. 'client_address': self.client_address,
  54. 'connect': False,
  55. 'connect_host': None,
  56. 'connect_port': None,
  57. 'headers': dict(self.headers),
  58. 'path': self.path,
  59. 'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
  60. })
  61. self.send_response(200)
  62. self.send_header('Content-Type', 'application/json; charset=utf-8')
  63. self.send_header('Content-Length', str(len(payload)))
  64. self.end_headers()
  65. self.wfile.write(payload.encode())
  66. else:
  67. self.send_response(404)
  68. self.end_headers()
  69. self.server.close_request(self.request)
  70. if urllib3:
  71. import urllib3.util.ssltransport
  72. class SSLTransport(urllib3.util.ssltransport.SSLTransport):
  73. """
  74. Modified version of urllib3 SSLTransport to support server side SSL
  75. This allows us to chain multiple TLS connections.
  76. """
  77. def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
  78. self.incoming = ssl.MemoryBIO()
  79. self.outgoing = ssl.MemoryBIO()
  80. self.suppress_ragged_eofs = suppress_ragged_eofs
  81. self.socket = socket
  82. self.sslobj = ssl_context.wrap_bio(
  83. self.incoming,
  84. self.outgoing,
  85. server_hostname=server_hostname,
  86. server_side=server_side,
  87. )
  88. self._ssl_io_loop(self.sslobj.do_handshake)
  89. @property
  90. def _io_refs(self):
  91. return self.socket._io_refs
  92. @_io_refs.setter
  93. def _io_refs(self, value):
  94. self.socket._io_refs = value
  95. def shutdown(self, *args, **kwargs):
  96. self.socket.shutdown(*args, **kwargs)
  97. else:
  98. SSLTransport = None
  99. class HTTPSProxyHandler(HTTPProxyHandler):
  100. def __init__(self, request, *args, **kwargs):
  101. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  102. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  103. sslctx.load_cert_chain(certfn, None)
  104. if isinstance(request, ssl.SSLSocket):
  105. request = SSLTransport(request, ssl_context=sslctx, server_side=True)
  106. else:
  107. request = sslctx.wrap_socket(request, server_side=True)
  108. super().__init__(request, *args, **kwargs)
  109. class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
  110. protocol_version = 'HTTP/1.1'
  111. default_request_version = 'HTTP/1.1'
  112. def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
  113. self.username = username
  114. self.password = password
  115. self.request_handler = request_handler
  116. super().__init__(*args, **kwargs)
  117. def do_CONNECT(self):
  118. if not self.do_proxy_auth(self.username, self.password):
  119. self.server.close_request(self.request)
  120. return
  121. self.send_response(200)
  122. self.end_headers()
  123. proxy_info = {
  124. 'client_address': self.client_address,
  125. 'connect': True,
  126. 'connect_host': self.path.split(':')[0],
  127. 'connect_port': int(self.path.split(':')[1]),
  128. 'headers': dict(self.headers),
  129. 'path': self.path,
  130. 'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
  131. }
  132. self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
  133. self.server.close_request(self.request)
  134. class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
  135. def __init__(self, request, *args, **kwargs):
  136. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  137. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  138. sslctx.load_cert_chain(certfn, None)
  139. request = sslctx.wrap_socket(request, server_side=True)
  140. self._original_request = request
  141. super().__init__(request, *args, **kwargs)
  142. def do_CONNECT(self):
  143. super().do_CONNECT()
  144. self.server.close_request(self._original_request)
  145. @contextlib.contextmanager
  146. def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
  147. server = server_thread = None
  148. try:
  149. bind_address = bind_ip or '127.0.0.1'
  150. server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer
  151. server = server_type(
  152. (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
  153. server_port = http_server_port(server)
  154. server_thread = threading.Thread(target=server.serve_forever)
  155. server_thread.daemon = True
  156. server_thread.start()
  157. if '.' not in bind_address:
  158. yield f'[{bind_address}]:{server_port}'
  159. else:
  160. yield f'{bind_address}:{server_port}'
  161. finally:
  162. server.shutdown()
  163. server.server_close()
  164. server_thread.join(2.0)
  165. class HTTPProxyTestContext(abc.ABC):
  166. REQUEST_HANDLER_CLASS = None
  167. REQUEST_PROTO = None
  168. def http_server(self, server_class, *args, **kwargs):
  169. return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)
  170. @abc.abstractmethod
  171. def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
  172. """return a dict of proxy_info"""
  173. class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
  174. # Standard HTTP Proxy for http requests
  175. REQUEST_HANDLER_CLASS = HTTPProxyHandler
  176. REQUEST_PROTO = 'http'
  177. def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
  178. request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
  179. handler.validate(request)
  180. return json.loads(handler.send(request).read().decode())
  181. class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
  182. # HTTP Connect proxy, for https requests
  183. REQUEST_HANDLER_CLASS = HTTPSProxyHandler
  184. REQUEST_PROTO = 'https'
  185. def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
  186. request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
  187. handler.validate(request)
  188. return json.loads(handler.send(request).read().decode())
  189. CTX_MAP = {
  190. 'http': HTTPProxyHTTPTestContext,
  191. 'https': HTTPProxyHTTPSTestContext,
  192. }
  193. @pytest.fixture(scope='module')
  194. def ctx(request):
  195. return CTX_MAP[request.param]()
  196. @pytest.mark.parametrize(
  197. 'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  198. @pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http
  199. class TestHTTPProxy:
  200. def test_http_no_auth(self, handler, ctx):
  201. with ctx.http_server(HTTPProxyHandler) as server_address:
  202. with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
  203. proxy_info = ctx.proxy_info_request(rh)
  204. assert proxy_info['proxy'] == server_address
  205. assert proxy_info['connect'] is False
  206. assert 'Proxy-Authorization' not in proxy_info['headers']
  207. def test_http_auth(self, handler, ctx):
  208. with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
  209. with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
  210. proxy_info = ctx.proxy_info_request(rh)
  211. assert proxy_info['proxy'] == server_address
  212. assert 'Proxy-Authorization' in proxy_info['headers']
  213. def test_http_bad_auth(self, handler, ctx):
  214. with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
  215. with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
  216. with pytest.raises(HTTPError) as exc_info:
  217. ctx.proxy_info_request(rh)
  218. assert exc_info.value.response.status == 407
  219. exc_info.value.response.close()
  220. def test_http_source_address(self, handler, ctx):
  221. with ctx.http_server(HTTPProxyHandler) as server_address:
  222. source_address = f'127.0.0.{random.randint(5, 255)}'
  223. verify_address_availability(source_address)
  224. with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
  225. source_address=source_address) as rh:
  226. proxy_info = ctx.proxy_info_request(rh)
  227. assert proxy_info['proxy'] == server_address
  228. assert proxy_info['client_address'][0] == source_address
  229. @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
  230. def test_https(self, handler, ctx):
  231. with ctx.http_server(HTTPSProxyHandler) as server_address:
  232. with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
  233. proxy_info = ctx.proxy_info_request(rh)
  234. assert proxy_info['proxy'] == server_address
  235. assert proxy_info['connect'] is False
  236. assert 'Proxy-Authorization' not in proxy_info['headers']
  237. @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
  238. def test_https_verify_failed(self, handler, ctx):
  239. with ctx.http_server(HTTPSProxyHandler) as server_address:
  240. with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
  241. # Accept SSLError as may not be feasible to tell if it is proxy or request error.
  242. # note: if request proto also does ssl verification, this may also be the error of the request.
  243. # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
  244. with pytest.raises((ProxyError, SSLError)):
  245. ctx.proxy_info_request(rh)
  246. def test_http_with_idn(self, handler, ctx):
  247. with ctx.http_server(HTTPProxyHandler) as server_address:
  248. with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
  249. proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw')
  250. assert proxy_info['proxy'] == server_address
  251. assert proxy_info['path'].startswith('http://xn--fiq228c.tw')
  252. assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw'
  253. @pytest.mark.parametrize(
  254. 'handler,ctx', [
  255. ('Requests', 'https'),
  256. ('CurlCFFI', 'https'),
  257. ], indirect=True)
  258. class TestHTTPConnectProxy:
  259. def test_http_connect_no_auth(self, handler, ctx):
  260. with ctx.http_server(HTTPConnectProxyHandler) as server_address:
  261. with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
  262. proxy_info = ctx.proxy_info_request(rh)
  263. assert proxy_info['proxy'] == server_address
  264. assert proxy_info['connect'] is True
  265. assert 'Proxy-Authorization' not in proxy_info['headers']
  266. def test_http_connect_auth(self, handler, ctx):
  267. with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
  268. with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
  269. proxy_info = ctx.proxy_info_request(rh)
  270. assert proxy_info['proxy'] == server_address
  271. assert 'Proxy-Authorization' in proxy_info['headers']
  272. @pytest.mark.skip_handler(
  273. 'Requests',
  274. 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374',
  275. )
  276. def test_http_connect_bad_auth(self, handler, ctx):
  277. with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
  278. with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
  279. with pytest.raises(ProxyError):
  280. ctx.proxy_info_request(rh)
  281. def test_http_connect_source_address(self, handler, ctx):
  282. with ctx.http_server(HTTPConnectProxyHandler) as server_address:
  283. source_address = f'127.0.0.{random.randint(5, 255)}'
  284. verify_address_availability(source_address)
  285. with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
  286. source_address=source_address,
  287. verify=False) as rh:
  288. proxy_info = ctx.proxy_info_request(rh)
  289. assert proxy_info['proxy'] == server_address
  290. assert proxy_info['client_address'][0] == source_address
  291. @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
  292. def test_https_connect_proxy(self, handler, ctx):
  293. with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
  294. with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
  295. proxy_info = ctx.proxy_info_request(rh)
  296. assert proxy_info['proxy'] == server_address
  297. assert proxy_info['connect'] is True
  298. assert 'Proxy-Authorization' not in proxy_info['headers']
  299. @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
  300. def test_https_connect_verify_failed(self, handler, ctx):
  301. with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
  302. with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
  303. # Accept SSLError as may not be feasible to tell if it is proxy or request error.
  304. # note: if request proto also does ssl verification, this may also be the error of the request.
  305. # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
  306. with pytest.raises((ProxyError, SSLError)):
  307. ctx.proxy_info_request(rh)
  308. @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
  309. def test_https_connect_proxy_auth(self, handler, ctx):
  310. with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
  311. with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh:
  312. proxy_info = ctx.proxy_info_request(rh)
  313. assert proxy_info['proxy'] == server_address
  314. assert 'Proxy-Authorization' in proxy_info['headers']