123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381 |
- import abc
- import base64
- import contextlib
- import functools
- import json
- import os
- import random
- import ssl
- import threading
- from http.server import BaseHTTPRequestHandler
- from socketserver import ThreadingTCPServer
- import pytest
- from test.helper import http_server_port, verify_address_availability
- from test.test_networking import TEST_DIR
- from test.test_socks import IPv6ThreadingTCPServer
- from yt_dlp.dependencies import urllib3
- from yt_dlp.networking import Request
- from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError
class HTTPProxyAuthMixin:
    """Mixin that adds HTTP Basic proxy-authentication checks to a request handler.

    The host class is expected to provide ``headers``, ``send_response``,
    ``send_header`` and ``end_headers``.
    """

    def proxy_auth_error(self):
        """Send a 407 response with a Basic challenge and return False."""
        self.send_response(407)
        self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
        self.end_headers()
        return False

    def do_proxy_auth(self, username, password):
        """Validate the request's ``Proxy-Authorization`` header.

        Returns True when no credentials are configured (both ``None``) or when
        the supplied Basic credentials match. In every other case a 407
        response is sent and False is returned.
        """
        # Authentication is disabled when neither value is configured
        if username is None and password is None:
            return True

        scheme_prefix = 'Basic '
        header_value = self.headers.get('Proxy-Authorization', None)
        if header_value is None or not header_value.startswith(scheme_prefix):
            return self.proxy_auth_error()

        try:
            decoded = base64.b64decode(header_value[len(scheme_prefix):]).decode()
            supplied_user, supplied_pass = decoded.split(':', 1)
        except Exception:
            # Any malformed credential blob is treated as an authentication failure
            return self.proxy_auth_error()

        # A missing expected username/password is compared as the empty string
        expected = (username or '', password or '')
        if (supplied_user, supplied_pass) != expected:
            return self.proxy_auth_error()
        return True
class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    """Plain HTTP proxy handler used by the tests.

    A GET whose path ends in ``/proxy_info`` is answered with a JSON document
    describing the request as seen by the proxy (or the preset ``proxy_info``
    when one was supplied). Any other GET is answered with 404.
    """

    def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
        # `request_handler` is accepted but not used by this handler.
        # State is assigned before delegating to the base initializer.
        self.proxy_info = proxy_info
        self.username = username
        self.password = password
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Authenticate the client, then serve the proxy info document or a 404."""
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return

        if not self.path.endswith('/proxy_info'):
            self.send_response(404)
            self.end_headers()
        else:
            info = self.proxy_info
            if not info:
                # No preset info: describe this (non-CONNECT) request
                info = {
                    'client_address': self.client_address,
                    'connect': False,
                    'connect_host': None,
                    'connect_port': None,
                    'headers': dict(self.headers),
                    'path': self.path,
                    'proxy': ':'.join(str(part) for part in self.connection.getsockname()),
                }
            body = json.dumps(info)
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body.encode())

        self.server.close_request(self.request)
if not urllib3:
    # Without urllib3 there is no base class to extend
    SSLTransport = None
else:
    import urllib3.util.ssltransport

    class SSLTransport(urllib3.util.ssltransport.SSLTransport):
        """
        Variant of urllib3's SSLTransport that can also act as the server side
        of the TLS session, which makes it possible to chain multiple TLS
        connections.
        """

        def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
            # The base initializer is not called here; the BIO pair is set up
            # directly so that `server_side` can be passed through to wrap_bio()
            self.socket = socket
            self.suppress_ragged_eofs = suppress_ragged_eofs
            self.incoming = ssl.MemoryBIO()
            self.outgoing = ssl.MemoryBIO()
            self.sslobj = ssl_context.wrap_bio(
                self.incoming,
                self.outgoing,
                server_hostname=server_hostname,
                server_side=server_side,
            )
            # Run the TLS handshake through the inherited I/O loop
            self._ssl_io_loop(self.sslobj.do_handshake)

        @property
        def _io_refs(self):
            """Delegate ``_io_refs`` to the wrapped socket."""
            return self.socket._io_refs

        @_io_refs.setter
        def _io_refs(self, value):
            self.socket._io_refs = value

        def shutdown(self, *args, **kwargs):
            """Forward ``shutdown`` to the wrapped socket."""
            self.socket.shutdown(*args, **kwargs)
class HTTPSProxyHandler(HTTPProxyHandler):
    """HTTP proxy handler that performs server-side TLS on the incoming connection."""

    def __init__(self, request, *args, **kwargs):
        cert_path = os.path.join(TEST_DIR, 'testcert.pem')
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(cert_path, None)

        if not isinstance(request, ssl.SSLSocket):
            request = tls_context.wrap_socket(request, server_side=True)
        else:
            # The connection is already an SSLSocket, so add another TLS layer
            # on top of it with SSLTransport
            request = SSLTransport(request, ssl_context=tls_context, server_side=True)

        super().__init__(request, *args, **kwargs)
class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    """HTTP CONNECT proxy handler used by the tests.

    After authenticating the client and acknowledging the CONNECT request, the
    same connection is handed to ``request_handler`` together with a
    ``proxy_info`` dict describing the CONNECT request.
    """

    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'

    def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
        # State is assigned before delegating to the base initializer
        self.username = username
        self.password = password
        self.request_handler = request_handler
        super().__init__(*args, **kwargs)

    def do_CONNECT(self):
        """Authenticate, reply 200, then let ``request_handler`` serve the tunnelled request."""
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        self.send_response(200)
        self.end_headers()

        # The CONNECT target is "host:port". Splitting on the LAST colon keeps
        # bracketed IPv6 literals such as "[::1]:443" intact (the brackets are
        # retained in connect_host); host names and IPv4 addresses are parsed
        # exactly as before.
        connect_host, _, connect_port = self.path.rpartition(':')
        proxy_info = {
            'client_address': self.client_address,
            'connect': True,
            'connect_host': connect_host,
            'connect_port': int(connect_port),
            'headers': dict(self.headers),
            'path': self.path,
            'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
        }
        self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
        self.server.close_request(self.request)
class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
    """CONNECT proxy handler that performs server-side TLS on the incoming connection."""

    def __init__(self, request, *args, **kwargs):
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
        tls_request = tls_context.wrap_socket(request, server_side=True)
        # NOTE: despite its name, this attribute holds the TLS-wrapped socket
        self._original_request = tls_request
        super().__init__(tls_request, *args, **kwargs)

    def do_CONNECT(self):
        """Run the regular CONNECT handling, then close the stored socket."""
        super().do_CONNECT()
        self.server.close_request(self._original_request)
@contextlib.contextmanager
def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
    """Run a proxy server in a background thread for the duration of the context.

    @param proxy_server_class:  Handler class used for connections to the proxy.
    @param request_handler:     Passed to the handler class as ``request_handler``.
    @param bind_ip:             Address to bind to; defaults to ``127.0.0.1``.
                                An address without a ``.`` is served by
                                ``IPv6ThreadingTCPServer``.
    @param proxy_server_kwargs: Extra keyword arguments for the handler class.

    Yields the server address as ``host:port`` (``[host]:port`` for addresses
    without a ``.``).
    """
    bind_address = bind_ip or '127.0.0.1'
    is_ipv4 = '.' in bind_address
    server_type = ThreadingTCPServer if is_ipv4 else IPv6ThreadingTCPServer
    handler_factory = functools.partial(
        proxy_server_class, request_handler=request_handler, **proxy_server_kwargs)

    # If construction fails there is nothing to tear down, so the original
    # error propagates instead of being masked by cleanup on a missing server.
    server = server_type((bind_address, 0), handler_factory)
    try:
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever, daemon=True)
        server_thread.start()
    except BaseException:
        # serve_forever() is not running yet: shutdown() must not be called
        # here (it would wait forever), only release the listening socket.
        server.server_close()
        raise

    try:
        yield f'{bind_address}:{server_port}' if is_ipv4 else f'[{bind_address}]:{server_port}'
    finally:
        server.shutdown()
        server.server_close()
        server_thread.join(2.0)
class HTTPProxyTestContext(abc.ABC):
    """Base class for a per-protocol test context.

    Subclasses set the handler class that serves the proxied request and the
    URL scheme of the request, and implement ``proxy_info_request``.
    """

    # Handler class passed to proxy_server() as the request handler
    REQUEST_HANDLER_CLASS = None
    # Scheme of the request sent through the proxy; used as the `proxies` key
    REQUEST_PROTO = None

    def http_server(self, server_class, *args, **kwargs):
        """Return a ``proxy_server`` context manager for ``server_class``."""
        inner_handler = self.REQUEST_HANDLER_CLASS
        return proxy_server(server_class, inner_handler, *args, **kwargs)

    @abc.abstractmethod
    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """Send a request with ``handler`` and return the resulting proxy_info dict."""
class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
    """Test context for plain ``http`` requests sent through a standard HTTP proxy."""

    REQUEST_HANDLER_CLASS = HTTPProxyHandler
    REQUEST_PROTO = 'http'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request ``/proxy_info`` over http and return the decoded JSON document."""
        host = target_domain or "127.0.0.1"
        port = target_port or "40000"
        request = Request(f'http://{host}:{port}/proxy_info', **req_kwargs)
        handler.validate(request)
        response = handler.send(request)
        return json.loads(response.read().decode())
class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
    """Test context for ``https`` requests, which go through an HTTP CONNECT proxy."""

    REQUEST_HANDLER_CLASS = HTTPSProxyHandler
    REQUEST_PROTO = 'https'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request ``/proxy_info`` over https and return the decoded JSON document."""
        host = target_domain or "127.0.0.1"
        port = target_port or "40000"
        request = Request(f'https://{host}:{port}/proxy_info', **req_kwargs)
        handler.validate(request)
        response = handler.send(request)
        return json.loads(response.read().decode())
# Maps the `ctx` fixture parameter (the request scheme) to its test context class
CTX_MAP = dict(
    http=HTTPProxyHTTPTestContext,
    https=HTTPProxyHTTPSTestContext,
)
@pytest.fixture(scope='module')
def ctx(request):
    """Instantiate the test context class selected by the fixture parameter."""
    context_class = CTX_MAP[request.param]
    return context_class()
@pytest.mark.parametrize(
    'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
@pytest.mark.parametrize('ctx', ['http'], indirect=True)  # pure http proxy can only support http
class TestHTTPProxy:
    """Tests for plain (non-CONNECT) HTTP and HTTPS proxies."""

    def test_http_no_auth(self, handler, ctx):
        """The request goes through the proxy without any Proxy-Authorization header."""
        with ctx.http_server(HTTPProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://{server_address}'}
            with handler(proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert info['connect'] is False
                assert 'Proxy-Authorization' not in info['headers']

    def test_http_auth(self, handler, ctx):
        """Credentials in the proxy URL are sent as Proxy-Authorization."""
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}
            with handler(proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert 'Proxy-Authorization' in info['headers']

    def test_http_bad_auth(self, handler, ctx):
        """Wrong credentials surface as an HTTPError with status 407."""
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}
            with handler(proxies=proxies) as rh:
                with pytest.raises(HTTPError) as exc_info:
                    ctx.proxy_info_request(rh)
                response = exc_info.value.response
                assert response.status == 407
                response.close()

    def test_http_source_address(self, handler, ctx):
        """The configured source address is the one the proxy sees."""
        with ctx.http_server(HTTPProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            proxies = {ctx.REQUEST_PROTO: f'http://{server_address}'}
            with handler(proxies=proxies, source_address=source_address) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert info['client_address'][0] == source_address

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https(self, handler, ctx):
        """The request goes through a TLS-wrapped proxy when verification is disabled."""
        with ctx.http_server(HTTPSProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'https://{server_address}'}
            with handler(verify=False, proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert info['connect'] is False
                assert 'Proxy-Authorization' not in info['headers']

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https_verify_failed(self, handler, ctx):
        """With verification enabled, the request through the TLS proxy must fail."""
        with ctx.http_server(HTTPSProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'https://{server_address}'}
            with handler(verify=True, proxies=proxies) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    def test_http_with_idn(self, handler, ctx):
        """An internationalized target domain reaches the proxy in punycode form."""
        with ctx.http_server(HTTPProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://{server_address}'}
            with handler(proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh, target_domain='中文.tw')
                assert info['proxy'] == server_address
                assert info['path'].startswith('http://xn--fiq228c.tw')
                host_header = info['headers']['Host']
                assert host_header.split(':', 1)[0] == 'xn--fiq228c.tw'
@pytest.mark.parametrize(
    'handler,ctx', [
        ('Requests', 'https'),
        ('CurlCFFI', 'https'),
    ], indirect=True)
class TestHTTPConnectProxy:
    """Tests for HTTP CONNECT proxies, both plain and TLS-wrapped."""

    def test_http_connect_no_auth(self, handler, ctx):
        """The request is tunnelled via CONNECT without a Proxy-Authorization header."""
        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://{server_address}'}
            with handler(verify=False, proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert info['connect'] is True
                assert 'Proxy-Authorization' not in info['headers']

    def test_http_connect_auth(self, handler, ctx):
        """Credentials in the proxy URL are sent with the CONNECT request."""
        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}
            with handler(verify=False, proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert 'Proxy-Authorization' in info['headers']

    @pytest.mark.skip_handler(
        'Requests',
        'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374',
    )
    def test_http_connect_bad_auth(self, handler, ctx):
        """Wrong credentials on CONNECT surface as a ProxyError."""
        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
            proxies = {ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}
            with handler(verify=False, proxies=proxies) as rh:
                with pytest.raises(ProxyError):
                    ctx.proxy_info_request(rh)

    def test_http_connect_source_address(self, handler, ctx):
        """The configured source address is the one the proxy sees."""
        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            proxies = {ctx.REQUEST_PROTO: f'http://{server_address}'}
            with handler(proxies=proxies, source_address=source_address, verify=False) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert info['client_address'][0] == source_address

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_proxy(self, handler, ctx):
        """The request is tunnelled via CONNECT through a TLS-wrapped proxy."""
        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'https://{server_address}'}
            with handler(verify=False, proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert info['connect'] is True
                assert 'Proxy-Authorization' not in info['headers']

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_verify_failed(self, handler, ctx):
        """With verification enabled, the request through the TLS CONNECT proxy must fail."""
        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
            proxies = {ctx.REQUEST_PROTO: f'https://{server_address}'}
            with handler(verify=True, proxies=proxies) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_proxy_auth(self, handler, ctx):
        """Credentials in the proxy URL are sent with the CONNECT request to a TLS proxy."""
        with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
            proxies = {ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}
            with handler(verify=False, proxies=proxies) as rh:
                info = ctx.proxy_info_request(rh)
                assert info['proxy'] == server_address
                assert 'Proxy-Authorization' in info['headers']
|