123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539 |
- ########################################################################
- # Searx-Qt - Lightweight desktop application for Searx.
- # Copyright (C) 2020-2022 CYBERDEViL
- #
- # This file is part of Searx-Qt.
- #
- # Searx-Qt is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Searx-Qt is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- ########################################################################
- import requests
- from requests.exceptions import (
- HTTPError,
- ConnectionError,
- Timeout,
- ProxyError,
- SSLError,
- RequestException
- )
- import json
- from jsonschema import validate as JsonValidate
- from jsonschema.exceptions import ValidationError, SchemaError
- import random
- from searxqt.core import log
# Detect optional SOCKS proxy support. The 'socks' module is only probed for
# availability; it is removed from this module's namespace again right after a
# successful import.
try:
    import socks
except ImportError:
    HAVE_SOCKS = False
    log.debug("pysocks not installed! No socks proxy support.")
else:
    HAVE_SOCKS = True
    del socks
class ErrorType:
    """Integer codes describing the outcome of a request.

    The values are plain ints; 'ErrorTypeStr' maps each of them to a
    human readable name.
    """

    # The request went fine.
    Success = 0

    # Failures reported by python-requests.
    HttpError = 1
    ConnectionError = 2
    Timeout = 3

    # The response status code is not one of the accepted codes.
    WrongStatus = 4

    # The response body could not be decoded as json.
    DecodeError = 5

    # NOTE(review): not set anywhere in this module, presumably used by
    # callers that found no results in a valid response.
    NoResults = 6

    # More failures reported by python-requests.
    ProxyError = 7
    SSLError = 8

    # The response json did not validate against the json schema.
    InvalidSchema = 9

    # The response body is larger than the configured maximum size.
    ContentSizeExceeded = 10

    # Any other request failure.
    Other = 11
# Human readable name for every ErrorType value. Each name is identical to
# the name of the matching ErrorType attribute, so the mapping is derived
# from the attribute names (in ascending value order).
ErrorTypeStr = {
    getattr(ErrorType, name): name
    for name in (
        "Success",
        "HttpError",
        "ConnectionError",
        "Timeout",
        "WrongStatus",
        "DecodeError",
        "NoResults",
        "ProxyError",
        "SSLError",
        "InvalidSchema",
        "ContentSizeExceeded",
        "Other",
    )
}
# Global json schema container so we won't have to read a json schema file
# from disk every time we need to verify some json data.
Schemas = {}


def json_schema_load(key, filepath):
    """Load a json schema file into the global 'Schemas' container.

    @param key: A name to store the json schema to, existing keys will be
                overwritten/reloaded!
    @type key: string

    @param filepath: Path where the json schema file is located (including
                     filename)
    @type filepath: string

    @raise json.JSONDecodeError: When the file does not contain valid json.
    @raise UnicodeDecodeError: When the file is not valid UTF-8.
    @raise OSError: When the given filepath cannot be accessed.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same schema file loads identically on every system.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # An exception would have been raised above when open() or json.load()
    # failed, so at this point the schema json looks valid.
    Schemas[key] = data
class Result:
    """Outcome of a single request: the response plus error information.

    Evaluates to True in a boolean context when the request did not fail.
    Subclasses can override verifyFurther() to run additional checks on the
    response.
    """

    def __init__(self, url, response, err="", errType=ErrorType.Success,
                 acceptCodes=None):
        """
        @param url: Url used for the request.
        @param response: The response object, or None when no response was
                         received.
        @param err: Error message, empty when there is no error.
        @type err: string
        @param errType: One of the ErrorType values.
        @param acceptCodes: Status codes that count as success, defaults to
                            [200] when None or empty.
        @type acceptCodes: list or None
        """
        self._url = url  # url used for request.
        self._response = response
        self._err = err
        self._errType = errType

        if not acceptCodes:
            acceptCodes = [200]

        if (errType == ErrorType.Success
                and response.status_code not in acceptCodes):
            self._errType = ErrorType.WrongStatus
            self._err = f"WrongStatus: {self._response.status_code}"
        else:
            # Note that this is also reached when the request itself has
            # already failed (errType is not Success); overrides have to
            # cope with that.
            self.verifyFurther()

    def __bool__(self):
        return not self.failed()

    def url(self):
        """ Returns the url that was used for the request.
        """
        return self._url

    def errorType(self):
        """ Returns the ErrorType value of this result.
        """
        return self._errType

    def error(self):
        """ Returns the error message (empty string when there is none).
        """
        return self._err

    def content(self):
        """ In case json.loads failed and we want to debug.
        """
        if self._response is None:
            return b''
        return self._response.content

    def text(self):
        """ Returns the response text, or an empty string when there is no
        response.
        """
        if self._response is None:
            return ''
        return self._response.text

    def failed(self):
        """ Returns True when the error type is anything but Success.
        """
        # Error types are plain integers: compare by value, not by identity.
        return self._errType != ErrorType.Success

    def statusCode(self):
        """ Returns the response status code, or 0 when there is no response.
        """
        if self._response is not None:
            return self._response.status_code
        return 0

    def verifyFurther(self):
        """ Hook for subclasses to run additional checks, does nothing here.
        """
        pass
# JsonResult should not be used directly, it should be subclassed. The subclass
# is responsible for setting JsonResult.Schema
class JsonResult(Result):
    """Result whose response body must be json that validates against
    'Schema'.
    """

    # Json schema the decoded response is validated against.
    Schema = {}

    def __init__(self, url, response, err="", errType=ErrorType.Success,
                 acceptCodes=None):
        Result.__init__(
            self,
            url,
            response,
            err=err,
            errType=errType,
            acceptCodes=acceptCodes
        )

    def verifyFurther(self):
        """ Decode the response body as json and validate it against
        'Schema'.

        On failure the error type becomes DecodeError or InvalidSchema. An
        error that is already set is left untouched.
        """
        # The request already failed; there is nothing to decode and the
        # original error must not be replaced by a bogus schema error.
        if self._errType != ErrorType.Success:
            return

        try:
            data = self.json()
        except (json.JSONDecodeError, UnicodeDecodeError) as err:
            # UnicodeDecodeError could happen when the response encoding isn't
            # plain ? (gzip) Or we just have malformed data/crap.
            self._errType = ErrorType.DecodeError
            self._err = f"DecodeError: `{err}`"
            # Validating would only hide the decode error.
            return

        try:
            JsonValidate(instance=data, schema=self.Schema)
        except (ValidationError, SchemaError) as err:
            self._errType = ErrorType.InvalidSchema
            self._err = f"InvalidSchema: `{err}`"

    def json(self):
        """ Returns the decoded json of the response body, or an empty dict
        when this result has failed.

        May raise json.JSONDecodeError or UnicodeDecodeError when the body
        is not valid json.
        """
        if self.errorType() != ErrorType.Success:
            return {}
        return json.loads(self._response.content)
class ProxyProtocol:
    """Integer identifiers for the supported proxy protocols.

    'ProxyProtocolString' maps these values to their url scheme.
    """

    HTTP = 1
    SOCKS4 = 2
    SOCKS5 = 4
# Url scheme for each proxy protocol value; 0 means no protocol is set.
ProxyProtocolString = {
    0: "none",
    1: "http",    # ProxyProtocol.HTTP
    2: "socks4",  # ProxyProtocol.SOCKS4
    4: "socks5",  # ProxyProtocol.SOCKS5
}
class RequestSettings:
    """Request settings, compiled into kwargs for python-requests.

    After changing one or multiple settings, call updateRequestKwargs() so
    the compiled kwargs reflect the change. Use kwargsForNewRequest() to get
    the kwargs for a new request; it also picks the useragent.
    """

    def __init__(self):
        # Settings
        self._useragents = ["searx-qt"]
        self._randomUserAgent = False  # Use a random useragent for each
                                       # request.
        self._verifySSL = True         # Verify SSL certificates (HTTPS).
        self._timeout = 10             # Connection timeout in seconds.
        # Maximum content receive size in bytes (RequestsHandler compares it
        # with the Content-Length header and the received byte count).
        self._maxSize = 10 * 1024 * 1024
        self._chunkSize = 500 * 1024   # Receive chunk size.
        self._proxyEnabled = False     # Enable the use of a proxy.
        self._proxyDNS = True          # Only available for socks
        self._proxyHost = ""           # Proxy string user:pass@host:port
        self._proxyProtocol = 0        # Proxy protocol, example:
                                       # ProxyProtocol.SOCKS5
        self._extraHeaders = {}        # Extra header values

        # Compiled settings
        self._headers = {}  # headers kwarg (will be included in self._kwargs)

        # kwargs passed to requests.get or requests.post.
        # This will be compiled each time a setting has
        # changed, so it won't have to be compiled each
        # time a request is made.
        self._kwargs = {
            "verify": True,
            "timeout": 10,
            "headers": self._headers
        }

        self._compileKwargs()

    def getData(self):
        """ Returns all settings as a dict (same keys as setData() accepts).

        The "useragents" and "extraHeaders" values are the live objects of
        this instance, not copies.
        """
        return {
            "useragents": self.useragents,
            "randomUserAgent": self.randomUserAgent,
            "verifySSL": self.verifySSL,
            "timeout": self.timeout,
            "maxSize": self.maxSize,
            "chunkSize": self.chunkSize,
            "proxyEnabled": self.proxyEnabled,
            "proxyDNS": self.proxyDNS,
            "proxyHost": self.proxyHost,
            "proxyProtocol": self.proxyProtocol,
            "extraHeaders": self._extraHeaders
        }

    def setData(self, data):
        """ Set all settings from a dict and recompile the requests kwargs.

        Missing keys fall back to their default value.

        @param data: Settings as returned by getData()
        @type data: dict
        """
        # Take a copy before touching our own list: 'data' may hold the very
        # list returned by getData(), and clearing that first would lose all
        # useragents. The list object itself is kept, references handed out
        # by the 'useragents' property stay valid.
        useragents = list(data.get("useragents", []))
        self._useragents[:] = useragents

        self.randomUserAgent = data.get("randomUserAgent", False)
        self.verifySSL = data.get("verifySSL", True)
        self.timeout = data.get("timeout", 10)
        self.maxSize = data.get("maxSize", 10 * 1024 * 1024)
        self.chunkSize = data.get("chunkSize", 500 * 1024)
        self.proxyEnabled = data.get("proxyEnabled", False)
        self.proxyDNS = data.get("proxyDNS", True)
        self.proxyHost = data.get("proxyHost", "")
        self.proxyProtocol = data.get("proxyProtocol", 0)
        self._extraHeaders = data.get("extraHeaders", {})

        self.updateRequestKwargs()

    # Settings

    @property
    def extraHeaders(self):
        return self._extraHeaders

    @property
    def verifySSL(self):
        return self._verifySSL

    @verifySSL.setter
    def verifySSL(self, state):
        self._verifySSL = state

    @property
    def timeout(self):
        return self._timeout

    @timeout.setter
    def timeout(self, state):
        self._timeout = state

    @property
    def maxSize(self):
        return self._maxSize

    @maxSize.setter
    def maxSize(self, size):
        self._maxSize = size

    @property
    def chunkSize(self):
        return self._chunkSize

    @chunkSize.setter
    def chunkSize(self, size):
        self._chunkSize = size

    @property
    def proxyEnabled(self):
        return self._proxyEnabled

    @proxyEnabled.setter
    def proxyEnabled(self, state):
        self._proxyEnabled = state

    @property
    def proxyHost(self):
        return self._proxyHost

    @proxyHost.setter
    def proxyHost(self, host):
        self._proxyHost = host

    @property
    def proxyProtocol(self):
        return self._proxyProtocol

    # @type protocol: class ProxyProtocol
    @proxyProtocol.setter
    def proxyProtocol(self, protocol):
        self._proxyProtocol = protocol

    @property
    def proxyDNS(self):
        return self._proxyDNS

    @proxyDNS.setter
    def proxyDNS(self, state):
        self._proxyDNS = state

    # Use this to add/remove/clear useragents, it returns a list with strings
    @property
    def useragents(self):
        return self._useragents

    @property
    def randomUserAgent(self):
        return self._randomUserAgent

    @randomUserAgent.setter
    def randomUserAgent(self, state):
        self._randomUserAgent = state

    # Make python-requests compatible

    @property
    def requestsKwargs(self):
        return self._kwargs

    # Update requests kwargs (call this each time after you changed one or
    # multiple settings)
    def updateRequestKwargs(self):
        self._compileKwargs()

    # Get requests kwargs for a new request.
    def kwargsForNewRequest(self):
        """ Returns the compiled kwargs with the "User-Agent" header set for
        a new request, or removed when there is no useragent.

        The returned dict is the shared, compiled dict; not a copy.
        """
        useragent = self._getUseragent()
        if useragent:
            self._headers.update({"User-Agent": useragent})
        elif "User-Agent" in self._headers:
            del self._headers["User-Agent"]
        return self._kwargs

    def _compileProxies(self):
        """ Returns the 'proxies' dict for python-requests.
        """
        # For socks proxies an 'h' is appended to the scheme ("socks5h")
        # when the proxyDNS setting is enabled.
        dnsStr = ""
        if self.proxyProtocol in (ProxyProtocol.SOCKS4, ProxyProtocol.SOCKS5):
            if self.proxyDNS:
                dnsStr = "h"

        protoStr = ProxyProtocolString[self.proxyProtocol]
        proxyStr = f"{protoStr}{dnsStr}://{self.proxyHost}"

        return {
            "http": proxyStr,
            "https": proxyStr
        }

    def _compileKwargs(self):
        """ Rebuild the compiled kwargs and headers from the settings.

        The existing dict objects are updated in place, so references to
        them stay valid.
        """
        kwargs = {
            "verify": self.verifySSL,
            "timeout": self.timeout,
            "headers": self._headers
        }

        self._headers.clear()
        self._headers.update(self.extraHeaders)

        if self._proxyEnabled:
            kwargs.update({"proxies": self._compileProxies()})

        self._kwargs.clear()
        self._kwargs.update(kwargs)

    def _getUseragent(self):
        """ Returns the useragent string for a new request, an empty string
        when there are no useragents.
        """
        if not self._useragents:
            return ""

        # Return first useragent string
        if len(self._useragents) == 1 or not self._randomUserAgent:
            return self._useragents[0]

        # Return random useragent
        return random.choice(self._useragents)
class RequestsHandler:
    """Makes GET/POST requests with the settings of a RequestSettings
    instance.

    get() and post() don't raise on request failures (RequestException);
    they always return a result object (see failSafeRequestFactory).
    """

    def __init__(self):
        self._settings = RequestSettings()

    @property
    def settings(self):
        return self._settings

    def _receiveContent(self, response):
        """ Receive the body of a streamed response into
        'response._content', honoring the 'maxSize' setting.

        @param response: Response of a request made with stream=True.

        @return: Tuple (errType, err); (ErrorType.Success, "") when the
                 body has been received completely.
        @rtype: tuple

        Exceptions raised by python-requests while receiving are not
        handled here.
        """
        chunkSize = self.settings.chunkSize
        maxSize = self.settings.maxSize

        headerContentSize = response.headers.get("Content-Length", None)
        if headerContentSize is not None:
            try:
                announcedSize = int(headerContentSize)
            except ValueError:
                # Malformed header; ignore it, the size is still enforced
                # below by counting the received bytes.
                announcedSize = 0

            if announcedSize > maxSize:
                e = f"Maximum content size limit of '{maxSize}' bytes exceeded. (1)"
                log.debug(f"Request failed! ContentSizeExceeded: {e}", self)
                # Make sure nobody downloads the body later on through
                # response.content or response.text.
                response._content = b""
                return ErrorType.ContentSizeExceeded, e

        errType = ErrorType.Success
        err = ""
        curSize = 0
        received = bytearray()
        response._content = b""

        try:
            for chunk in response.iter_content(chunkSize):
                # The server might have its own chunk size that is
                # smaller than ours, so that's why we add the length
                # of the received content instead of adding our
                # chunkSize.
                curSize += len(chunk)
                if curSize > maxSize:
                    e = f"Maximum content size limit of '{maxSize}' bytes exceeded. (2)"
                    log.debug(f"Request failed! ContentSizeExceeded: {e}", self)
                    errType = ErrorType.ContentSizeExceeded
                    err = e
                    break
                received += chunk
        finally:
            # Keep what has been received so far, also when receiving got
            # interrupted by an exception.
            response._content = bytes(received)

        return errType, err

    def failSafeRequestFactory(func):
        """ Decorator used for get() and post() below.

        The returned function makes the request through 'func', receives
        the response body and turns every RequestException into an error
        type and message. It always returns an instance of 'ResultType'
        (JsonResult when not specified).
        """
        def failSafeRequest(self, url, data=None, ResultType=None):
            response = None
            err = ""
            errType = ErrorType.Success

            if not ResultType:
                # When 'ResultType' isn't specified, set 'JsonResult' as
                # default.
                ResultType = JsonResult

            log.debug("<NEW Request>", self)
            log.debug("# ------------------------", self)
            log.debug(f"# ResultType : {ResultType}", self)

            requestKwargs = self._settings.kwargsForNewRequest()

            # Request exceptions
            # https://docs.python-requests.org/en/master/_modules/requests/exceptions/
            try:
                response = func(self, url, data=data, **requestKwargs)
                errType, err = self._receiveContent(response)
            except HTTPError as e:
                # HTTPError is subclass of RequestException
                log.debug(f"Request failed! HTTPError: {e}", self)
                errType = ErrorType.HttpError
                err = str(e)
            except Timeout as e:
                # Timeout is subclass of RequestException
                log.debug(f"Request failed! Timeout: {e}", self)
                errType = ErrorType.Timeout
                err = str(e)
            except ProxyError as e:
                # ProxyError is subclass of ConnectionError
                log.debug(f"Request failed! ProxyError: {e}", self)
                errType = ErrorType.ProxyError
                err = str(e)
            except SSLError as e:
                # SSLError is subclass of ConnectionError
                log.debug(f"Request failed! SSLError: {e}", self)
                errType = ErrorType.SSLError
                err = str(e)
            except ConnectionError as e:
                # ConnectionError is subclass of RequestException
                log.debug(f"Request failed! ConnectionError: {e}", self)
                errType = ErrorType.ConnectionError
                err = str(e)
            except RequestException as e:
                # This should catch all other
                log.debug(f"Request failed! RequestException: {e}", self)
                errType = ErrorType.Other
                err = str(e)
            finally:
                # The request is made with stream=True, so the connection
                # has to be released explicitly; also when receiving the
                # body has failed.
                if response is not None:
                    response.close()

            log.debug("# ------------------------\n", self)

            return ResultType(url, response, err=err, errType=errType)
        return failSafeRequest

    @failSafeRequestFactory
    def get(self, url, data=None, ResultType=None, **settingsKwargs):
        log.debug("# Type : GET", self)
        log.debug(f"# URL : {url}", self)
        log.debug(f"# Data : {data}", self)
        log.debug(f"# Kwargs : {settingsKwargs}", self)
        return requests.get(url, data=data, stream=True, **settingsKwargs)

    @failSafeRequestFactory
    def post(self, url, data=None, ResultType=None, **settingsKwargs):
        log.debug("# Type : POST", self)
        log.debug(f"# URL : {url}", self)
        log.debug(f"# Data : {data}", self)
        log.debug(f"# Kwargs : {settingsKwargs}", self)
        return requests.post(url, data=data, stream=True, **settingsKwargs)
|