requests.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. ########################################################################
  2. # Searx-Qt - Lightweight desktop application for Searx.
  3. # Copyright (C) 2020-2022 CYBERDEViL
  4. #
  5. # This file is part of Searx-Qt.
  6. #
  7. # Searx-Qt is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # Searx-Qt is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. #
  20. ########################################################################
  21. import requests
  22. from requests.exceptions import (
  23. HTTPError,
  24. ConnectionError,
  25. Timeout,
  26. ProxyError,
  27. SSLError,
  28. RequestException
  29. )
  30. import json
  31. from jsonschema import validate as JsonValidate
  32. from jsonschema.exceptions import ValidationError, SchemaError
  33. import random
  34. from searxqt.core import log
  35. HAVE_SOCKS = False
  36. try:
  37. import socks
  38. HAVE_SOCKS = True
  39. del socks
  40. except ImportError:
  41. log.debug("pysocks not installed! No socks proxy support.")
  42. class ErrorType:
  43. Success = 0
  44. HttpError = 1
  45. ConnectionError = 2
  46. Timeout = 3
  47. WrongStatus = 4
  48. DecodeError = 5
  49. NoResults = 6
  50. ProxyError = 7
  51. SSLError = 8
  52. InvalidSchema = 9
  53. ContentSizeExceeded = 10
  54. Other = 11
  55. ErrorTypeStr = {
  56. ErrorType.Success: "Success",
  57. ErrorType.HttpError: "HttpError",
  58. ErrorType.ConnectionError: "ConnectionError",
  59. ErrorType.Timeout: "Timeout",
  60. ErrorType.WrongStatus: "WrongStatus",
  61. ErrorType.DecodeError: "DecodeError",
  62. ErrorType.NoResults: "NoResults",
  63. ErrorType.ProxyError: "ProxyError",
  64. ErrorType.SSLError: "SSLError",
  65. ErrorType.InvalidSchema: "InvalidSchema",
  66. ErrorType.ContentSizeExceeded: "ContentSizeExceeded",
  67. ErrorType.Other: "Other"
  68. }
  69. # Global json schema container so we won't have to read a json schema file
  70. # from disk everytime we need to verify some json data.
  71. Schemas = {}
  72. # Loads an json schema into the global 'Schemas' container.
  73. # @param key: A name to store the json schema to, existing keys will be
  74. # overwritten!
  75. # @type key: string
  76. # @param filepath: Path where the json schema file is located (including
  77. # filename)
  78. # @type filepath: string
  79. def json_schema_load(key, filepath):
  80. # may raise an json.JSONDecodeError or UnicodeDecodeError when the schema
  81. # json is invalid, or an OSError when it cannot access the given filepath.
  82. with open(filepath, 'r') as f:
  83. data = json.load(f)
  84. # An exception should have been raised when open() or json.load() has
  85. # failed, so at this point the schema json looks valid.
  86. # @note: Existing keys will be overwritten/reloaded.
  87. Schemas.update({key: data})
  88. class Result:
  89. def __init__(self, url, response, err="", errType=ErrorType.Success,
  90. acceptCodes=None):
  91. self._url = url # url used for request.
  92. self._response = response
  93. self._err = err
  94. self._errType = errType
  95. acceptCodes = acceptCodes
  96. if not acceptCodes:
  97. acceptCodes = [200]
  98. if errType == ErrorType.Success and response.status_code not in acceptCodes:
  99. self._errType = ErrorType.WrongStatus
  100. self._err = f"WrongStatus: {self._response.status_code}"
  101. else:
  102. self.verifyFurther()
  103. def __bool__(self):
  104. return not self.failed()
  105. def url(self):
  106. return self._url
  107. def errorType(self): return self._errType
  108. def error(self): return self._err
  109. def content(self):
  110. """ In case json.loads failed and we want to debug.
  111. """
  112. if self._response is None:
  113. return b''
  114. return self._response.content
  115. def text(self):
  116. if self._response is None:
  117. return ''
  118. return self._response.text
  119. def failed(self):
  120. if self._errType is not ErrorType.Success:
  121. return True
  122. return False
  123. def statusCode(self):
  124. if self._response is not None:
  125. return self._response.status_code
  126. return 0
  127. def verifyFurther(self):
  128. pass
  129. # JsonResult should not be used directly, it should be subclassed. The subclass
  130. # is responsible for setting JsonResult.Schema
  131. class JsonResult(Result):
  132. Schema = {}
  133. def __init__(self, url, response, err="", errType=ErrorType.Success,
  134. acceptCodes=None):
  135. Result.__init__(
  136. self,
  137. url,
  138. response,
  139. err=err,
  140. errType=errType,
  141. acceptCodes=acceptCodes
  142. )
  143. def verifyFurther(self):
  144. try:
  145. self.json()
  146. except json.JSONDecodeError as err:
  147. self._errType = ErrorType.DecodeError
  148. self._err = f"DecodeError: `{err}`"
  149. except UnicodeDecodeError as err:
  150. # This could happen when the response encoding isn't plain ? (gzip)
  151. # Or we just have malformed data/crap.
  152. self._errType = ErrorType.DecodeError
  153. self._err = f"DecodeError: `{err}`"
  154. try:
  155. JsonValidate(instance=self.json(), schema=self.Schema)
  156. except ValidationError as err:
  157. self._errType = ErrorType.InvalidSchema
  158. self._err = f"InvalidSchema: `{err}`"
  159. except SchemaError as err:
  160. self._errType = ErrorType.InvalidSchema
  161. self._err = f"InvalidSchema: `{err}`"
  162. def json(self):
  163. if self.errorType() != ErrorType.Success:
  164. return {}
  165. return json.loads(self._response.content)
  166. class ProxyProtocol:
  167. HTTP = 1
  168. SOCKS4 = 2
  169. SOCKS5 = 4
  170. ProxyProtocolString = {
  171. 0: "none",
  172. 1: "http",
  173. 2: "socks4",
  174. 4: "socks5"
  175. }
  176. class RequestSettings:
  177. def __init__(self):
  178. # Settings
  179. self._useragents = ["searx-qt"]
  180. self._randomUserAgent = False # Use a random useragent for each
  181. # request.
  182. self._verifySSL = True # Verify SSL certificates (HTTPS).
  183. self._timeout = 10 # Connection timeout in seconds.
  184. self._maxSize = 10 * 1024 * 1024 # Maximum content receive size in KiB.
  185. self._chunkSize = 500 * 1024 # Receive chunk size.
  186. self._proxyEnabled = False # Enable the use of a proxy.
  187. self._proxyDNS = True # Only available for socks
  188. self._proxyHost = "" # Proxy string user:pass@host:port
  189. self._proxyProtocol = 0 # Proxy protocol, example:
  190. # ProxyProtocol.SOCKS5H
  191. self._extraHeaders = {} # Extra header values
  192. # Compiled settings
  193. self._headers = {} # headers kwarg (will be included in self._kwargs)
  194. # kwargs passed to requests.get or requests.post.
  195. # This will be compiled each time a setting has
  196. # changed, so it won't have to be compiled each
  197. # time a request is made.
  198. self._kwargs = {
  199. "verify": True,
  200. "timeout": 10,
  201. "headers": self._headers
  202. }
  203. self._compileKwargs()
  204. def getData(self):
  205. return {
  206. "useragents": self.useragents,
  207. "randomUserAgent": self.randomUserAgent,
  208. "verifySSL": self.verifySSL,
  209. "timeout": self.timeout,
  210. "maxSize": self.maxSize,
  211. "chunkSize": self.chunkSize,
  212. "proxyEnabled": self.proxyEnabled,
  213. "proxyDNS": self.proxyDNS,
  214. "proxyHost": self.proxyHost,
  215. "proxyProtocol": self.proxyProtocol,
  216. "extraHeaders": self._extraHeaders
  217. }
  218. def setData(self, data):
  219. self.useragents.clear()
  220. for useragent in data.get("useragents", []):
  221. self.useragents.append(useragent)
  222. self.randomUserAgent = data.get("randomUserAgent", False)
  223. self.verifySSL = data.get("verifySSL", True)
  224. self.timeout = data.get("timeout", 10)
  225. self.maxSize = data.get("maxSize", 10 * 1024 * 1024)
  226. self.chunkSize = data.get("chunkSize", 500 * 1024)
  227. self.proxyEnabled = data.get("proxyEnabled", False)
  228. self.proxyDNS = data.get("proxyDNS", True)
  229. self.proxyHost = data.get("proxyHost", "")
  230. self.proxyProtocol = data.get("proxyProtocol", 0)
  231. self._extraHeaders = data.get("extraHeaders", {})
  232. self.updateRequestKwargs()
  233. """ Settings """
  234. @property
  235. def extraHeaders(self):
  236. return self._extraHeaders
  237. @property
  238. def verifySSL(self):
  239. return self._verifySSL
  240. @verifySSL.setter
  241. def verifySSL(self, state):
  242. self._verifySSL = state
  243. @property
  244. def timeout(self):
  245. return self._timeout
  246. @timeout.setter
  247. def timeout(self, state):
  248. self._timeout = state
  249. @property
  250. def maxSize(self):
  251. return self._maxSize
  252. @maxSize.setter
  253. def maxSize(self, size):
  254. self._maxSize = size
  255. @property
  256. def chunkSize(self):
  257. return self._chunkSize
  258. @chunkSize.setter
  259. def chunkSize(self, size):
  260. self._chunkSize = size
  261. @property
  262. def proxyEnabled(self):
  263. return self._proxyEnabled
  264. @proxyEnabled.setter
  265. def proxyEnabled(self, state):
  266. self._proxyEnabled = state
  267. @property
  268. def proxyHost(self):
  269. return self._proxyHost
  270. @proxyHost.setter
  271. def proxyHost(self, host):
  272. self._proxyHost = host
  273. @property
  274. def proxyProtocol(self):
  275. return self._proxyProtocol
  276. # @type protocol: class ProxyProtocol
  277. @proxyProtocol.setter
  278. def proxyProtocol(self, protocol):
  279. self._proxyProtocol = protocol
  280. @property
  281. def proxyDNS(self):
  282. return self._proxyDNS
  283. @proxyDNS.setter
  284. def proxyDNS(self, state):
  285. self._proxyDNS = state
  286. # Use this to add/remove/clear useragents, it returns a list with strings
  287. @property
  288. def useragents(self):
  289. return self._useragents
  290. @property
  291. def randomUserAgent(self):
  292. return self._randomUserAgent
  293. @randomUserAgent.setter
  294. def randomUserAgent(self, state):
  295. self._randomUserAgent = state
  296. """ Make python-requests compatible """
  297. @property
  298. def requestsKwargs(self):
  299. return self._kwargs
  300. # Update requests kwargs (call this each time after you changed one or
  301. # multiple settings)
  302. def updateRequestKwargs(self):
  303. self._compileKwargs()
  304. # Get requests kwargs for a new request.
  305. def kwargsForNewRequest(self):
  306. useragent = self._getUseragent()
  307. if useragent:
  308. self._headers.update({"User-Agent": useragent})
  309. elif "User-Agent" in self._headers:
  310. del self._headers["User-Agent"]
  311. return self._kwargs
  312. def _compileProxies(self):
  313. dnsStr = ""
  314. if self.proxyProtocol in [ProxyProtocol.SOCKS4, ProxyProtocol.SOCKS5]:
  315. if self.proxyDNS:
  316. dnsStr = "h"
  317. protoStr = ProxyProtocolString[self.proxyProtocol]
  318. proxyStr = f"{protoStr}{dnsStr}://{self.proxyHost}"
  319. return {
  320. "http": proxyStr,
  321. "https": proxyStr
  322. }
  323. def _compileKwargs(self):
  324. kwargs = {
  325. "verify": self.verifySSL,
  326. "timeout": self.timeout,
  327. "headers": self._headers
  328. }
  329. self._headers.clear()
  330. self._headers.update(self.extraHeaders)
  331. if self._proxyEnabled:
  332. kwargs.update({"proxies": self._compileProxies()})
  333. self._kwargs.clear()
  334. self._kwargs.update(kwargs)
  335. def _getUseragent(self):
  336. if not self._useragents:
  337. return ""
  338. # Return first useragent string
  339. if len(self._useragents) == 1 or not self._randomUserAgent:
  340. return self._useragents[0]
  341. # Return random useragent
  342. return random.choice(self._useragents)
  343. class RequestsHandler:
  344. def __init__(self):
  345. self._settings = RequestSettings()
  346. @property
  347. def settings(self):
  348. return self._settings
  349. def failSafeRequestFactory(func):
  350. def failSafeRequest(self, url, data=None, ResultType=None):
  351. response = None
  352. err = ""
  353. errType = ErrorType.Success
  354. if not ResultType:
  355. # When 'ResultType' isn't specified, set 'JsonResult' as
  356. # default.
  357. ResultType = JsonResult
  358. log.debug("<NEW Request>", self)
  359. log.debug("# ------------------------", self)
  360. log.debug(f"# ResultType : {ResultType}", self)
  361. requestKwargs = self._settings.kwargsForNewRequest()
  362. """
  363. Request exceptions
  364. https://docs.python-requests.org/en/master/_modules/requests/exceptions/
  365. """
  366. try:
  367. response = func(self, url, data=data, **requestKwargs)
  368. chunkSize = self.settings.chunkSize
  369. maxSize = self.settings.maxSize
  370. curSize = 0
  371. headerContentSize = response.headers.get("Content-Length", None)
  372. if headerContentSize is not None:
  373. if int(headerContentSize) > maxSize:
  374. e = f"Maximum content size limit of '{maxSize}' bytes exceeded. (1)"
  375. log.debug(f"Request failed! ContentSizeExceeded: {e}", self)
  376. errType = ErrorType.ContentSizeExceeded
  377. err = e
  378. if errType == ErrorType.Success:
  379. response._content = b""
  380. for chunk in response.iter_content(chunkSize):
  381. # The server might have its own chunk size that is
  382. # smaller then ours, so thats why we add the lenght
  383. # of the received content instead of adding our
  384. # chunkSize.
  385. curSize += len(chunk)
  386. if curSize > maxSize:
  387. e = f"Maximum content size limit of '{maxSize}' bytes exceeded. (2)"
  388. log.debug(f"Request failed! ContentSizeExceeded: {e}", self)
  389. errType = ErrorType.ContentSizeExceeded
  390. err = e
  391. break
  392. response._content += chunk
  393. response.close()
  394. except HTTPError as e:
  395. # HTTPError is subclass of RequestException
  396. log.debug(f"Request failed! HTTPError: {e}", self)
  397. errType = ErrorType.HttpError
  398. err = str(e)
  399. except Timeout as e:
  400. # Timeout is subclass of RequestException
  401. log.debug(f"Request failed! Timeout: {e}", self)
  402. errType = ErrorType.Timeout
  403. err = str(e)
  404. except ProxyError as e:
  405. # ProxyError is subclass of ConnectionError
  406. log.debug(f"Request failed! ProxyError: {e}", self)
  407. errType = ErrorType.ProxyError
  408. err = str(e)
  409. except SSLError as e:
  410. # SSLError is subclass of ConnectionError
  411. log.debug(f"Request failed! SSLError: {e}", self)
  412. errType = ErrorType.SSLError
  413. err = str(e)
  414. except ConnectionError as e:
  415. # ConnectionError is subclass of RequestException
  416. log.debug(f"Request failed! ConnectionError: {e}", self)
  417. errType = ErrorType.ConnectionError
  418. err = str(e)
  419. except RequestException as e:
  420. # This should catch all other
  421. log.debug(f"Request failed! RequestException: {e}", self)
  422. errType = ErrorType.Other
  423. err = str(e)
  424. log.debug("# ------------------------\n", self)
  425. return ResultType(url, response, err=err, errType=errType)
  426. return failSafeRequest
  427. @failSafeRequestFactory
  428. def get(self, url, data=None, ResultType=None, **settingsKwargs):
  429. log.debug("# Type : GET", self)
  430. log.debug(f"# URL : {url}", self)
  431. log.debug(f"# Data : {data}", self)
  432. log.debug(f"# Kwargs : {settingsKwargs}", self)
  433. return requests.get(url, data=data, stream=True, **settingsKwargs)
  434. @failSafeRequestFactory
  435. def post(self, url, data=None, ResultType=None, **settingsKwargs):
  436. log.debug("# Type : POST", self)
  437. log.debug(f"# URL : {url}", self)
  438. log.debug(f"# Data : {data}", self)
  439. log.debug(f"# Kwargs : {settingsKwargs}", self)
  440. return requests.post(url, data=data, stream=True, **settingsKwargs)