12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037 |
- ########################################################################
- # Searx-Qt - Lightweight desktop application for Searx.
- # Copyright (C) 2020-2022 CYBERDEViL
- #
- # This file is part of Searx-Qt.
- #
- # Searx-Qt is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Searx-Qt is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- ########################################################################
- import time
- import urllib.parse
- from bs4 import BeautifulSoup
- from searxqt.core.requests import JsonResult, ErrorType, Schemas, Result
- from searxqt.core.handler import HandlerProto, NetworkTypes
- from searxqt.utils.string import parseFilesize
- from searxqt.translations import _
class LinkTokenResult(Result):
    """! Result type used for the request of the link token (dummy css)
    resource. It adds nothing on top of Result."""

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        super().__init__(url, response, err=err, errType=errType)
- ## API result (format=json)
class SearchResult(JsonResult):
    """! Search result as returned by the JSON API (format=json)."""

    Schema = Schemas['searxng_query']

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        JsonResult.__init__(self, url, response, err=err, errType=errType)

    def verifyFurther(self):
        """! Marks a so far successful result as ErrorType.NoResults when it
        holds no usable data."""
        JsonResult.verifyFurther(self)

        # Only results that are still considered fine are inspected.
        if self._errType != ErrorType.Success:
            return

        # At least one of these keys must hold something, otherwise the
        # response is counted as having no (usable) result.
        usableKeys = (
            'results',
            'answers',
            'corrections',
            'infoboxes',
            'suggestions'
        )

        data = self.json()
        hasData = any(len(data.get(key, [])) for key in usableKeys)

        if not hasData:
            self._errType = ErrorType.NoResults
            self._err = f"NoResults: got: `{self.json()}`"
- ## HTML result that will be parsed into JSON
class SearchResult2(SearchResult):
    """! Search result scraped from the HTML page of the 'simple' theme.

    json() converts the page into the same dict layout as the JSON API
    result, so both result types can be consumed alike.
    """

    Schema = Schemas['searxng_query']

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        ## @see https://github.com/searxng/searxng/blob/master/searx/botdetection/link_token.py
        # Assigned before the base initialiser is called.
        # NOTE(review): presumably the base class already triggers
        # verifyFurther() -> json() while initialising, which may set the
        # link token - confirm before reordering these two statements.
        self._linktoken = None
        SearchResult.__init__(self, url, response, err=err, errType=errType)

    @property
    def linktoken(self):
        """!
        @return: Absolute URL of the bot detection stylesheet found by
                 json(), or None when none has been found.
        @rtype: str or None
        """
        return self._linktoken

    def makeUrlAbsolute(self, url):
        """! Returns a absolute URL. It will add the SearXNG instance its
        schema and location in front when they are missing.

        The URL is resolved against the URL of this result, so absolute,
        protocol-relative ('//host/x'), root-relative ('/x') and
        path-relative ('x') references are all handled.

        @param url: URL or reference found in the page.
        @type url: str

        @return: Absolute URL.
        @rtype: str
        """
        return urllib.parse.urljoin(self.url(), url)

    def json(self):
        """! Parses the HTML page into the JSON API layout.

        As a side effect the link token URL (see linktoken) is stored when
        the page references a bot detection stylesheet.

        @return: Dict with the keys 'results', 'answers', 'corrections',
                 'infoboxes', 'suggestions' and 'unresponsive_engines'; an
                 empty dict when the error type is not Success.
        @rtype: dict
        """
        if self.errorType() != ErrorType.Success:
            return {}

        soup = BeautifulSoup(self.content(), "html.parser")

        self._findLinkToken(soup)

        return {
            'results': [
                self._parseResult(article)
                for article in soup.find_all("article", {"class": "result"})
            ],
            'answers': self._parseAnswers(soup),
            'corrections': [],  # TODO
            'infoboxes': [
                self._parseInfobox(infobox)
                for infobox in soup.find_all("aside", {"class": "infobox"})
            ],
            'suggestions': self._parseSuggestions(soup),
            'unresponsive_engines': self._parseUnresponsiveEngines(soup)
        }

    def _findLinkToken(self, soup):
        """! Stores the absolute URL of the css bot detection file, if any."""
        # <link rel="stylesheet" href="/client8uw9qw2jc3yhiq2c.css" type="text/css">
        for link in soup.find_all("link", {"rel": "stylesheet"}, href=True):
            href = link.get("href")
            if href.startswith("/client"):
                self._linktoken = self.makeUrlAbsolute(href)
                break

    @staticmethod
    def _parseResult(article):
        """! Converts one <article class="result"> into a 'results' item."""
        # Default result:
        #   <article class="result result-default category-general qwant duckduckgo google">
        #     <a href="https://linuxize.com/post/curl-post-request/" class="url_wrapper" rel="noreferrer">
        #       <span class="url_o1"><span class="url_i1">https://linuxize.com</span></span>
        #       <span class="url_o2"><span class="url_i2"> › post › curl-post-request</span></span>
        #     </a>
        #     <h3>
        #       <a href="https://linuxize.com/post/curl-post-request/" rel="noreferrer">
        #         How to make a <span class="highlight">POST</span>
        #         <span class="highlight">request</span>
        #         with <span class="highlight">curl</span>
        #       </a>
        #     </h3>
        #     <p class="content">
        #       Learn how to use <span class="highlight">curl</span>, ...
        #     </p>
        #     <div class="engines">
        #       <span>qwant</span>
        #       <span>duckduckgo</span>
        #       <span>google</span>
        #       <a href="https://web.archive.org/web/https://linuxize.com/post/curl-post-request/" class="cache_link" rel="noreferrer">
        #         <svg SVG_STUFF .../></svg>
        #         cached
        #       </a>
        #     </div>
        #     <div class="break"></div>
        #   </article>
        #
        # Torrent result (class="result result-torrent category-files
        # solidtorrents"), elements it has on top of the above:
        #   <time class="published_date" datetime="2018-10-20 00:00:00" >Oct 20, 2018</time>
        #   <div class="highlight">Other/Archive</div>
        #   <p class="altlink">
        #     • <a href="magnet:MAGNET_LINK ..." class="magnetlink" rel="noreferrer"><svg SVG_STUFF .../></svg>magnet link</a>
        #   </p>
        #   <p class="altlink">
        #     • <a href="https://itorrents.org/torrent/TORRENT_LINK ..." class="torrentfile" rel="noreferrer">torrent file</a>
        #   </p>
        #   <p class="stat">
        #     • Seeder <span class="badge">407</span>
        #     • Leecher <span class="badge">748</span>
        #   </p>
        #   <p class="stat"> Filesize <span class="badge">2.88 GiB</span></p>
        title = ''
        url = ''
        content = ''
        publishedDate = ''
        magnetlink = ''
        torrentfile = ''
        filesize = 0
        files = 0  # TODO unused for now
        seed = None
        leech = None

        # Title and URL both come from the link inside the heading.
        heading = article.h3
        link = heading.a if heading is not None else None
        if link is None:
            print("Failed to get title")
            print("Failed to get url")
        else:
            title = link.get_text().strip()
            url = link.get("href", "")

        # Content
        felem = article.find("p", {"class": "content"})
        if felem is not None:
            content = felem.get_text().strip()

        # Engines
        engines = []
        felem = article.find("div", {"class": "engines"})
        if felem is not None:
            engines = [
                span.get_text().strip() for span in felem.find_all("span")
            ]

        # Publish date
        felem = article.find("time", {"class": "published_date"})
        if felem is not None:
            publishedDate = felem.get("datetime", "")

        # Magnet link
        felem = article.find("a", {"class": "magnetlink"})
        if felem is not None:
            magnetlink = felem.get("href", "")

        # Torrent file
        felem = article.find("a", {"class": "torrentfile"})
        if felem is not None:
            torrentfile = felem.get("href", "")

        # Filesize, seeders and leechers: the node in front of each badge
        # tells what the badge value stands for.
        for badge in article.find_all("span", {"class": "badge"}):
            label = badge.previous_sibling
            if not label:
                continue
            if "Filesize" in label:
                filesize = parseFilesize(badge.get_text().strip())
            elif "Seeder" in label:
                seed = badge.get_text()
            elif "Leecher" in label:
                leech = badge.get_text()

        resultData = {
            'title': title,
            'url': url,
            'content': content,
            'engines': engines,

            # Optional
            'publishedDate': publishedDate,

            # File attributes
            'magnetlink': magnetlink,
            'torrentfile': torrentfile,
            'filesize': filesize,
            'files': files,
            'img_format': ''  # TODO
        }

        # Only present when the page listed them.
        if seed is not None:
            resultData['seed'] = seed
        if leech is not None:
            resultData['leech'] = leech

        return resultData

    @staticmethod
    def _parseSuggestions(soup):
        """! Returns the list for the 'suggestions' key."""
        # <div id="sidebar">
        #   <div id="suggestions" role="complementary" aria-labelledby="suggestions-title">
        #     <details class="sidebar-collapsable">
        #       <summary class="title" id="suggestions-title">Suggestions</summary>
        #       <div class="wrapper">
        #         <form method="POST" action="/search">
        #           <input type="hidden" name="q" value="curl post request json">
        #           <input type="hidden" name="category_general" value="1">
        #           <input type="hidden" name="language" value="auto">
        #           <input type="hidden" name="time_range" value="">
        #           <input type="hidden" name="safesearch" value="0">
        #           <input type="hidden" name="theme" value="simple">
        #           <input type="submit" class="suggestion" role="link" value="• curl post request json">
        container = soup.find("div", {"id": "suggestions"})
        if container is None:
            return []
        return [
            field.get("value")
            for field in container.find_all("input", {"name": "q"})
        ]

    @staticmethod
    def _parseAnswers(soup):
        """! Returns the list for the 'answers' key."""
        # <h4 class="title" id="answers-title">Answers : </h4>
        # <div class="answer">
        #   <span>LONG TEXT ...</span>
        #   <a href="some url ..." class="answer-url">url text ...</a>
        # </div>
        answers = []
        for answer in soup.find_all("div", {"class": "answer"}):
            text = answer.find("span")
            if text is not None:
                answers.append(text.get_text())
        return answers

    @staticmethod
    def _parseInfobox(infobox):
        """! Converts one <aside class="infobox"> into an 'infoboxes' item."""
        # <details open="" class="sidebar-collapsable">
        #   <summary class="title">Info</summary>
        #   <aside class="infobox" aria-label="Water">
        #     <h2 class="title"><bdi>Water</bdi></h2>
        #     <img src="/image_proxy?url=long url .." title="Water" alt="Water">
        #     <p><bdi>LONG TEXT ...</bdi></p>
        #     <div class="attributes">          (not always present)
        #       <dl>
        #         <dt><bdi>Chemical formula :</bdi></dt>
        #         <dd><bdi>H₂O</bdi></dd>
        #       </dl>
        #     </div>
        #     <div class="urls">
        #       <ul>
        #         <li class="url"><bdi><a href="https://en.wikipedia.org/wiki/Water" rel="noreferrer">Wikipedia</a></bdi></li>
        #         <li class="url"><bdi><a href="http://www.wikidata.org/entity/Q283" rel="noreferrer">Wikidata</a></bdi></li>
        #       </ul>
        #     </div>
        #   </aside>
        # </details>
        #
        # Layout of an infobox in the JSON API:
        #   {
        #     'infobox': 'str',
        #     'id': 'uri',
        #     'content': 'str',
        #     'img_src': 'uri' | null
        #     'urls': [
        #       {'title': 'str', 'url': 'uri', 'entity': 'str', 'official': true}
        #     ],
        #     'attributes': [
        #       {'label': 'str', 'value': 'str', 'entity': 'str'}
        #     ],
        #     'engines': ['str'],
        #     'engine': 'str'
        #   }
        title = ""
        infoboxId = ""  # TODO
        content = ""
        img_src = ""
        urls = []
        attributes = []

        # Title
        felem = infobox.find("h2", {"class": "title"})
        if felem is not None:
            title = felem.get_text().strip()

        # Content
        felem = infobox.find("p")
        if felem is not None:
            felem = felem.find("bdi")
            if felem is not None:
                content = felem.get_text().strip()

        # Image
        felem = infobox.find("img")
        if felem is not None:
            img_src = felem.get("src")

        # URLs
        for item in infobox.find_all("li", {"class": "url"}):
            link = item.find("a")
            if link is None:
                continue
            urls.append({
                'title': link.get_text().strip(),
                'url': link.get("href", ""),
                'entity': '',  # TODO
                'official': False  # TODO
            })

        # Attributes
        felem = infobox.find("div", {"class": "attributes"})
        if felem is not None:
            for item in felem.find_all("dl"):
                try:
                    label = item.dt.bdi.get_text().strip()
                    value = item.dd.bdi.get_text().strip()
                except AttributeError:
                    # <dt>/<dd>/<bdi> missing; skip this attribute.
                    continue
                attributes.append({
                    "label": label,
                    "value": value,
                    "entity": ""  # TODO
                })

        # Engines: the lowercased titles of the URLs are used.
        engines = [url['title'].lower() for url in urls]

        return {
            "infobox": title,
            "id": infoboxId,
            "content": content,
            "img_src": img_src,
            "urls": urls,
            "attributes": attributes,
            "engines": engines
        }

    @staticmethod
    def _parseUnresponsiveEngines(soup):
        """! Returns the list for the 'unresponsive_engines' key; each item
        is a list of [engine, message]."""
        # <div id="engines_msg">
        #   <details class="sidebar-collapsable" open="">
        #     <summary class="title" id="engines_msg-title">Messages from the search engines</summary>
        #     <div class="dialog-error" role="alert">
        #       <svg class="ion-icon-big" etc..></svg>
        #       <div>
        #         <p>
        #           <strong>Error!</strong>
        #           Engines cannot retrieve results:
        #         </p>
        #         <p>
        #           brave (<a href="/stats?engine=brave" title="View error logs and submit a bug report">Suspended: too many requests</a>)
        #         </p>
        #         <p>
        #           qwant (<a href="/stats?engine=qwant" title="View error logs and submit a bug report">Suspended: too many requests</a>)
        #         </p>
        #       </div>
        #     </div>
        #   </details>
        # </div>
        unresponsive = []

        container = soup.find("div", {"id": "engines_msg"})
        if container is None:
            return unresponsive

        for dialog in container.find_all("div", {"class": "dialog-error"}):
            for paragraph in dialog.find_all("p"):
                # Only paragraphs with a link describe an engine.
                if paragraph.find("a") is None:
                    continue

                # Strip first so surrounding whitespace/newlines of the
                # markup can not end up as the engine name; splitting on
                # any whitespace never fails to unpack.
                parts = paragraph.get_text().strip().split(None, 1)
                if not parts:
                    continue
                engine = parts[0]
                msg = parts[1] if len(parts) > 1 else ""
                unresponsive.append([engine, msg])

        return unresponsive
class SearxConfigResult(JsonResult):
    """! JSON result of the /config resource of an instance."""

    Schema = Schemas['searxng_config']

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        super().__init__(url, response, err=err, errType=errType)
class Categories:
    """! Keeps the enabled/disabled state of every search category."""

    # key: (translated label, name of the request field)
    types = {
        'general': (_('General'), 'category_general'),
        'files': (_('Files'), 'category_files'),
        'images': (_('Images'), 'category_images'),
        'videos': (_('Videos'), 'category_videos'),
        'it': (_('IT'), 'category_it'),
        'map': (_('Location'), 'category_map'),
        'music': (_('Music'), 'category_music'),
        'news': (_('News'), 'category_news'),
        'science': (_('Science'), 'category_science'),
        'social media': (_('Social'), 'category_social media'),
        'onions': (_('Onions'), 'category_onions'),
        'shopping': (_('Shopping'), 'category_shopping')
    }

    def __init__(self):
        self._options = {}
        self.__makeOptions()

    def __makeOptions(self):
        """ (Re)fill the options with every category disabled. """
        self._options.clear()
        self._options.update(dict.fromkeys(self.types, False))

    def reset(self):
        """ Disable all categories. """
        self.__makeOptions()

    def get(self, key):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @return: Enabled / disabled state
        @rtype: bool
        """
        return self._options[key]

    def set(self, key, state):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @param state: Enabled / disabled state
        @type state: bool
        """
        self._options[key] = state

    def dict(self):
        """
        @return: Request field name mapped to 'on' for every enabled
                 category.
        @rtype: dict
        """
        return {
            self.types[name][1]: 'on'
            for name, enabled in self._options.items()
            if enabled
        }

    def enabledKeys(self):
        """ Returns a list with enabled engine strings (key from
        Categories.types)
        """
        enabled = []
        for name, isOn in self._options.items():
            if isOn:
                enabled.append(name)
        return enabled
class Engines(list):
    """! List of engine names to search with."""

    def __init__(self):
        super().__init__()

    def dict(self):
        """
        @return: {'engines': '<comma separated names>'}, or an empty dict
                 when the list is empty.
        @rtype: dict
        """
        return {'engines': ",".join(self)} if self else {}
class SearX:
    """! Holds the parameters of a search and performs the search request
    on an instance."""

    Periods = {
        '': _('Anytime'),
        'day': _('Last day'),
        'week': _('Last week'),
        'month': _('Last month'),
        'year': _('Last year')
    }

    # https://github.com/asciimoo/searx/blob/master/searx/languages.py
    Languages = {
        '': _('No language'),
        'all': _('Default language'),
        'af-NA': 'Afrikaans - af-NA',
        'ca-AD': 'Català - ca-AD',
        'da-DK': 'Dansk - da-DK',
        'de': 'Deutsch - de',
        'de-AT': 'Deutsch (Österreich) - de-AT',
        'de-CH': 'Deutsch (Schweiz) - de-CH',
        'de-DE': 'Deutsch (Deutschland) - de-DE',
        'et-EE': 'Eesti - et-EE',
        'en': 'English - en',
        'en-AU': 'English (Australia) - en-AU',
        'en-CA': 'English (Canada) - en-CA',
        'en-GB': 'English (United Kingdom) - en-GB',
        'en-IE': 'English (Ireland) - en-IE',
        'en-IN': 'English (India) - en-IN',
        'en-NZ': 'English (New Zealand) - en-NZ',
        'en-PH': 'English (Philippines) - en-PH',
        'en-SG': 'English (Singapore) - en-SG',
        'en-US': 'English (United States) - en-US',
        'es': 'Español - es',
        'es-AR': 'Español (Argentina) - es-AR',
        'es-CL': 'Español (Chile) - es-CL',
        'es-ES': 'Español (España) - es-ES',
        'es-MX': 'Español (México) - es-MX',
        'fr': 'Français - fr',
        'fr-BE': 'Français (Belgique) - fr-BE',
        'fr-CA': 'Français (Canada) - fr-CA',
        'fr-CH': 'Français (Suisse) - fr-CH',
        'fr-FR': 'Français (France) - fr-FR',
        'hr-HR': 'Hrvatski - hr-HR',
        'id-ID': 'Indonesia - id-ID',
        'it-IT': 'Italiano - it-IT',
        'sw-KE': 'Kiswahili - sw-KE',
        'lv-LV': 'Latviešu - lv-LV',
        'lt-LT': 'Lietuvių - lt-LT',
        'hu-HU': 'Magyar - hu-HU',
        'ms-MY': 'Melayu - ms-MY',
        'nl': 'Nederlands - nl',
        'nl-BE': 'Nederlands (België) - nl-BE',
        'nl-NL': 'Nederlands (Nederland) - nl-NL',
        'nb-NO': 'Norsk Bokmål - nb-NO',
        'pl-PL': 'Polski - pl-PL',
        'pt': 'Português - pt',
        'pt-BR': 'Português (Brasil) - pt-BR',
        'pt-PT': 'Português (Portugal) - pt-PT',
        'ro-RO': 'Română - ro-RO',
        'sk-SK': 'Slovenčina - sk-SK',
        'sl-SI': 'Slovenščina - sl-SI',
        'sr-RS': 'Srpski - sr-RS',
        'fi-FI': 'Suomi - fi-FI',
        'sv-SE': 'Svenska - sv-SE',
        'vi-VN': 'Tiếng Việt - vi-VN',
        'tr-TR': 'Türkçe - tr-TR',
        'is-IS': 'Íslenska - is-IS',
        'cs-CZ': 'Čeština - cs-CZ',
        'el-GR': 'Ελληνικά - el-GR',
        'be-BY': 'Беларуская - be-BY',
        'bg-BG': 'Български - bg-BG',
        'ru-RU': 'Русский - ru-RU',
        'uk-UA': 'Українська - uk-UA',
        'hy-AM': 'Հայերեն - hy-AM',
        'he-IL': 'עברית - he-IL',
        'ar-SA': 'العربية - ar-SA',
        'fa-IR': 'فارسی - fa-IR',
        'th-TH': 'ไทย - th-TH',
        'zh': '中文 - zh',
        'zh-CN': '中文 (中国) - zh-CN',
        'zh-TW': '中文 (台灣) - zh-TW',
        'ja-JP': '日本語 - ja-JP',
        'ko-KR': '한국어 - ko-KR'
    }

    def __init__(self, requestHandler):
        """
        @param requestHandler: Object used to do the requests; its post()
                               and get() methods are called.
        """
        self._requestHandler = requestHandler
        self._url = ""
        self._categories = Categories()
        self._engines = Engines()
        self._query = ""
        self._lang = ""
        self._pageno = ""  # int formatted as string, '' when not set
        self._timeRange = ""  # '', 'day', 'week', 'month' or 'year'
        self._safesearch = False
        self._parseHtml = True

    @property
    def categories(self):
        """
        @return: The categories to search in.
        @rtype: Categories
        """
        return self._categories

    @property
    def engines(self):
        """
        @return: The engines to search with.
        @rtype: Engines
        """
        return self._engines

    @property
    def url(self):
        """
        @return: Instance url
        @rtype: str
        """
        return self._url

    @url.setter
    def url(self, url):
        """
        @param url: Instance url
        @type url: str
        """
        self._url = url

    @property
    def query(self):
        """
        @return: Search query
        @rtype: str
        """
        return self._query

    @query.setter
    def query(self, q):
        """
        @param q: Search query
        @type q: str
        """
        self._query = q

    @property
    def lang(self):
        """
        @return: Language code
        @rtype: str
        """
        return self._lang

    @lang.setter
    def lang(self, lang):
        """
        @param lang: Language code
        @type lang: str
        """
        self._lang = lang

    @property
    def pageno(self):
        """
        @return: Page number; 1 (the first page) when no page number has
                 been set yet.
        @rtype: int
        """
        # The page number is stored as a string that is empty until it
        # gets set; int('') would raise a ValueError.
        if not self._pageno:
            return 1
        return int(self._pageno)

    @pageno.setter
    def pageno(self, i):
        """
        @param i: Page number
        @type i: int
        """
        self._pageno = str(i)

    @property
    def timeRange(self):
        """
        @return: Search time range ('', 'day', 'week', 'month' or 'year')
        @rtype: str
        """
        return self._timeRange

    @timeRange.setter
    def timeRange(self, value):
        """
        @param value: Key from SearX.Periods
        @type value: str
        """
        self._timeRange = value

    @property
    def safeSearch(self):
        """
        @return: Whether safe search is enabled or not.
        @rtype: bool
        """
        return self._safesearch

    @safeSearch.setter
    def safeSearch(self, state):
        """
        @param state: Enable/disable safe search.
        @type state: bool
        """
        self._safesearch = state

    @property
    def parseHtml(self):
        """
        @return: Whether parsing HTML is enabled, this will not use the
                 JSON API when it returns True.
        @rtype: bool
        """
        return self._parseHtml

    @parseHtml.setter
    def parseHtml(self, state):
        """
        @param state: Enable/disable parsing HTML instead of using the JSON
                      API
        @type state: bool
        """
        self._parseHtml = state

    @property
    def requestKwargs(self):
        """ Returns current data that will be send with the POST
        request used for the search operation. The search query,
        language, page-number and enabled categories/engines.

        @rtype: dict
        """
        data = {
            "q": self.query,
            "safesearch": "1" if self.safeSearch else "0"
        }

        # Choose what resource to use (JSON API or HTML parser)
        if self.parseHtml:
            data["theme"] = "simple"
        else:
            data["format"] = "json"

        # After testing found that searx will honor only engines when
        # both engines and categories are set.
        if self.engines:
            data.update(self.engines.dict())
        elif self.categories:
            data.update(self.categories.dict())

        if self.lang:
            data["language"] = self.lang

        # Only send a page number that has been set explicitly (and is
        # not zero); otherwise it is left to the instance.
        pageno = self.pageno if self._pageno else 0
        if pageno:
            data["pageno"] = pageno

        if self.timeRange:
            data["time_range"] = self.timeRange

        return data

    def reset(self):
        """ Reset url, time range, language, page number, categories,
        engines and query. The safe search and parse HTML settings are
        left as they are.
        """
        self.url = ""
        self.timeRange = ""
        self.lang = ""
        self.pageno = 1
        self.categories.reset()
        self.engines.clear()
        self.query = ""

    def search(self):
        """ Perform search operation with current set values.

        @returns: The result of this search.
        @rtype: SearchResult
        """
        rtype = SearchResult2 if self.parseHtml else SearchResult

        result = self._requestHandler.post(
            urllib.parse.urljoin(self.url, "/search"),
            data=self.requestKwargs,
            ResultType=rtype
        )

        return self.handleLinkToken(result)

    def handleLinkToken(self, result):
        """! Searx-Qt is not a bot

        When a parsed HTML result has no results but does reference a link
        token, the link token resource is requested and the search request
        is done once more.

        @see https://github.com/searxng/searxng/blob/master/searx/botdetection/link_token.py

        @note variables in https://searx.instance/config:
            - bool ["limiter"]["botdetection.ip_limit.link_token"]
            - bool ["limiter"]["botdetection.ip_limit.pass_searxng_org"]

        @param result: Result of the search request.

        @return: The given result, or the result of the redone request.
        """
        # Not relevant
        if result.errorType() != ErrorType.NoResults or not self.parseHtml:
            return result

        # No linktoken found
        if result.linktoken is None:
            return result

        # Request the dummy css
        linkResult = self._requestHandler.get(
            result.linktoken,
            ResultType=LinkTokenResult
        )

        # Failed to get dummy css
        if linkResult.errorType() != ErrorType.Success:
            return result

        # Redo the original request
        return self._requestHandler.post(
            result.url(),
            data=self.requestKwargs,
            ResultType=SearchResult2
        )
class SearxConfigHandler(HandlerProto):
    """! Keeps instance and engine data up to date from the /config
    resource of the instances."""

    def __init__(self, requestsHandler):
        super().__init__(requestsHandler)

    def updateInstance(self, url):
        """ Request the config of the instance and store its version,
        network type, engines and the time of this update.

        @param url: url of the instance to update.
        @type url: str

        @return: True when the config request gave a usable result, else
                 False.
        @rtype: bool
        """
        configUrl = urllib.parse.urljoin(url, "/config")
        result = self.requestsHandler.get(
            configUrl,
            ResultType=SearxConfigResult
        )

        if not result:
            return False

        instance = self.instances[url]
        config = result.json()

        # Update instance version
        instance.update({"version": config.get("version", "")})

        # Update instance network_type to use our own network type
        # definitions as class NetworkTypes (core/handler.py)
        instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})

        # Update Engines
        #
        # What we get:
        #   "engines": [
        #       categories (list, str)
        #       enabled (bool)
        #       language_support (bool)
        #       name (str)
        #       paging (bool)
        #       safesearch (bool)
        #       shortcut (str)
        #       supported_languages (list, str)
        #       time_range_support (bool)
        #       timeout (float)
        #   ]
        #
        # What instanceModel wants:
        #   "engines" : {
        #       "not evil": {
        #           "error_rate": 15,
        #           "errors": [0]
        #       }
        #   }
        #
        # What enginesModel wants:
        #   "engines": {
        #       "1337x": {
        #           "categories": ["videos"],
        #           "language_support": true,
        #           "paging": true,
        #           "safesearch": false,
        #           "shortcut": "1337x",
        #           "time_range_support": false
        #       },
        instanceEngines = {}
        unknownEngines = {}

        for engineData in config.get('engines', []):
            name = engineData.get('name', "")
            if not name:
                continue

            instanceEngines[name] = {}

            # Engines we already know about are left as they are.
            if name in self.engines:
                continue

            unknownEngines[name] = {
                "categories": list(engineData.get('categories', [])),
                "language_support": engineData.get(
                    'language_support',
                    False
                ),
                "paging": engineData.get('paging', False),
                "safesearch": engineData.get('safesearch', False),
                "shortcut": engineData.get('shortcut', ""),
                "time_range_support": engineData.get(
                    'time_range_support',
                    False
                )
            }

        instance.update({"engines": dict(instanceEngines)})
        self.engines.update(unknownEngines)

        # Update instance lastUpdated
        instance.update({"lastUpdated": time.time()})

        return True

    def addInstance(self, url):
        """
        @param url: url of the instance to add.
        @type url: str

        @return: True when the instance got added, False when it was
                 already present.
        @rtype: bool
        """
        if url in self.instances:
            return False

        self._instances[url] = {}
        return True

    def removeInstance(self, url):
        """
        @param url: url of the instance to remove.
        @type url: str
        """
        del self._instances[url]

    def removeMultiInstances(self, urls):
        """ Remove instance(s) by url without emitting changed for every
        instance that got removed.

        @param urls: list with urls of instances to remove.
        @type urls: list
        """
        for instanceUrl in urls:
            del self._instances[instanceUrl]
|