seznam.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Seznam
"""

from urllib.parse import urlencode, urlparse
from lxml import html
from searx.poolrequests import get
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

base_url = 'https://search.seznam.cz/'
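

# request(): the Seznam search form carries hidden input fields that have to be
# submitted together with the query, so the start page is fetched first to pick
# up those fields along with the session cookies.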
def request(query, params):
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {'q': query}
    for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = e.get('name')
        value = e.get('value')
        url_params[name] = value

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params
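

# response(): a redirect to /verify means Seznam is asking for CAPTCHA
# verification, which is surfaced as an access-denied error; otherwise the
# result blocks and the related-searches box are scraped from the page.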
def response(resp):
    resp_url = urlparse(resp.url)
    if resp_url.path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    results = []
    dom = html.fromstring(resp.content.decode())

    for result_element in eval_xpath_list(dom, '//div[@id="searchpage-root"]//div[@data-dot="results"]/div'):
        dot_data = eval_xpath_getindex(result_element, './div/div[@data-dot-data]/@data-dot-data', 0, default=None)
        if dot_data is None:
            # ordinary result: the <h3> anchor holds the URL and title
            title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
            results.append({
                'url': title_element.get('href'),
                'title': extract_text(title_element),
                'content': extract_text(eval_xpath_getindex(title_element, '../../div[2]', 0)),
            })
        elif dot_data == '{"reporter_name":"hint/related/relates"}':
            # "related searches" box: return its entries as suggestions
            suggestions_element = eval_xpath_getindex(result_element,
                                                      './div/div[@data-dot="main-box"]', 0, default=None)
            if suggestions_element is not None:
                for suggestion in eval_xpath_list(suggestions_element, './/ul/li'):
                    results.append({'suggestion': extract_text(suggestion)})

    return results