wordnik.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Wordnik (general)
  3. """
from html import escape
from urllib.parse import quote

from lxml.html import fromstring

from searx.network import raise_for_httperror
from searx.utils import extract_text
  7. # about
  8. about = {
  9. "website": 'https://www.wordnik.com',
  10. "wikidata_id": 'Q8034401',
  11. "official_api_documentation": None,
  12. "use_official_api": False,
  13. "require_api_key": False,
  14. "results": 'HTML',
  15. }
  16. categories = ['general']
  17. paging = False
  18. URL = 'https://www.wordnik.com'
  19. SEARCH_URL = URL + '/words/{query}'
  20. def request(query, params):
  21. params['url'] = SEARCH_URL.format(query=query)
  22. logger.debug(f"query_url --> {params['url']}")
  23. return params
  24. def response(resp):
  25. results = []
  26. raise_for_httperror(resp)
  27. dom = fromstring(resp.text)
  28. word = extract_text(dom.xpath('//*[@id="headword"]/text()'))
  29. definitions = []
  30. for src in dom.xpath('//*[@id="define"]//h3[@class="source"]'):
  31. src_text = extract_text(src).strip()
  32. if src_text.startswith('from '):
  33. src_text = src_text[5:]
  34. src_defs = []
  35. for def_item in src.xpath('following-sibling::ul[1]/li'):
  36. def_abbr = extract_text(def_item.xpath('.//abbr')).strip()
  37. def_text = extract_text(def_item).strip()
  38. if def_abbr:
  39. def_text = def_text[len(def_abbr) :].strip()
  40. src_defs.append((def_abbr, def_text))
  41. definitions.append((src_text, src_defs))
  42. if not definitions:
  43. return results
  44. infobox = ''
  45. for src_text, src_defs in definitions:
  46. infobox += f"<small>{src_text}</small>"
  47. infobox += "<ul>"
  48. for def_abbr, def_text in src_defs:
  49. if def_abbr:
  50. def_abbr += ": "
  51. infobox += f"<li><i>{def_abbr}</i> {def_text}</li>"
  52. infobox += "</ul>"
  53. results.append(
  54. {
  55. 'infobox': word,
  56. 'content': infobox,
  57. }
  58. )
  59. return results