framalibre.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
FramaLibre (It)
"""

from html import escape
from urllib.parse import urljoin, urlencode
from lxml import html
from searx.utils import extract_text

# about
about = {
    "website": 'https://framalibre.org/',
    "wikidata_id": 'Q30213882',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['it']
paging = True

# search-url
base_url = 'https://framalibre.org/'
search_url = base_url + 'recherche-par-crit-res?{query}&page={offset}'

# specific xpath variables
results_xpath = '//div[@class="nodes-list-row"]/div[contains(@typeof,"sioc:Item")]'
link_xpath = './/h3[@class="node-title"]/a[@href]'
thumbnail_xpath = './/img[@class="media-object img-responsive"]/@src'
content_xpath = './/div[@class="content"]//p'
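# NOTE: these selectors are tied to FramaLibre's rendered result markup (the
# "sioc:Item" RDFa typeof and "node-title" class look Drupal-generated); they
# will need updating if the site's theme changes.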


# do search-request
def request(query, params):
    # searx page numbers start at 1, the site's "page" parameter starts at 0
    offset = params['pageno'] - 1
    params['url'] = search_url.format(query=urlencode({'keys': query}),
                                      offset=offset)
    return params


# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        link = result.xpath(link_xpath)[0]
        href = urljoin(base_url, link.attrib.get('href'))
        # the title is also exposed in a hidden
        # <span class="rdf-meta element-hidden" property="dc:title"> via its "content" attribute
        title = escape(extract_text(link))

        thumbnail_tags = result.xpath(thumbnail_xpath)
        thumbnail = None
        if len(thumbnail_tags) > 0:
            thumbnail = extract_text(thumbnail_tags[0])
            if thumbnail[0] == '/':
                # site-relative image path: resolve it against the site root
                thumbnail = urljoin(base_url, thumbnail)

        content = escape(extract_text(result.xpath(content_xpath)))

        # append result
        results.append({'url': href,
                        'title': title,
                        'img_src': thumbnail,
                        'content': content})

    # return results
    return results
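

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the upstream engine): shows how
# the request()/response() pair fits together.  The HTML fragment below is a
# hypothetical, simplified imitation of FramaLibre's result markup, just enough
# to satisfy the XPath selectors above; in production, searx builds the outgoing
# request from `params` and hands the HTTP response object to response() itself.
if __name__ == '__main__':
    from types import SimpleNamespace

    params = {'pageno': 1}
    request('gimp', params)
    print(params['url'])
    # https://framalibre.org/recherche-par-crit-res?keys=gimp&page=0

    fake_page = '''
    <div class="nodes-list-row">
      <div typeof="sioc:Item">
        <h3 class="node-title"><a href="/content/gimp">GIMP</a></h3>
        <img class="media-object img-responsive" src="/sites/default/files/gimp.png">
        <div class="content"><p>Free and open source image editor.</p></div>
      </div>
    </div>
    '''
    for item in response(SimpleNamespace(text=fake_page)):
        print(item['title'], '->', item['url'])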