google_images.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """This is the implementation of the Google Images engine using the internal
  4. Google API used by the Google Go Android app.
  5. This internal API offers results in
  6. - JSON (``_fmt:json``)
  7. - Protobuf_ (``_fmt:pb``)
  8. - Protobuf_ compressed? (``_fmt:pc``)
  9. - HTML (``_fmt:html``)
  10. - Protobuf_ encoded in JSON (``_fmt:jspb``).
  11. .. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
  12. """
  13. from typing import TYPE_CHECKING
  14. from urllib.parse import urlencode
  15. from json import loads
  16. from searx.engines.google import fetch_traits # pylint: disable=unused-import
  17. from searx.engines.google import (
  18. get_google_info,
  19. time_range_dict,
  20. detect_google_sorry,
  21. )
  22. if TYPE_CHECKING:
  23. import logging
  24. from searx.enginelib.traits import EngineTraits
  25. logger: logging.Logger
  26. traits: EngineTraits
  # about: static description of this engine -- upstream website, Wikidata
  # id, where the official API is documented, whether that API / an API key
  # is used, and the format of the results this engine consumes.
  about = {
      'website': 'https://images.google.com',
      'wikidata_id': 'Q521550',
      'official_api_documentation': 'https://developers.google.com/custom-search',
      'use_official_api': False,
      'require_api_key': False,
      'results': 'JSON',
  }

  # engine dependent config
  categories = ['images', 'web']

  # NOTE(review): presumably these switches tell the engine framework which
  # request parameters to supply; ``request`` in this module reads
  # ``params['pageno']``, ``params['time_range']`` and ``params['safesearch']``.
  paging = True
  time_range_support = True
  safesearch = True
  send_accept_language_header = True

  # Maps the numeric safesearch level to the value of the ``safe`` URL
  # argument.  ``request`` only adds ``safe`` for truthy levels, so the entry
  # for level 0 is not used by it.
  filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
  def request(query, params):
      """Assemble the Google Images search request.

      Sets ``params['url']`` and ``params['cookies']`` and merges additional
      entries into ``params['headers']``, using the values returned by
      ``get_google_info``.

      :param query: search terms, sent as the ``q`` URL argument.
      :param params: request parameters; ``pageno``, ``time_range``,
          ``safesearch`` and ``headers`` are read from it.
      :returns: the same ``params`` object, modified in place.
      """
      info = get_google_info(params, traits)
      endpoint = 'https://' + info['subdomain'] + '/search'

      # Each group of arguments is URL-encoded on its own and the groups are
      # joined with '&' afterwards, so optional arguments are appended to the
      # query string rather than merged into the base arguments.
      query_parts = [
          urlencode(
              {
                  'q': query,
                  'tbm': "isch",
                  **info['params'],
                  'asearch': 'isch',
                  'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
              }
          )
      ]

      time_range = params['time_range']
      if time_range in time_range_dict:
          query_parts.append(urlencode({'tbs': 'qdr:' + time_range_dict[time_range]}))

      # A falsy safesearch level (0) adds no ``safe`` argument at all.
      safesearch_level = params['safesearch']
      if safesearch_level:
          query_parts.append(urlencode({'safe': filter_mapping[safesearch_level]}))

      params['url'] = endpoint + "?" + '&'.join(query_parts)
      params['cookies'] = info['cookies']
      params['headers'].update(info['headers'])
      return params
  def response(resp):
      """Parse a Google Images JSON response into a list of image results.

      The JSON document is located in ``resp.text`` by searching for the
      ``{"ischj":`` marker; everything before the marker is discarded and the
      remainder is decoded as JSON.

      :param resp: response object; ``resp.text`` is read here and the object
          itself is handed to ``detect_google_sorry`` first.
      :returns: list of result dicts using the ``images.html`` template.  The
          list is empty when the payload has no ``metadata`` entries.
      :raises json.JSONDecodeError: if the text starting at the marker is not
          a valid JSON document.
      """
      results = []

      # NOTE(review): presumably raises when Google answered with its "sorry"
      # page instead of results -- confirm in searx.engines.google.
      detect_google_sorry(resp)

      # If the marker is absent ``find`` returns -1, the slice is just the last
      # character of the text, and ``loads`` will normally raise on it.
      json_start = resp.text.find('{"ischj":')
      json_data = loads(resp.text[json_start:])

      # A payload without a "metadata" key is treated as "no results" instead
      # of raising KeyError.
      for item in json_data["ischj"].get("metadata", []):
          page = item["result"]
          image = item["original_image"]

          result_item = {
              'url': page["referrer_url"],
              'title': page["page_title"],
              'content': item["text_in_grid"]["snippet"],
              'source': page["site_title"],
              'img_format': f'{image["width"]} x {image["height"]}',
              'img_src': image["url"],
              'thumbnail_src': item["thumbnail"]["url"],
              'template': 'images.html',
          }

          # Optional IPTC metadata; ``or {}`` also covers an explicit null.
          iptc = page.get('iptc') or {}

          # assumes ``creator`` is a list of strings -- TODO confirm
          author = iptc.get('creator')
          if author:
              result_item['author'] = ', '.join(author)

          # Optional details are appended to the human readable source line.
          copyright_notice = iptc.get('copyright_notice')
          if copyright_notice:
              result_item['source'] += ' | ' + copyright_notice

          freshness_date = page.get("freshness_date")
          if freshness_date:
              result_item['source'] += ' | ' + freshness_date

          file_size = (item.get('gsa') or {}).get('file_size')
          if file_size:
              result_item['source'] += f' ({file_size})'

          results.append(result_item)

      return results