standalone_searx.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. #!/usr/bin/env python
  2. """Script to run searx from terminal.
  3. Getting categories without initializing the engines will only return `['general']`
  4. >>> import searx.engines
  5. ... list(searx.engines.categories.keys())
  6. ['general']
  7. >>> import searx.search
  8. ... searx.search.initialize()
  9. ... list(searx.engines.categories.keys())
  10. ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
  11. Example to use this script:
  12. .. code:: bash
  13. $ python3 utils/standalone_searx.py rain
  14. Example to run it from python:
  15. >>> import importlib
  16. ... import json
  17. ... import sys
  18. ... import searx.engines
  19. ... import searx.search
  20. ... search_query = 'rain'
  21. ... # initialize engines
  22. ... searx.search.initialize()
  23. ... # load engine categories once instead of each time the function is called
  24. ... engine_cs = list(searx.engines.categories.keys())
  25. ... # load module
  26. ... spec = importlib.util.spec_from_file_location(
  27. ... 'utils.standalone_searx', 'utils/standalone_searx.py')
  28. ... sas = importlib.util.module_from_spec(spec)
  29. ... spec.loader.exec_module(sas)
  30. ... # use function from module
  31. ... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
  32. ... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
  33. ... res_dict = sas.to_dict(search_q)
  34. ... sys.stdout.write(json.dumps(
  35. ... res_dict, sort_keys=True, indent=4, ensure_ascii=False,
  36. ... default=sas.json_serial))
  37. {
  38. "answers": [],
  39. "infoboxes": [ {...} ],
  40. "paging": true,
  41. "results": [... ],
  42. "results_number": 820000000.0,
  43. "search": {
  44. "lang": "all",
  45. "pageno": 1,
  46. "q": "rain",
  47. "safesearch": 0,
  48. "timerange": null
  49. },
  50. "suggestions": [...]
  51. }
  52. """ # noqa: E501
  53. # pylint: disable=pointless-string-statement
  54. '''
  55. searx is free software: you can redistribute it and/or modify
  56. it under the terms of the GNU Affero General Public License as published by
  57. the Free Software Foundation, either version 3 of the License, or
  58. (at your option) any later version.
  59. searx is distributed in the hope that it will be useful,
  60. but WITHOUT ANY WARRANTY; without even the implied warranty of
  61. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  62. GNU Affero General Public License for more details.
  63. You should have received a copy of the GNU Affero General Public License
  64. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  65. (C) 2016- by Alexandre Flament, <alex@al-f.net>
  66. '''
  67. # pylint: disable=wrong-import-position
  68. import argparse
  69. import sys
  70. from datetime import datetime
  71. from json import dumps
  72. from typing import Any, Dict, List, Optional
  73. import searx
  74. import searx.preferences
  75. import searx.query
  76. import searx.search
  77. import searx.webadapter
  78. EngineCategoriesVar = Optional[List[str]]
  79. def get_search_query(
  80. args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
  81. ) -> searx.search.SearchQuery:
  82. """Get search results for the query"""
  83. if engine_categories is None:
  84. engine_categories = list(searx.engines.categories.keys())
  85. try:
  86. category = args.category.decode('utf-8')
  87. except AttributeError:
  88. category = args.category
  89. form = {
  90. "q": args.query,
  91. "categories": category,
  92. "pageno": str(args.pageno),
  93. "language": args.lang,
  94. "time_range": args.timerange
  95. }
  96. preferences = searx.preferences.Preferences(
  97. ['oscar'], engine_categories, searx.engines.engines, [])
  98. preferences.key_value_settings['safesearch'].parse(args.safesearch)
  99. search_query = searx.webadapter.get_search_query_from_webapp(
  100. preferences, form)[0]
  101. return search_query
  102. def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
  103. """Remove parsed url from dict."""
  104. for result in results:
  105. del result['parsed_url']
  106. return results
  107. def json_serial(obj: Any) -> Any:
  108. """JSON serializer for objects not serializable by default json code.
  109. :raise TypeError: raised when **obj** is not serializable
  110. """
  111. if isinstance(obj, datetime):
  112. serial = obj.isoformat()
  113. return serial
  114. if isinstance(obj, bytes):
  115. return obj.decode('utf8')
  116. if isinstance(obj, set):
  117. return list(obj)
  118. raise TypeError("Type ({}) not serializable".format(type(obj)))
  119. def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
  120. """Get result from parsed arguments."""
  121. result_container = searx.search.Search(search_query).search()
  122. result_container_json = {
  123. "search": {
  124. "q": search_query.query,
  125. "pageno": search_query.pageno,
  126. "lang": search_query.lang,
  127. "safesearch": search_query.safesearch,
  128. "timerange": search_query.time_range,
  129. },
  130. "results": no_parsed_url(result_container.get_ordered_results()),
  131. "infoboxes": result_container.infoboxes,
  132. "suggestions": list(result_container.suggestions),
  133. "answers": list(result_container.answers),
  134. "paging": result_container.paging,
  135. "results_number": result_container.results_number()
  136. }
  137. return result_container_json
  138. def parse_argument(
  139. args: Optional[List[str]]=None,
  140. category_choices: EngineCategoriesVar=None
  141. ) -> argparse.Namespace:
  142. """Parse command line.
  143. :raise SystemExit: Query argument required on `args`
  144. Examples:
  145. >>> import importlib
  146. ... # load module
  147. ... spec = importlib.util.spec_from_file_location(
  148. ... 'utils.standalone_searx', 'utils/standalone_searx.py')
  149. ... sas = importlib.util.module_from_spec(spec)
  150. ... spec.loader.exec_module(sas)
  151. ... sas.parse_argument()
  152. usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
  153. query
  154. SystemExit: 2
  155. >>> sas.parse_argument(['rain'])
  156. Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
  157. """ # noqa: E501
  158. if not category_choices:
  159. category_choices = list(searx.engines.categories.keys())
  160. parser = argparse.ArgumentParser(description='Standalone searx.')
  161. parser.add_argument('query', type=str,
  162. help='Text query')
  163. parser.add_argument('--category', type=str, nargs='?',
  164. choices=category_choices,
  165. default='general',
  166. help='Search category')
  167. parser.add_argument('--lang', type=str, nargs='?', default='all',
  168. help='Search language')
  169. parser.add_argument('--pageno', type=int, nargs='?', default=1,
  170. help='Page number starting from 1')
  171. parser.add_argument(
  172. '--safesearch', type=str, nargs='?',
  173. choices=['0', '1', '2'], default='0',
  174. help='Safe content filter from none to strict')
  175. parser.add_argument(
  176. '--timerange', type=str,
  177. nargs='?', choices=['day', 'week', 'month', 'year'],
  178. help='Filter by time range')
  179. return parser.parse_args(args)
  180. if __name__ == '__main__':
  181. searx.search.initialize()
  182. engine_cs = list(searx.engines.categories.keys())
  183. prog_args = parse_argument(category_choices=engine_cs)
  184. search_q = get_search_query(prog_args, engine_categories=engine_cs)
  185. res_dict = to_dict(search_q)
  186. sys.stdout.write(dumps(
  187. res_dict, sort_keys=True, indent=4, ensure_ascii=False,
  188. default=json_serial))