command.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Command (offline)
  4. """
  5. import re
  6. from os.path import expanduser, isabs, realpath, commonprefix
  7. from shlex import split as shlex_split
  8. from subprocess import Popen, PIPE
  9. from threading import Thread
  10. from searx import logger
  11. engine_type = 'offline'
  12. paging = True
  13. command = []
  14. delimiter = {}
  15. parse_regex = {}
  16. query_type = ''
  17. query_enum = []
  18. environment_variables = {}
  19. working_dir = realpath('.')
  20. result_separator = '\n'
  21. result_template = 'key-value.html'
  22. timeout = 4.0
  23. _command_logger = logger.getChild('command')
  24. _compiled_parse_regex = {}
  25. def init(engine_settings):
  26. check_parsing_options(engine_settings)
  27. if 'command' not in engine_settings:
  28. raise ValueError('engine command : missing configuration key: command')
  29. global command, working_dir, result_template, delimiter, parse_regex, timeout, environment_variables
  30. command = engine_settings['command']
  31. if 'working_dir' in engine_settings:
  32. working_dir = engine_settings['working_dir']
  33. if not isabs(engine_settings['working_dir']):
  34. working_dir = realpath(working_dir)
  35. if 'parse_regex' in engine_settings:
  36. parse_regex = engine_settings['parse_regex']
  37. for result_key, regex in parse_regex.items():
  38. _compiled_parse_regex[result_key] = re.compile(regex, flags=re.MULTILINE)
  39. if 'delimiter' in engine_settings:
  40. delimiter = engine_settings['delimiter']
  41. if 'environment_variables' in engine_settings:
  42. environment_variables = engine_settings['environment_variables']
  43. def search(query, params):
  44. cmd = _get_command_to_run(query)
  45. if not cmd:
  46. return []
  47. results = []
  48. reader_thread = Thread(target=_get_results_from_process, args=(results, cmd, params['pageno']))
  49. reader_thread.start()
  50. reader_thread.join(timeout=timeout)
  51. return results
  52. def _get_command_to_run(query):
  53. params = shlex_split(query)
  54. __check_query_params(params)
  55. cmd = []
  56. for c in command:
  57. if c == '{{QUERY}}':
  58. cmd.extend(params)
  59. else:
  60. cmd.append(c)
  61. return cmd
  62. def _get_results_from_process(results, cmd, pageno):
  63. leftover = ''
  64. count = 0
  65. start, end = __get_results_limits(pageno)
  66. with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
  67. line = process.stdout.readline()
  68. while line:
  69. buf = leftover + line.decode('utf-8')
  70. raw_results = buf.split(result_separator)
  71. if raw_results[-1]:
  72. leftover = raw_results[-1]
  73. raw_results = raw_results[:-1]
  74. for raw_result in raw_results:
  75. result = __parse_single_result(raw_result)
  76. if result is None:
  77. _command_logger.debug('skipped result:', raw_result)
  78. continue
  79. if start <= count and count <= end:
  80. result['template'] = result_template
  81. results.append(result)
  82. count += 1
  83. if end < count:
  84. return results
  85. line = process.stdout.readline()
  86. return_code = process.wait(timeout=timeout)
  87. if return_code != 0:
  88. raise RuntimeError('non-zero return code when running command', cmd, return_code)
  89. def __get_results_limits(pageno):
  90. start = (pageno - 1) * 10
  91. end = start + 9
  92. return start, end
  93. def __check_query_params(params):
  94. if not query_type:
  95. return
  96. if query_type == 'path':
  97. query_path = params[-1]
  98. query_path = expanduser(query_path)
  99. if commonprefix([realpath(query_path), working_dir]) != working_dir:
  100. raise ValueError('requested path is outside of configured working directory')
  101. elif query_type == 'enum' and len(query_enum) > 0:
  102. for param in params:
  103. if param not in query_enum:
  104. raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
  105. def check_parsing_options(engine_settings):
  106. """ Checks if delimiter based parsing or regex parsing is configured correctly """
  107. if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings:
  108. raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
  109. if 'delimiter' in engine_settings and 'parse_regex' in engine_settings:
  110. raise ValueError('failed to init settings for parsing lines: too many settings')
  111. if 'delimiter' in engine_settings:
  112. if 'chars' not in engine_settings['delimiter'] or 'keys' not in engine_settings['delimiter']:
  113. raise ValueError
  114. def __parse_single_result(raw_result):
  115. """ Parses command line output based on configuration """
  116. result = {}
  117. if delimiter:
  118. elements = raw_result.split(delimiter['chars'], maxsplit=len(delimiter['keys']) - 1)
  119. if len(elements) != len(delimiter['keys']):
  120. return {}
  121. for i in range(len(elements)):
  122. result[delimiter['keys'][i]] = elements[i]
  123. if parse_regex:
  124. for result_key, regex in _compiled_parse_regex.items():
  125. found = regex.search(raw_result)
  126. if not found:
  127. return {}
  128. result[result_key] = raw_result[found.start():found.end()]
  129. return result