find_OOM_errors.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. #!/usr/bin/env python
  2. # This Source Code Form is subject to the terms of the Mozilla Public
  3. # License, v. 2.0. If a copy of the MPL was not distributed with this
  4. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  5. from __future__ import print_function
  6. usage = """%prog: A test for OOM conditions in the shell.
  7. %prog finds segfaults and other errors caused by incorrect handling of
  8. allocation during OOM (out-of-memory) conditions.
  9. """
  10. help = """Check for regressions only. This runs a set of files with a known
  11. number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero
  12. result if more or less errors are found. See js/src/Makefile.in for invocation.
  13. """
  14. import hashlib
  15. import re
  16. import shlex
  17. import subprocess
  18. import sys
  19. import threading
  20. import time
  21. from optparse import OptionParser
  22. #####################################################################
  23. # Utility functions
  24. #####################################################################
  25. def run(args, stdin=None):
  26. class ThreadWorker(threading.Thread):
  27. def __init__(self, pipe):
  28. super(ThreadWorker, self).__init__()
  29. self.all = ""
  30. self.pipe = pipe
  31. self.setDaemon(True)
  32. def run(self):
  33. while True:
  34. line = self.pipe.readline()
  35. if line == '': break
  36. else:
  37. self.all += line
  38. try:
  39. if type(args) == str:
  40. args = shlex.split(args)
  41. args = [str(a) for a in args] # convert to strs
  42. stdin_pipe = subprocess.PIPE if stdin else None
  43. proc = subprocess.Popen(args, stdin=stdin_pipe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  44. if stdin_pipe:
  45. proc.stdin.write(stdin)
  46. proc.stdin.close()
  47. stdout_worker = ThreadWorker(proc.stdout)
  48. stderr_worker = ThreadWorker(proc.stderr)
  49. stdout_worker.start()
  50. stderr_worker.start()
  51. proc.wait()
  52. stdout_worker.join()
  53. stderr_worker.join()
  54. except KeyboardInterrupt as e:
  55. sys.exit(-1)
  56. stdout, stderr = stdout_worker.all, stderr_worker.all
  57. result = (stdout, stderr, proc.returncode)
  58. return result
  59. def get_js_files():
  60. (out, err, exit) = run('find ../jit-test/tests -name "*.js"')
  61. if (err, exit) != ("", 0):
  62. sys.exit("Wrong directory, run from an objdir")
  63. return out.split()
  64. #####################################################################
  65. # Blacklisting
  66. #####################################################################
  67. def in_blacklist(sig):
  68. return sig in blacklist
  69. def add_to_blacklist(sig):
  70. blacklist[sig] = blacklist.get(sig, 0)
  71. blacklist[sig] += 1
  72. # How often is a particular lines important for this.
  73. def count_lines():
  74. """Keep track of the amount of times individual lines occur, in order to
  75. prioritize the errors which occur most frequently."""
  76. counts = {}
  77. for string,count in blacklist.items():
  78. for line in string.split("\n"):
  79. counts[line] = counts.get(line, 0) + count
  80. lines = []
  81. for k,v in counts.items():
  82. lines.append("{0:6}: {1}".format(v, k))
  83. lines.sort()
  84. countlog = file("../OOM_count_log", "w")
  85. countlog.write("\n".join(lines))
  86. countlog.flush()
  87. countlog.close()
  88. #####################################################################
  89. # Output cleaning
  90. #####################################################################
  91. def clean_voutput(err):
  92. # Skip what we can't reproduce
  93. err = re.sub(r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", "", err, flags=re.MULTILINE)
  94. err = re.sub(r"^==\d+==", "", err, flags=re.MULTILINE)
  95. err = re.sub(r"^\*\*\d+\*\*", "", err, flags=re.MULTILINE)
  96. err = re.sub(r"^\s+by 0x[0-9A-Fa-f]+: ", "by: ", err, flags=re.MULTILINE)
  97. err = re.sub(r"^\s+at 0x[0-9A-Fa-f]+: ", "at: ", err, flags=re.MULTILINE)
  98. err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2", err, flags=re.MULTILINE)
  99. err = re.sub(r"(^\s+Invalid write of size )\d+", r"\1x", err, flags=re.MULTILINE)
  100. err = re.sub(r"(^\s+Invalid read of size )\d+", r"\1x", err, flags=re.MULTILINE)
  101. err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4", err, flags=re.MULTILINE)
  102. # Skip the repeating bit due to the segfault
  103. lines = []
  104. for l in err.split('\n'):
  105. if l == " Process terminating with default action of signal 11 (SIGSEGV)":
  106. break
  107. lines.append(l)
  108. err = '\n'.join(lines)
  109. return err
  110. def remove_failed_allocation_backtraces(err):
  111. lines = []
  112. add = True
  113. for l in err.split('\n'):
  114. # Set start and end conditions for including text
  115. if l == " The site of the failed allocation is:":
  116. add = False
  117. elif l[:2] not in ['by: ', 'at:']:
  118. add = True
  119. if add:
  120. lines.append(l)
  121. err = '\n'.join(lines)
  122. return err
  123. def clean_output(err):
  124. err = re.sub(r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: pointer being freed was not allocated\n\*\*\* set a breakppoint in malloc_error_break to debug\n$", "pointer being freed was not allocated", err, flags=re.MULTILINE)
  125. return err
  126. #####################################################################
  127. # Consts, etc
  128. #####################################################################
  129. command_template = 'shell/js' \
  130. + ' -m -j -p' \
  131. + ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"' \
  132. + ' -f ../jit-test/lib/prolog.js' \
  133. + ' -f {0}'
  134. # Blacklists are things we don't want to see in our logs again (though we do
  135. # want to count them when they happen). Whitelists we do want to see in our
  136. # logs again, principally because the information we have isn't enough.
  137. blacklist = {}
  138. add_to_blacklist(r"('', '', 1)") # 1 means OOM if the shell hasn't launched yet.
  139. add_to_blacklist(r"('', 'out of memory\n', 1)")
  140. whitelist = set()
  141. whitelist.add(r"('', 'out of memory\n', -11)") # -11 means OOM
  142. whitelist.add(r"('', 'out of memory\nout of memory\n', -11)")
  143. #####################################################################
  144. # Program
  145. #####################################################################
  146. # Options
  147. parser = OptionParser(usage=usage)
  148. parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help,
  149. type="int", dest="regression", default=None)
  150. (OPTIONS, args) = parser.parse_args()
  151. if OPTIONS.regression != None:
  152. # TODO: This should be expanded as we get a better hang of the OOM problems.
  153. # For now, we'll just check that the number of OOMs in one short file does not
  154. # increase.
  155. files = ["../jit-test/tests/arguments/args-createontrace.js"]
  156. else:
  157. files = get_js_files()
  158. # Use a command-line arg to reduce the set of files
  159. if len (args):
  160. files = [f for f in files if f.find(args[0]) != -1]
  161. if OPTIONS.regression == None:
  162. # Don't use a logfile, this is automated for tinderbox.
  163. log = file("../OOM_log", "w")
  164. num_failures = 0
  165. for f in files:
  166. # Run it once to establish boundaries
  167. command = (command_template + ' -O').format(f)
  168. out, err, exit = run(command)
  169. max = re.match(".*OOM max count: (\d+).*", out, flags=re.DOTALL).groups()[0]
  170. max = int(max)
  171. # OOMs don't recover well for the first 20 allocations or so.
  172. # TODO: revisit this.
  173. for i in range(20, max):
  174. if OPTIONS.regression == None:
  175. print("Testing allocation {0}/{1} in {2}".format(i,max,f))
  176. else:
  177. sys.stdout.write('.') # something short for tinderbox, no space or \n
  178. command = (command_template + ' -A {0}').format(f, i)
  179. out, err, exit = run(command)
  180. # Success (5 is SM's exit code for controlled errors)
  181. if exit == 5 and err.find("out of memory") != -1:
  182. continue
  183. # Failure
  184. else:
  185. if OPTIONS.regression != None:
  186. # Just count them
  187. num_failures += 1
  188. continue
  189. #########################################################################
  190. # The regression tests ends above. The rest of this is for running the
  191. # script manually.
  192. #########################################################################
  193. problem = str((out, err, exit))
  194. if in_blacklist(problem) and problem not in whitelist:
  195. add_to_blacklist(problem)
  196. continue
  197. add_to_blacklist(problem)
  198. # Get valgrind output for a good stack trace
  199. vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command
  200. run(vcommand)
  201. vout = file("OOM_valgrind_log_file").read()
  202. vout = clean_voutput(vout)
  203. sans_alloc_sites = remove_failed_allocation_backtraces(vout)
  204. # Don't print duplicate information
  205. if in_blacklist(sans_alloc_sites):
  206. add_to_blacklist(sans_alloc_sites)
  207. continue
  208. add_to_blacklist(sans_alloc_sites)
  209. log.write ("\n")
  210. log.write ("\n")
  211. log.write ("=========================================================================")
  212. log.write ("\n")
  213. log.write ("An allocation failure at\n\tallocation {0}/{1} in {2}\n\t"
  214. "causes problems (detected using bug 624094)"
  215. .format(i, max, f))
  216. log.write ("\n")
  217. log.write ("\n")
  218. log.write ("Command (from obj directory, using patch from bug 624094):\n " + command)
  219. log.write ("\n")
  220. log.write ("\n")
  221. log.write ("stdout, stderr, exitcode:\n " + problem)
  222. log.write ("\n")
  223. log.write ("\n")
  224. double_free = err.find("pointer being freed was not allocated") != -1
  225. oom_detected = err.find("out of memory") != -1
  226. multiple_oom_detected = err.find("out of memory\nout of memory") != -1
  227. segfault_detected = exit == -11
  228. log.write ("Diagnosis: ")
  229. log.write ("\n")
  230. if multiple_oom_detected:
  231. log.write (" - Multiple OOMs reported")
  232. log.write ("\n")
  233. if segfault_detected:
  234. log.write (" - segfault")
  235. log.write ("\n")
  236. if not oom_detected:
  237. log.write (" - No OOM checking")
  238. log.write ("\n")
  239. if double_free:
  240. log.write (" - Double free")
  241. log.write ("\n")
  242. log.write ("\n")
  243. log.write ("Valgrind info:\n" + vout)
  244. log.write ("\n")
  245. log.write ("\n")
  246. log.flush()
  247. if OPTIONS.regression == None:
  248. count_lines()
  249. print()
  250. # Do the actual regression check
  251. if OPTIONS.regression != None:
  252. expected_num_failures = OPTIONS.regression
  253. if num_failures != expected_num_failures:
  254. print("TEST-UNEXPECTED-FAIL |", end='')
  255. if num_failures > expected_num_failures:
  256. print("More out-of-memory errors were found ({0}) than expected ({1}). "
  257. "This probably means an allocation site has been added without a "
  258. "NULL-check. If this is unavoidable, you can account for it by "
  259. "updating Makefile.in.".format(num_failures, expected_num_failures),
  260. end='')
  261. else:
  262. print("Congratulations, you have removed {0} out-of-memory error(s) "
  263. "({1} remain)! Please account for it by updating Makefile.in."
  264. .format(expected_num_failures - num_failures, num_failures),
  265. end='')
  266. sys.exit(-1)
  267. else:
  268. print('TEST-PASS | find_OOM_errors | Found the expected number of OOM '
  269. 'errors ({0})'.format(expected_num_failures))