check_spidermonkey_style.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. # This Source Code Form is subject to the terms of the Mozilla Public
  2. # License, v. 2.0. If a copy of the MPL was not distributed with this
  3. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  4. #----------------------------------------------------------------------------
  5. # This script checks various aspects of SpiderMonkey code style. The current checks are as
  6. # follows.
  7. #
  8. # We check the following things in headers.
  9. #
  10. # - No cyclic dependencies.
  11. #
  12. # - No normal header should #include a inlines.h/-inl.h file.
  13. #
  14. # - #ifndef wrappers should have the right form. (XXX: not yet implemented)
  15. # - Every header file should have one.
  16. # - The guard name used should be appropriate for the filename.
  17. #
  18. # We check the following things in all files.
  19. #
  20. # - #includes should have full paths, e.g. "jit/Ion.h", not "Ion.h".
  21. #
  22. # - #includes should use the appropriate form for system headers (<...>) and
  23. # local headers ("...").
  24. #
  25. # - #includes should be ordered correctly.
  26. # - Each one should be in the correct section.
  27. # - Alphabetical order should be used within sections.
  28. # - Sections should be in the right order.
  29. # Note that the presence of #if/#endif blocks complicates things, to the
  30. # point that it's not always clear where a conditionally-compiled #include
  31. # statement should go, even to a human. Therefore, we check the #include
  32. # statements within each #if/#endif block (including nested ones) in
  33. # isolation, but don't try to do any order checking between such blocks.
  34. #----------------------------------------------------------------------------
  35. from __future__ import print_function
  36. import difflib
  37. import os
  38. import re
  39. import sys
  40. from mozversioncontrol import get_repository_from_env
  41. # We don't bother checking files in these directories, because they're (a) auxiliary or (b)
  42. # imported code that doesn't follow our coding style.
  43. ignored_js_src_dirs = [
  44. 'js/src/config/', # auxiliary stuff
  45. 'js/src/ctypes/libffi/', # imported code
  46. 'js/src/devtools/', # auxiliary stuff
  47. 'js/src/editline/', # imported code
  48. 'js/src/gdb/', # auxiliary stuff
  49. 'js/src/vtune/' # imported code
  50. ]
  51. # We ignore #includes of these files, because they don't follow the usual rules.
  52. included_inclnames_to_ignore = set([
  53. 'ffi.h', # generated in ctypes/libffi/
  54. 'devtools/sharkctl.h', # we ignore devtools/ in general
  55. 'devtools/Instruments.h', # we ignore devtools/ in general
  56. 'double-conversion.h', # strange MFBT case
  57. 'javascript-trace.h', # generated in $OBJDIR if HAVE_DTRACE is defined
  58. 'frontend/ReservedWordsGenerated.h', # generated in $OBJDIR
  59. 'jscustomallocator.h', # provided by embedders; allowed to be missing
  60. 'js-config.h', # generated in $OBJDIR
  61. 'fdlibm.h', # fdlibm
  62. 'pratom.h', # NSPR
  63. 'prcvar.h', # NSPR
  64. 'prerror.h', # NSPR
  65. 'prinit.h', # NSPR
  66. 'prio.h', # NSPR
  67. 'private/pprio.h', # NSPR
  68. 'prlink.h', # NSPR
  69. 'prlock.h', # NSPR
  70. 'prprf.h', # NSPR
  71. 'prthread.h', # NSPR
  72. 'prtypes.h', # NSPR
  73. 'selfhosted.out.h', # generated in $OBJDIR
  74. 'shellmoduleloader.out.h', # generated in $OBJDIR
  75. 'unicode/plurrule.h', # ICU
  76. 'unicode/timezone.h', # ICU
  77. 'unicode/ucal.h', # ICU
  78. 'unicode/uclean.h', # ICU
  79. 'unicode/ucol.h', # ICU
  80. 'unicode/udat.h', # ICU
  81. 'unicode/udatpg.h', # ICU
  82. 'unicode/uenum.h', # ICU
  83. 'unicode/uniset.h', # ICU
  84. 'unicode/unorm.h', # ICU
  85. 'unicode/unum.h', # ICU
  86. 'unicode/unumsys.h', # ICU
  87. 'unicode/upluralrules.h', # ICU
  88. 'unicode/ustring.h', # ICU
  89. 'unicode/utypes.h', # ICU
  90. 'vtune/VTuneWrapper.h' # VTune
  91. ])
  92. # These files have additional constraints on where they are #included, so we
  93. # ignore #includes of them when checking #include ordering.
  94. oddly_ordered_inclnames = set([
  95. 'ctypes/typedefs.h', # Included multiple times in the body of ctypes/CTypes.h
  96. 'frontend/ReservedWordsGenerated.h', # Included in the body of frontend/TokenStream.h
  97. 'jswin.h', # Must be #included before <psapi.h>
  98. 'machine/endian.h', # Must be included after <sys/types.h> on BSD
  99. 'winbase.h', # Must precede other system headers(?)
  100. 'windef.h' # Must precede other system headers(?)
  101. ])
  102. # The files in tests/style/ contain code that fails this checking in various
  103. # ways. Here is the output we expect. If the actual output differs from
  104. # this, one of the following must have happened.
  105. # - New SpiderMonkey code violates one of the checked rules.
  106. # - The tests/style/ files have changed without expected_output being changed
  107. # accordingly.
  108. # - This script has been broken somehow.
  109. #
  110. expected_output = '''\
  111. js/src/tests/style/BadIncludes2.h:1: error:
  112. vanilla header includes an inline-header file "tests/style/BadIncludes2-inl.h"
  113. js/src/tests/style/BadIncludes.h:3: error:
  114. the file includes itself
  115. js/src/tests/style/BadIncludes.h:6: error:
  116. "BadIncludes2.h" is included using the wrong path;
  117. did you forget a prefix, or is the file not yet committed?
  118. js/src/tests/style/BadIncludes.h:8: error:
  119. <tests/style/BadIncludes2.h> should be included using
  120. the #include "..." form
  121. js/src/tests/style/BadIncludes.h:10: error:
  122. "stdio.h" is included using the wrong path;
  123. did you forget a prefix, or is the file not yet committed?
  124. js/src/tests/style/BadIncludesOrder-inl.h:5:6: error:
  125. "vm/Interpreter-inl.h" should be included after "jsscriptinlines.h"
  126. js/src/tests/style/BadIncludesOrder-inl.h:6:7: error:
  127. "jsscriptinlines.h" should be included after "js/Value.h"
  128. js/src/tests/style/BadIncludesOrder-inl.h:7:8: error:
  129. "js/Value.h" should be included after "ds/LifoAlloc.h"
  130. js/src/tests/style/BadIncludesOrder-inl.h:8:9: error:
  131. "ds/LifoAlloc.h" should be included after "jsapi.h"
  132. js/src/tests/style/BadIncludesOrder-inl.h:9:10: error:
  133. "jsapi.h" should be included after <stdio.h>
  134. js/src/tests/style/BadIncludesOrder-inl.h:10:11: error:
  135. <stdio.h> should be included after "mozilla/HashFunctions.h"
  136. js/src/tests/style/BadIncludesOrder-inl.h:27:28: error:
  137. "jsobj.h" should be included after "jsfun.h"
  138. (multiple files): error:
  139. header files form one or more cycles
  140. tests/style/HeaderCycleA1.h
  141. -> tests/style/HeaderCycleA2.h
  142. -> tests/style/HeaderCycleA3.h
  143. -> tests/style/HeaderCycleA1.h
  144. tests/style/HeaderCycleB1-inl.h
  145. -> tests/style/HeaderCycleB2-inl.h
  146. -> tests/style/HeaderCycleB3-inl.h
  147. -> tests/style/HeaderCycleB4-inl.h
  148. -> tests/style/HeaderCycleB1-inl.h
  149. -> tests/style/jsheadercycleB5inlines.h
  150. -> tests/style/HeaderCycleB1-inl.h
  151. -> tests/style/HeaderCycleB4-inl.h
  152. '''.splitlines(True)
  153. actual_output = []
  154. def out(*lines):
  155. for line in lines:
  156. actual_output.append(line + '\n')
  157. def error(filename, linenum, *lines):
  158. location = filename
  159. if linenum is not None:
  160. location += ':' + str(linenum)
  161. out(location + ': error:')
  162. for line in (lines):
  163. out(' ' + line)
  164. out('')
  165. class FileKind(object):
  166. C = 1
  167. CPP = 2
  168. INL_H = 3
  169. H = 4
  170. TBL = 5
  171. MSG = 6
  172. @staticmethod
  173. def get(filename):
  174. if filename.endswith('.c'):
  175. return FileKind.C
  176. if filename.endswith('.cpp'):
  177. return FileKind.CPP
  178. if filename.endswith(('inlines.h', '-inl.h')):
  179. return FileKind.INL_H
  180. if filename.endswith('.h'):
  181. return FileKind.H
  182. if filename.endswith('.tbl'):
  183. return FileKind.TBL
  184. if filename.endswith('.msg'):
  185. return FileKind.MSG
  186. error(filename, None, 'unknown file kind')
  187. def check_style():
  188. # We deal with two kinds of name.
  189. # - A "filename" is a full path to a file from the repository root.
  190. # - An "inclname" is how a file is referred to in a #include statement.
  191. #
  192. # Examples (filename -> inclname)
  193. # - "mfbt/Attributes.h" -> "mozilla/Attributes.h"
  194. # - "mfbt/decimal/Decimal.h -> "mozilla/Decimal.h"
  195. # - "mozglue/misc/TimeStamp.h -> "mozilla/TimeStamp.h"
  196. # - "memory/mozalloc/mozalloc.h -> "mozilla/mozalloc.h"
  197. # - "js/public/Vector.h" -> "js/Vector.h"
  198. # - "js/src/vm/String.h" -> "vm/String.h"
  199. non_js_dirnames = ('mfbt/',
  200. 'memory/mozalloc/',
  201. 'mozglue/') # type: tuple(str)
  202. non_js_inclnames = set() # type: set(inclname)
  203. js_names = dict() # type: dict(filename, inclname)
  204. repo = get_repository_from_env()
  205. # Select the appropriate files.
  206. for filename in repo.get_files_in_working_directory():
  207. for non_js_dir in non_js_dirnames:
  208. if filename.startswith(non_js_dir) and filename.endswith('.h'):
  209. inclname = 'mozilla/' + filename.split('/')[-1]
  210. non_js_inclnames.add(inclname)
  211. if filename.startswith('js/public/') and filename.endswith('.h'):
  212. inclname = 'js/' + filename[len('js/public/'):]
  213. js_names[filename] = inclname
  214. if filename.startswith('js/src/') and \
  215. not filename.startswith(tuple(ignored_js_src_dirs)) and \
  216. filename.endswith(('.c', '.cpp', '.h', '.tbl', '.msg')):
  217. inclname = filename[len('js/src/'):]
  218. js_names[filename] = inclname
  219. all_inclnames = non_js_inclnames | set(js_names.values())
  220. edges = dict() # type: dict(inclname, set(inclname))
  221. # We don't care what's inside the MFBT and MOZALLOC files, but because they
  222. # are #included from JS files we have to add them to the inclusion graph.
  223. for inclname in non_js_inclnames:
  224. edges[inclname] = set()
  225. # Process all the JS files.
  226. for filename in js_names.keys():
  227. inclname = js_names[filename]
  228. file_kind = FileKind.get(filename)
  229. if file_kind == FileKind.C or file_kind == FileKind.CPP or \
  230. file_kind == FileKind.H or file_kind == FileKind.INL_H:
  231. included_h_inclnames = set() # type: set(inclname)
  232. # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla
  233. # source tree.
  234. with open(os.path.join(repo.path, filename)) as f:
  235. do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames)
  236. edges[inclname] = included_h_inclnames
  237. find_cycles(all_inclnames, edges)
  238. # Compare expected and actual output.
  239. difflines = difflib.unified_diff(expected_output, actual_output,
  240. fromfile='check_spider_monkey_style.py expected output',
  241. tofile='check_spider_monkey_style.py actual output')
  242. ok = True
  243. for diffline in difflines:
  244. ok = False
  245. print(diffline, end='')
  246. return ok
  247. def module_name(name):
  248. '''Strip the trailing .cpp, .h, inlines.h or -inl.h from a filename.'''
  249. return name.replace('inlines.h', '').replace('-inl.h', '').replace('.h', '').replace('.cpp', '')
  250. def is_module_header(enclosing_inclname, header_inclname):
  251. '''Determine if an included name is the "module header", i.e. should be
  252. first in the file.'''
  253. module = module_name(enclosing_inclname)
  254. # Normal case, e.g. module == "foo/Bar", header_inclname == "foo/Bar.h".
  255. if module == module_name(header_inclname):
  256. return True
  257. # A public header, e.g. module == "foo/Bar", header_inclname == "js/Bar.h".
  258. m = re.match(r'js\/(.*)\.h', header_inclname)
  259. if m is not None and module.endswith('/' + m.group(1)):
  260. return True
  261. return False
  262. class Include(object):
  263. '''Important information for a single #include statement.'''
  264. def __init__(self, inclname, linenum, is_system):
  265. self.inclname = inclname
  266. self.linenum = linenum
  267. self.is_system = is_system
  268. def isLeaf(self):
  269. return True
  270. def section(self, enclosing_inclname):
  271. '''Identify which section inclname belongs to.
  272. The section numbers are as follows.
  273. 0. Module header (e.g. jsfoo.h or jsfooinlines.h within jsfoo.cpp)
  274. 1. mozilla/Foo.h
  275. 2. <foo.h> or <foo>
  276. 3. jsfoo.h, prmjtime.h, etc
  277. 4. foo/Bar.h
  278. 5. jsfooinlines.h
  279. 6. foo/Bar-inl.h
  280. 7. non-.h, e.g. *.tbl, *.msg
  281. '''
  282. if self.is_system:
  283. return 2
  284. if not self.inclname.endswith('.h'):
  285. return 7
  286. # A couple of modules have the .h file in js/ and the .cpp file elsewhere and so need
  287. # special handling.
  288. if is_module_header(enclosing_inclname, self.inclname):
  289. return 0
  290. if '/' in self.inclname:
  291. if self.inclname.startswith('mozilla/'):
  292. return 1
  293. if self.inclname.endswith('-inl.h'):
  294. return 6
  295. return 4
  296. if self.inclname.endswith('inlines.h'):
  297. return 5
  298. return 3
  299. def quote(self):
  300. if self.is_system:
  301. return '<' + self.inclname + '>'
  302. else:
  303. return '"' + self.inclname + '"'
  304. class HashIfBlock(object):
  305. '''Important information about a #if/#endif block.
  306. A #if/#endif block is the contents of a #if/#endif (or similar) section.
  307. The top-level block, which is not within a #if/#endif pair, is also
  308. considered a block.
  309. Each leaf is either an Include (representing a #include), or another
  310. nested HashIfBlock.'''
  311. def __init__(self):
  312. self.kids = []
  313. def isLeaf(self):
  314. return False
  315. def do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames):
  316. block_stack = [HashIfBlock()]
  317. # Extract the #include statements as a tree of IBlocks and IIncludes.
  318. for linenum, line in enumerate(f, start=1):
  319. # We're only interested in lines that contain a '#'.
  320. if not '#' in line:
  321. continue
  322. # Look for a |#include "..."| line.
  323. m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
  324. if m is not None:
  325. block_stack[-1].kids.append(Include(m.group(1), linenum, False))
  326. # Look for a |#include <...>| line.
  327. m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
  328. if m is not None:
  329. block_stack[-1].kids.append(Include(m.group(1), linenum, True))
  330. # Look for a |#{if,ifdef,ifndef}| line.
  331. m = re.match(r'\s*#\s*(if|ifdef|ifndef)\b', line)
  332. if m is not None:
  333. # Open a new block.
  334. new_block = HashIfBlock()
  335. block_stack[-1].kids.append(new_block)
  336. block_stack.append(new_block)
  337. # Look for a |#{elif,else}| line.
  338. m = re.match(r'\s*#\s*(elif|else)\b', line)
  339. if m is not None:
  340. # Close the current block, and open an adjacent one.
  341. block_stack.pop()
  342. new_block = HashIfBlock()
  343. block_stack[-1].kids.append(new_block)
  344. block_stack.append(new_block)
  345. # Look for a |#endif| line.
  346. m = re.match(r'\s*#\s*endif\b', line)
  347. if m is not None:
  348. # Close the current block.
  349. block_stack.pop()
  350. def check_include_statement(include):
  351. '''Check the style of a single #include statement.'''
  352. if include.is_system:
  353. # Check it is not a known local file (in which case it's probably a system header).
  354. if include.inclname in included_inclnames_to_ignore or \
  355. include.inclname in all_inclnames:
  356. error(filename, include.linenum,
  357. include.quote() + ' should be included using',
  358. 'the #include "..." form')
  359. else:
  360. if include.inclname not in included_inclnames_to_ignore:
  361. included_kind = FileKind.get(include.inclname)
  362. # Check the #include path has the correct form.
  363. if include.inclname not in all_inclnames:
  364. error(filename, include.linenum,
  365. include.quote() + ' is included using the wrong path;',
  366. 'did you forget a prefix, or is the file not yet committed?')
  367. # Record inclusions of .h files for cycle detection later.
  368. # (Exclude .tbl and .msg files.)
  369. elif included_kind == FileKind.H or included_kind == FileKind.INL_H:
  370. included_h_inclnames.add(include.inclname)
  371. # Check a H file doesn't #include an INL_H file.
  372. if file_kind == FileKind.H and included_kind == FileKind.INL_H:
  373. error(filename, include.linenum,
  374. 'vanilla header includes an inline-header file ' + include.quote())
  375. # Check a file doesn't #include itself. (We do this here because the cycle
  376. # detection below doesn't detect this case.)
  377. if inclname == include.inclname:
  378. error(filename, include.linenum, 'the file includes itself')
  379. def check_includes_order(include1, include2):
  380. '''Check the ordering of two #include statements.'''
  381. if include1.inclname in oddly_ordered_inclnames or \
  382. include2.inclname in oddly_ordered_inclnames:
  383. return
  384. section1 = include1.section(inclname)
  385. section2 = include2.section(inclname)
  386. if (section1 > section2) or \
  387. ((section1 == section2) and (include1.inclname.lower() > include2.inclname.lower())):
  388. error(filename, str(include1.linenum) + ':' + str(include2.linenum),
  389. include1.quote() + ' should be included after ' + include2.quote())
  390. # Check the extracted #include statements, both individually, and the ordering of
  391. # adjacent pairs that live in the same block.
  392. def pair_traverse(prev, this):
  393. if this.isLeaf():
  394. check_include_statement(this)
  395. if prev is not None and prev.isLeaf():
  396. check_includes_order(prev, this)
  397. else:
  398. for prev2, this2 in zip([None] + this.kids[0:-1], this.kids):
  399. pair_traverse(prev2, this2)
  400. pair_traverse(None, block_stack[-1])
  401. def find_cycles(all_inclnames, edges):
  402. '''Find and draw any cycles.'''
  403. SCCs = tarjan(all_inclnames, edges)
  404. # The various sorted() calls below ensure the output is deterministic.
  405. def draw_SCC(c):
  406. cset = set(c)
  407. drawn = set()
  408. def draw(v, indent):
  409. out(' ' * indent + ('-> ' if indent else ' ') + v)
  410. if v in drawn:
  411. return
  412. drawn.add(v)
  413. for succ in sorted(edges[v]):
  414. if succ in cset:
  415. draw(succ, indent + 1)
  416. draw(sorted(c)[0], 0)
  417. out('')
  418. have_drawn_an_SCC = False
  419. for scc in sorted(SCCs):
  420. if len(scc) != 1:
  421. if not have_drawn_an_SCC:
  422. error('(multiple files)', None, 'header files form one or more cycles')
  423. have_drawn_an_SCC = True
  424. draw_SCC(scc)
  425. # Tarjan's algorithm for finding the strongly connected components (SCCs) of a graph.
  426. # https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
  427. def tarjan(V, E):
  428. vertex_index = {}
  429. vertex_lowlink = {}
  430. index = 0
  431. S = []
  432. all_SCCs = []
  433. def strongconnect(v, index):
  434. # Set the depth index for v to the smallest unused index
  435. vertex_index[v] = index
  436. vertex_lowlink[v] = index
  437. index += 1
  438. S.append(v)
  439. # Consider successors of v
  440. for w in E[v]:
  441. if w not in vertex_index:
  442. # Successor w has not yet been visited; recurse on it
  443. index = strongconnect(w, index)
  444. vertex_lowlink[v] = min(vertex_lowlink[v], vertex_lowlink[w])
  445. elif w in S:
  446. # Successor w is in stack S and hence in the current SCC
  447. vertex_lowlink[v] = min(vertex_lowlink[v], vertex_index[w])
  448. # If v is a root node, pop the stack and generate an SCC
  449. if vertex_lowlink[v] == vertex_index[v]:
  450. i = S.index(v)
  451. scc = S[i:]
  452. del S[i:]
  453. all_SCCs.append(scc)
  454. return index
  455. for v in V:
  456. if v not in vertex_index:
  457. index = strongconnect(v, index)
  458. return all_SCCs
  459. def main():
  460. ok = check_style()
  461. if ok:
  462. print('TEST-PASS | check_spidermonkey_style.py | ok')
  463. else:
  464. print('TEST-UNEXPECTED-FAIL | check_spidermonkey_style.py | actual output does not match expected output; diff is above')
  465. sys.exit(0 if ok else 1)
  466. if __name__ == '__main__':
  467. main()