gen_simout.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. #!/usr/bin/env python
  2. import sys, os, getopt, sniper_lib
def generate_simout(jobid = None, resultsdir = None, partial = None, output = sys.stdout, silent = False):
  """Write a human-readable sim.out summary table for a Sniper run.

  Loads statistics via sniper_lib.get_results (by jobid or resultsdir,
  optionally restricted to a partial begin/end section), derives per-core
  metrics (IPC, idle time, branch predictor, TLB, cache and DRAM stats),
  and writes one formatted column per core to `output`.

  On KeyError/ValueError from the results loader, prints a message
  (unless `silent`) and returns None without writing anything.
  """
  try:
    res = sniper_lib.get_results(jobid = jobid, resultsdir = resultsdir, partial = partial)
  except (KeyError, ValueError), e:
    if not silent:
      print 'Failed to generated sim.out:', e
    return

  results = res['results']
  config = res['config']
  ncores = int(config['general/total_cores'])

  # Cell formatters. Times appear to be stored in femtoseconds (cf. the
  # 'fs_to_cycles_cores' conversion factors below), so format_ns divides
  # by 1e6 to print nanoseconds -- TODO confirm against sniper_lib.
  format_int = lambda v: str(long(v))
  format_pct = lambda v: '%.1f%%' % (100. * v)
  def format_float(digits):
    # Fixed-point formatter with `digits` decimal places.
    return lambda v: ('%%.%uf' % digits) % v
  def format_ns(digits):
    # Time formatter: raw value / 1e6 with `digits` decimal places.
    return lambda v: ('%%.%uf' % digits) % (v/1e6)

  # Global (barrier) wall time of the measured region.
  if 'barrier.global_time_begin' in results:
    time0_begin = results['barrier.global_time_begin']
    time0_end = results['barrier.global_time_end']

  if 'barrier.global_time' in results:
    time0 = results['barrier.global_time'][0]
  else:
    # NOTE(review): begin minus end, not end minus begin -- presumably these
    # counters run backwards in the stats database; verify in sniper_lib.
    time0 = time0_begin - time0_end

  if sum(results['performance_model.instruction_count']) == 0:
    # core.instructions is less exact, but in cache-only mode it's all there is
    results['performance_model.instruction_count'] = results['core.instructions']

  # Derived per-core metrics. Every core is assigned the same fixed elapsed
  # time (the global barrier time) so cycles/IPC are computed on a common base.
  results['performance_model.elapsed_time_fixed'] = [
    time0
    for c in range(ncores)
  ]
  results['performance_model.cycle_count_fixed'] = [
    results['performance_model.elapsed_time_fixed'][c] * results['fs_to_cycles_cores'][c]
    for c in range(ncores)
  ]
  # `c or 1` guards against division by zero on cores with no cycles.
  results['performance_model.ipc'] = [
    i / (c or 1)
    for i, c in zip(results['performance_model.instruction_count'], results['performance_model.cycle_count_fixed'])
  ]
  # Order matters: nonidle time must be computed from the raw idle stat
  # before idle_elapsed_time is overwritten just below.
  results['performance_model.nonidle_elapsed_time'] = [
    results['performance_model.elapsed_time'][c] - results['performance_model.idle_elapsed_time'][c]
    for c in range(ncores)
  ]
  # Re-express idle time relative to the global time base time0.
  results['performance_model.idle_elapsed_time'] = [
    time0 - results['performance_model.nonidle_elapsed_time'][c]
    for c in range(ncores)
  ]
  results['performance_model.idle_elapsed_percent'] = [
    results['performance_model.idle_elapsed_time'][c] / float(time0)
    for c in range(ncores)
  ]

  # Output template: (row title, results key, cell formatter) triples.
  # Rows with an empty key are section headers (printed with blank cells,
  # see the rendering loop at the bottom).
  template = [
    (' Instructions', 'performance_model.instruction_count', str),
    (' Cycles', 'performance_model.cycle_count_fixed', format_int),
    (' IPC', 'performance_model.ipc', format_float(2)),
    (' Time (ns)', 'performance_model.elapsed_time_fixed', format_ns(0)),
    (' Idle time (ns)', 'performance_model.idle_elapsed_time', format_ns(0)),
    (' Idle time (%)', 'performance_model.idle_elapsed_percent', format_pct),
  ]

  # Branch predictor stats (only when the run collected them).
  # `or 1` again protects against zero denominators.
  if 'branch_predictor.num-incorrect' in results:
    results['branch_predictor.missrate'] = [ 100 * float(results['branch_predictor.num-incorrect'][core])
      / ((results['branch_predictor.num-correct'][core] + results['branch_predictor.num-incorrect'][core]) or 1) for core in range(ncores) ]
    results['branch_predictor.mpki'] = [ 1000 * float(results['branch_predictor.num-incorrect'][core])
      / (results['performance_model.instruction_count'][core] or 1) for core in range(ncores) ]
    template += [
      ('Branch predictor stats', '', ''),
      (' num correct', 'branch_predictor.num-correct', str),
      (' num incorrect','branch_predictor.num-incorrect', str),
      (' misprediction rate', 'branch_predictor.missrate', lambda v: '%.2f%%' % v),
      (' mpki', 'branch_predictor.mpki', lambda v: '%.2f' % v),
    ]

  # TLB stats: miss rate (%) and misses per kilo-instruction for each
  # TLB level present in the results. (Python 2 tuple-unpacking lambdas.)
  template += [
    ('TLB Summary', '', ''),
  ]
  for tlb in ('itlb', 'dtlb', 'stlb'):
    if '%s.access'%tlb in results:
      results['%s.missrate'%tlb] = map(lambda (a,b): 100*a/float(b or 1), zip(results['%s.miss'%tlb], results['%s.access'%tlb]))
      results['%s.mpki'%tlb] = map(lambda (a,b): 1000*a/float(b or 1), zip(results['%s.miss'%tlb], results['performance_model.instruction_count']))
      template.extend([
        (' %s' % {'itlb': 'I-TLB', 'dtlb': 'D-TLB', 'stlb': 'L2 TLB'}[tlb], '', ''),
        (' num accesses', '%s.access'%tlb, str),
        (' num misses', '%s.miss'%tlb, str),
        (' miss rate', '%s.missrate'%tlb, lambda v: '%.2f%%' % v),
        (' mpki', '%s.mpki'%tlb, lambda v: '%.2f' % v),
      ])

  # Core cache hierarchy: L1-I, L1-D, and L2 through L4, for whichever
  # levels the results contain.
  template += [
    ('Cache Summary', '', ''),
  ]
  allcaches = [ 'L1-I', 'L1-D' ] + [ 'L%u'%l for l in range(2, 5) ]
  existcaches = [ c for c in allcaches if '%s.loads'%c in results ]
  for c in existcaches:
    results['%s.accesses'%c] = map(sum, zip(results['%s.loads'%c], results['%s.stores'%c]))
    # Prefer the store-misses-I counter when present, else plain store-misses.
    results['%s.misses'%c] = map(sum, zip(results['%s.load-misses'%c], results.get('%s.store-misses-I'%c, results['%s.store-misses'%c])))
    results['%s.missrate'%c] = map(lambda (a,b): 100*a/float(b) if b else float('inf'), zip(results['%s.misses'%c], results['%s.accesses'%c]))
    results['%s.mpki'%c] = map(lambda (a,b): 1000*a/float(b) if b else float('inf'), zip(results['%s.misses'%c], results['performance_model.instruction_count']))
    template.extend([
      (' Cache %s'%c, '', ''),
      (' num cache accesses', '%s.accesses'%c, str),
      (' num cache loads', '%s.loads'%c, str),
      (' num cache stores', '%s.stores'%c, str),
      (' num cache misses', '%s.misses'%c, str),
      (' miss rate', '%s.missrate'%c, lambda v: '%.2f%%' % v),
      (' mpki', '%s.mpki'%c, lambda v: '%.2f' % v),
      (' num prefetches', '%s.prefetches'%c, str),
      (' num prefetch loads', '%s.loads-prefetch'%c, str),
      (' num prefetch stores', '%s.stores-prefetch'%c, str),
      (' num prefetches useful', '%s.hits-prefetch'%c, str),
      (' num prefetches not used', '%s.evict-prefetch'%c, str),
      (' num prefetches invalidated', '%s.invalidate-prefetch'%c, str),
    ])

  # Shared caches (NUCA / DRAM cache) use reads/writes instead of loads/stores.
  allcaches = [ 'nuca-cache', 'dram-cache' ]
  existcaches = [ c for c in allcaches if '%s.reads'%c in results ]
  for c in existcaches:
    results['%s.accesses'%c] = map(sum, zip(results['%s.reads'%c], results['%s.writes'%c]))
    results['%s.misses'%c] = map(sum, zip(results['%s.read-misses'%c], results['%s.write-misses'%c]))
    results['%s.missrate'%c] = map(lambda (a,b): 100*a/float(b) if b else float('inf'), zip(results['%s.misses'%c], results['%s.accesses'%c]))
    icount = sum(results['performance_model.instruction_count'])
    icount /= len([ v for v in results['%s.accesses'%c] if v ]) # Assume instructions are evenly divided over all cache slices
    results['%s.mpki'%c] = map(lambda a: 1000*a/float(icount) if icount else float('inf'), results['%s.misses'%c])
    template.extend([
      (' %s cache'% c.split('-')[0].upper(), '', ''),
      (' num cache accesses', '%s.accesses'%c, str),
      (' num cache misses', '%s.misses'%c, str),
      (' miss rate', '%s.missrate'%c, lambda v: '%.2f%%' % v),
      (' mpki', '%s.mpki'%c, lambda v: '%.2f' % v),
    ])

  # DRAM: total accesses and average access latency per controller.
  results['dram.accesses'] = map(sum, zip(results['dram.reads'], results['dram.writes']))
  results['dram.avglatency'] = map(lambda (a,b): a/b if b else float('inf'), zip(results['dram.total-access-latency'], results['dram.accesses']))
  template += [
    ('DRAM summary', '', ''),
    (' num dram accesses', 'dram.accesses', str),
    (' average dram access latency (ns)', 'dram.avglatency', format_ns(2)),
  ]
  # Queueing delay: split read/write counters when available, else combined
  # (missing combined counter falls back to all-zero per-core values).
  if 'dram.total-read-queueing-delay' in results:
    results['dram.avgqueueread'] = map(lambda (a,b): a/(b or 1), zip(results['dram.total-read-queueing-delay'], results['dram.reads']))
    results['dram.avgqueuewrite'] = map(lambda (a,b): a/(b or 1), zip(results['dram.total-write-queueing-delay'], results['dram.writes']))
    template.append((' average dram read queueing delay', 'dram.avgqueueread', format_ns(2)))
    template.append((' average dram write queueing delay', 'dram.avgqueuewrite', format_ns(2)))
  else:
    results['dram.avgqueue'] = map(lambda (a,b): a/(b or 1), zip(results.get('dram.total-queueing-delay', [0]*ncores), results['dram.accesses']))
    template.append((' average dram queueing delay', 'dram.avgqueue', format_ns(2)))
  if 'dram-queue.total-time-used' in results:
    # Fraction of the measured interval the DRAM queue was busy.
    results['dram.bandwidth'] = map(lambda a: 100*a/time0 if time0 else float('inf'), results['dram-queue.total-time-used'])
    template.append((' average dram bandwidth utilization', 'dram.bandwidth', lambda v: '%.2f%%' % v))

  # Coherency traffic: where L1-D loads were serviced (store rows are
  # deliberately commented out in the template).
  if 'L1-D.loads-where-dram-local' in results:
    results['L1-D.loads-where-dram'] = map(sum, zip(results['L1-D.loads-where-dram-local'], results['L1-D.loads-where-dram-remote']))
    results['L1-D.stores-where-dram'] = map(sum, zip(results['L1-D.stores-where-dram-local'], results['L1-D.stores-where-dram-remote']))
    template.extend([
      ('Coherency Traffic', '', ''),
      (' num loads from dram', 'L1-D.loads-where-dram' , str),
      #(' num stores from dram', 'L1-D.stores-where-dram' , str),
      (' num loads from dram cache', 'L1-D.loads-where-dram-cache' , str),
      #(' num stores from dram cache', 'L1-D.stores-where-dram-cache' , str),
      (' num loads from remote cache', 'L1-D.loads-where-cache-remote' , str),
      #(' num stores from remote cache', 'L1-D.stores-where-cache-remote' , str),
    ])

  # Render the template: header row of core names, then one row per
  # template entry. Header-only rows (empty key) get blank cells.
  lines = []
  lines.append([''] + [ 'Core %u' % i for i in range(ncores) ])
  for title, name, func in template:
    line = [ title ]
    if name and name in results:
      for core in range(ncores):
        line.append(' '+func(results[name][core]))
    else:
      line += [''] * ncores
    lines.append(line)

  # Column widths: widest cell in each column, with a 10-char minimum.
  widths = [ max(10, max([ len(l[i]) for l in lines ])) for i in range(len(lines[0])) ]
  # The header row (j==0) and title column (i==0) are left-aligned ('-' flag);
  # all data cells are right-aligned.
  for j, line in enumerate(lines):
    output.write(' | '.join([ ('%%%s%us' % ((j==0 or i==0) and '-' or '', widths[i])) % line[i] for i in range(len(line)) ]) + '\n')
  171. if __name__ == '__main__':
  172. def usage():
  173. print 'Usage:', sys.argv[0], '[-h (help)] [--partial <section-start>:<section-end> (default: roi-begin:roi-end)] [-d <resultsdir (default: .)>]'
  174. jobid = 0
  175. resultsdir = '.'
  176. partial = None
  177. try:
  178. opts, args = getopt.getopt(sys.argv[1:], "hj:d:", [ 'partial=' ])
  179. except getopt.GetoptError, e:
  180. print e
  181. usage()
  182. sys.exit()
  183. for o, a in opts:
  184. if o == '-h':
  185. usage()
  186. sys.exit()
  187. if o == '-d':
  188. resultsdir = a
  189. if o == '-j':
  190. jobid = long(a)
  191. if o == '--partial':
  192. if ':' not in a:
  193. sys.stderr.write('--partial=<from>:<to>\n')
  194. usage()
  195. partial = a.split(':')
  196. if args:
  197. usage()
  198. sys.exit(-1)
  199. generate_simout(jobid = jobid, resultsdir = resultsdir, partial = partial)