nimprof.nim 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Profiling support for Nim. This is an embedded profiler that requires
  10. ## `--profiler:on`. You only need to import this module to get a profiling
  11. ## report at program exit. See `Embedded Stack Trace Profiler <estp.html>`_
  12. ## for usage.
  13. when not defined(profiler) and not defined(memProfiler):
  14. {.error: "Profiling support is turned off! Enable profiling by passing `--profiler:on --stackTrace:on` to the compiler (see the Nim Compiler User Guide for more options).".}
  15. when defined(nimHasUsed):
  16. {.used.}
  17. # We don't want to profile the profiling code ...
  18. {.push profiler: off.}
  19. import hashes, algorithm, strutils, tables, sets
  20. when defined(nimPreviewSlimSystem):
  21. import std/[syncio, sysatomics]
  22. when not defined(memProfiler):
  23. include "system/timers"
  24. const
  25. withThreads = compileOption("threads")
  26. tickCountCorrection = 50_000
  27. when not declared(system.StackTrace):
  28. type StackTrace = object
  29. lines: array[0..20, cstring]
  30. files: array[0..20, cstring]
  31. proc `[]`*(st: StackTrace, i: int): cstring = st.lines[i]
  32. # We use a simple hash table of bounded size to keep track of the stack traces:
  33. type
  34. ProfileEntry = object
  35. total: int
  36. st: StackTrace
  37. ProfileData = array[0..64*1024-1, ptr ProfileEntry]
  38. proc `==`(a, b: StackTrace): bool =
  39. for i in 0 .. high(a.lines):
  40. if a[i] != b[i]: return false
  41. result = true
  42. # XXX extract this data structure; it is generally useful ;-)
  43. # However a chain length of over 3000 is suspicious...
  44. var
  45. profileData: ProfileData
  46. emptySlots = profileData.len * 3 div 2
  47. maxChainLen = 0
  48. totalCalls = 0
  49. when not defined(memProfiler):
  50. var interval: Nanos = 5_000_000 - tickCountCorrection # 5ms
  51. proc setSamplingFrequency*(intervalInUs: int) =
  52. ## set this to change the sampling frequency. Default value is 5ms.
  53. ## Set it to 0 to disable time based profiling; it uses an imprecise
  54. ## instruction count measure instead then.
  55. if intervalInUs <= 0: interval = 0
  56. else: interval = intervalInUs * 1000 - tickCountCorrection
  57. when withThreads:
  58. import locks
  59. var
  60. profilingLock: Lock
  61. initLock profilingLock
  62. proc hookAux(st: StackTrace, costs: int) =
  63. # this is quite performance sensitive!
  64. when withThreads: acquire profilingLock
  65. inc totalCalls
  66. var last = high(st.lines)
  67. while last > 0 and isNil(st[last]): dec last
  68. var h = hash(pointer(st[last])) and high(profileData)
  69. # we use probing for maxChainLen entries and replace the encountered entry
  70. # with the minimal 'total' value:
  71. if emptySlots == 0:
  72. var minIdx = h
  73. var probes = maxChainLen
  74. while probes >= 0:
  75. if profileData[h].st == st:
  76. # wow, same entry found:
  77. inc profileData[h].total, costs
  78. return
  79. if profileData[minIdx].total < profileData[h].total:
  80. minIdx = h
  81. h = ((5 * h) + 1) and high(profileData)
  82. dec probes
  83. profileData[minIdx].total = costs
  84. profileData[minIdx].st = st
  85. else:
  86. var chain = 0
  87. while true:
  88. if profileData[h] == nil:
  89. profileData[h] = cast[ptr ProfileEntry](
  90. allocShared0(sizeof(ProfileEntry)))
  91. profileData[h].total = costs
  92. profileData[h].st = st
  93. dec emptySlots
  94. break
  95. if profileData[h].st == st:
  96. # wow, same entry found:
  97. inc profileData[h].total, costs
  98. break
  99. h = ((5 * h) + 1) and high(profileData)
  100. inc chain
  101. maxChainLen = max(maxChainLen, chain)
  102. when withThreads: release profilingLock
  103. when defined(memProfiler):
  104. const
  105. SamplingInterval = 50_000
  106. var
  107. gTicker {.threadvar.}: int
  108. proc requestedHook(): bool {.nimcall.} =
  109. if gTicker == 0:
  110. gTicker = SamplingInterval
  111. result = true
  112. dec gTicker
  113. proc hook(st: StackTrace, size: int) {.nimcall.} =
  114. when defined(ignoreAllocationSize):
  115. hookAux(st, 1)
  116. else:
  117. hookAux(st, size)
  118. else:
  119. var
  120. t0 {.threadvar.}: Ticks
  121. gTicker: int # we use an additional counter to
  122. # avoid calling 'getTicks' too frequently
  123. proc requestedHook(): bool {.nimcall.} =
  124. if interval == 0: result = true
  125. elif gTicker == 0:
  126. gTicker = 500
  127. if getTicks() - t0 > interval:
  128. result = true
  129. else:
  130. dec gTicker
  131. proc hook(st: StackTrace) {.nimcall.} =
  132. #echo "profiling! ", interval
  133. if interval == 0:
  134. hookAux(st, 1)
  135. else:
  136. hookAux(st, 1)
  137. t0 = getTicks()
  138. proc getTotal(x: ptr ProfileEntry): int =
  139. result = if isNil(x): 0 else: x.total
  140. proc cmpEntries(a, b: ptr ProfileEntry): int =
  141. result = b.getTotal - a.getTotal
  142. proc `//`(a, b: int): string =
  143. result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDecimal, 2))
  144. proc writeProfile() {.noconv.} =
  145. system.profilingRequestedHook = nil
  146. when declared(system.StackTrace):
  147. system.profilerHook = nil
  148. const filename = "profile_results.txt"
  149. echo "writing " & filename & "..."
  150. var f: File
  151. if open(f, filename, fmWrite):
  152. sort(profileData, cmpEntries)
  153. writeLine(f, "total executions of each stack trace:")
  154. var entries = 0
  155. for i in 0..high(profileData):
  156. if profileData[i] != nil: inc entries
  157. var perProc = initCountTable[string]()
  158. for i in 0..entries-1:
  159. var dups = initHashSet[string]()
  160. for ii in 0..high(StackTrace.lines):
  161. let procname = profileData[i].st[ii]
  162. if isNil(procname): break
  163. let p = $procname
  164. if not containsOrIncl(dups, p):
  165. perProc.inc(p, profileData[i].total)
  166. var sum = 0
  167. # only write the first 100 entries:
  168. for i in 0..min(100, entries-1):
  169. if profileData[i].total > 1:
  170. inc sum, profileData[i].total
  171. writeLine(f, "Entry: ", i+1, "/", entries, " Calls: ",
  172. profileData[i].total // totalCalls, " [sum: ", sum, "; ",
  173. sum // totalCalls, "]")
  174. for ii in 0..high(StackTrace.lines):
  175. let procname = profileData[i].st[ii]
  176. let filename = profileData[i].st.files[ii]
  177. if isNil(procname): break
  178. writeLine(f, " ", $filename & ": " & $procname, " ",
  179. perProc[$procname] // totalCalls)
  180. close(f)
  181. echo "... done"
  182. else:
  183. echo "... failed"
  184. var
  185. disabled: int
  186. proc disableProfiling*() =
  187. when declared(system.StackTrace):
  188. atomicDec disabled
  189. system.profilingRequestedHook = nil
  190. proc enableProfiling*() =
  191. when declared(system.StackTrace):
  192. if atomicInc(disabled) >= 0:
  193. system.profilingRequestedHook = requestedHook
  194. when declared(system.StackTrace):
  195. import std/exitprocs
  196. system.profilingRequestedHook = requestedHook
  197. system.profilerHook = hook
  198. addExitProc(writeProfile)
  199. {.pop.}