benchmark.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package testing
  5. import (
  6. "flag"
  7. "fmt"
  8. "os"
  9. "runtime"
  10. "sync"
  11. "sync/atomic"
  12. "time"
  13. )
  14. var matchBenchmarks = flag.String("test.bench", "", "regular expression to select benchmarks to run")
  15. var benchTime = flag.Duration("test.benchtime", 1*time.Second, "approximate run time for each benchmark")
  16. var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
  17. // Global lock to ensure only one benchmark runs at a time.
  18. var benchmarkLock sync.Mutex
  19. // Used for every benchmark for measuring memory.
  20. var memStats runtime.MemStats
  21. // An internal type but exported because it is cross-package; part of the implementation
  22. // of the "go test" command.
  23. type InternalBenchmark struct {
  24. Name string
  25. F func(b *B)
  26. }
  27. // B is a type passed to Benchmark functions to manage benchmark
  28. // timing and to specify the number of iterations to run.
  29. type B struct {
  30. common
  31. N int
  32. previousN int // number of iterations in the previous run
  33. previousDuration time.Duration // total duration of the previous run
  34. benchmark InternalBenchmark
  35. bytes int64
  36. timerOn bool
  37. showAllocResult bool
  38. result BenchmarkResult
  39. parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines
  40. // The initial states of memStats.Mallocs and memStats.TotalAlloc.
  41. startAllocs uint64
  42. startBytes uint64
  43. // The net total of this test after being run.
  44. netAllocs uint64
  45. netBytes uint64
  46. }
  47. // StartTimer starts timing a test. This function is called automatically
  48. // before a benchmark starts, but it can also used to resume timing after
  49. // a call to StopTimer.
  50. func (b *B) StartTimer() {
  51. if !b.timerOn {
  52. runtime.ReadMemStats(&memStats)
  53. b.startAllocs = memStats.Mallocs
  54. b.startBytes = memStats.TotalAlloc
  55. b.start = time.Now()
  56. b.timerOn = true
  57. }
  58. }
  59. // StopTimer stops timing a test. This can be used to pause the timer
  60. // while performing complex initialization that you don't
  61. // want to measure.
  62. func (b *B) StopTimer() {
  63. if b.timerOn {
  64. b.duration += time.Now().Sub(b.start)
  65. runtime.ReadMemStats(&memStats)
  66. b.netAllocs += memStats.Mallocs - b.startAllocs
  67. b.netBytes += memStats.TotalAlloc - b.startBytes
  68. b.timerOn = false
  69. }
  70. }
  71. // ResetTimer zeros the elapsed benchmark time and memory allocation counters.
  72. // It does not affect whether the timer is running.
  73. func (b *B) ResetTimer() {
  74. if b.timerOn {
  75. runtime.ReadMemStats(&memStats)
  76. b.startAllocs = memStats.Mallocs
  77. b.startBytes = memStats.TotalAlloc
  78. b.start = time.Now()
  79. }
  80. b.duration = 0
  81. b.netAllocs = 0
  82. b.netBytes = 0
  83. }
  84. // SetBytes records the number of bytes processed in a single operation.
  85. // If this is called, the benchmark will report ns/op and MB/s.
  86. func (b *B) SetBytes(n int64) { b.bytes = n }
  87. // ReportAllocs enables malloc statistics for this benchmark.
  88. // It is equivalent to setting -test.benchmem, but it only affects the
  89. // benchmark function that calls ReportAllocs.
  90. func (b *B) ReportAllocs() {
  91. b.showAllocResult = true
  92. }
  93. func (b *B) nsPerOp() int64 {
  94. if b.N <= 0 {
  95. return 0
  96. }
  97. return b.duration.Nanoseconds() / int64(b.N)
  98. }
  99. // runN runs a single benchmark for the specified number of iterations.
  100. func (b *B) runN(n int) {
  101. benchmarkLock.Lock()
  102. defer benchmarkLock.Unlock()
  103. // Try to get a comparable environment for each run
  104. // by clearing garbage from previous runs.
  105. runtime.GC()
  106. b.N = n
  107. b.parallelism = 1
  108. b.ResetTimer()
  109. b.StartTimer()
  110. b.benchmark.F(b)
  111. b.StopTimer()
  112. b.previousN = n
  113. b.previousDuration = b.duration
  114. }
  115. func min(x, y int) int {
  116. if x > y {
  117. return y
  118. }
  119. return x
  120. }
  121. func max(x, y int) int {
  122. if x < y {
  123. return y
  124. }
  125. return x
  126. }
  127. // roundDown10 rounds a number down to the nearest power of 10.
  128. func roundDown10(n int) int {
  129. var tens = 0
  130. // tens = floor(log_10(n))
  131. for n >= 10 {
  132. n = n / 10
  133. tens++
  134. }
  135. // result = 10^tens
  136. result := 1
  137. for i := 0; i < tens; i++ {
  138. result *= 10
  139. }
  140. return result
  141. }
  142. // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX].
  143. func roundUp(n int) int {
  144. base := roundDown10(n)
  145. switch {
  146. case n <= base:
  147. return base
  148. case n <= (2 * base):
  149. return 2 * base
  150. case n <= (3 * base):
  151. return 3 * base
  152. case n <= (5 * base):
  153. return 5 * base
  154. default:
  155. return 10 * base
  156. }
  157. }
  158. // run times the benchmark function in a separate goroutine.
  159. func (b *B) run() BenchmarkResult {
  160. go b.launch()
  161. <-b.signal
  162. return b.result
  163. }
  164. // launch launches the benchmark function. It gradually increases the number
  165. // of benchmark iterations until the benchmark runs for the requested benchtime.
  166. // It prints timing information in this form
  167. // testing.BenchmarkHello 100000 19 ns/op
  168. // launch is run by the run function as a separate goroutine.
  169. func (b *B) launch() {
  170. // Run the benchmark for a single iteration in case it's expensive.
  171. n := 1
  172. // Signal that we're done whether we return normally
  173. // or by FailNow's runtime.Goexit.
  174. defer func() {
  175. b.signal <- b
  176. }()
  177. b.runN(n)
  178. // Run the benchmark for at least the specified amount of time.
  179. d := *benchTime
  180. for !b.failed && b.duration < d && n < 1e9 {
  181. last := n
  182. // Predict required iterations.
  183. if b.nsPerOp() == 0 {
  184. n = 1e9
  185. } else {
  186. n = int(d.Nanoseconds() / b.nsPerOp())
  187. }
  188. // Run more iterations than we think we'll need (1.2x).
  189. // Don't grow too fast in case we had timing errors previously.
  190. // Be sure to run at least one more than last time.
  191. n = max(min(n+n/5, 100*last), last+1)
  192. // Round up to something easy to read.
  193. n = roundUp(n)
  194. b.runN(n)
  195. }
  196. b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes}
  197. }
  198. // The results of a benchmark run.
  199. type BenchmarkResult struct {
  200. N int // The number of iterations.
  201. T time.Duration // The total time taken.
  202. Bytes int64 // Bytes processed in one iteration.
  203. MemAllocs uint64 // The total number of memory allocations.
  204. MemBytes uint64 // The total number of bytes allocated.
  205. }
  206. func (r BenchmarkResult) NsPerOp() int64 {
  207. if r.N <= 0 {
  208. return 0
  209. }
  210. return r.T.Nanoseconds() / int64(r.N)
  211. }
  212. func (r BenchmarkResult) mbPerSec() float64 {
  213. if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
  214. return 0
  215. }
  216. return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
  217. }
  218. func (r BenchmarkResult) AllocsPerOp() int64 {
  219. if r.N <= 0 {
  220. return 0
  221. }
  222. return int64(r.MemAllocs) / int64(r.N)
  223. }
  224. func (r BenchmarkResult) AllocedBytesPerOp() int64 {
  225. if r.N <= 0 {
  226. return 0
  227. }
  228. return int64(r.MemBytes) / int64(r.N)
  229. }
  230. func (r BenchmarkResult) String() string {
  231. mbs := r.mbPerSec()
  232. mb := ""
  233. if mbs != 0 {
  234. mb = fmt.Sprintf("\t%7.2f MB/s", mbs)
  235. }
  236. nsop := r.NsPerOp()
  237. ns := fmt.Sprintf("%10d ns/op", nsop)
  238. if r.N > 0 && nsop < 100 {
  239. // The format specifiers here make sure that
  240. // the ones digits line up for all three possible formats.
  241. if nsop < 10 {
  242. ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
  243. } else {
  244. ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
  245. }
  246. }
  247. return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
  248. }
  249. func (r BenchmarkResult) MemString() string {
  250. return fmt.Sprintf("%8d B/op\t%8d allocs/op",
  251. r.AllocedBytesPerOp(), r.AllocsPerOp())
  252. }
  253. // An internal function but exported because it is cross-package; part of the implementation
  254. // of the "go test" command.
  255. func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
  256. // If no flag was specified, don't run benchmarks.
  257. if len(*matchBenchmarks) == 0 {
  258. return
  259. }
  260. for _, Benchmark := range benchmarks {
  261. matched, err := matchString(*matchBenchmarks, Benchmark.Name)
  262. if err != nil {
  263. fmt.Fprintf(os.Stderr, "testing: invalid regexp for -test.bench: %s\n", err)
  264. os.Exit(1)
  265. }
  266. if !matched {
  267. continue
  268. }
  269. for _, procs := range cpuList {
  270. runtime.GOMAXPROCS(procs)
  271. b := &B{
  272. common: common{
  273. signal: make(chan interface{}),
  274. },
  275. benchmark: Benchmark,
  276. }
  277. benchName := Benchmark.Name
  278. if procs != 1 {
  279. benchName = fmt.Sprintf("%s-%d", Benchmark.Name, procs)
  280. }
  281. fmt.Printf("%s\t", benchName)
  282. r := b.run()
  283. if b.failed {
  284. // The output could be very long here, but probably isn't.
  285. // We print it all, regardless, because we don't want to trim the reason
  286. // the benchmark failed.
  287. fmt.Printf("--- FAIL: %s\n%s", benchName, b.output)
  288. continue
  289. }
  290. results := r.String()
  291. if *benchmarkMemory || b.showAllocResult {
  292. results += "\t" + r.MemString()
  293. }
  294. fmt.Println(results)
  295. // Unlike with tests, we ignore the -chatty flag and always print output for
  296. // benchmarks since the output generation time will skew the results.
  297. if len(b.output) > 0 {
  298. b.trimOutput()
  299. fmt.Printf("--- BENCH: %s\n%s", benchName, b.output)
  300. }
  301. if p := runtime.GOMAXPROCS(-1); p != procs {
  302. fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
  303. }
  304. }
  305. }
  306. }
  307. // trimOutput shortens the output from a benchmark, which can be very long.
  308. func (b *B) trimOutput() {
  309. // The output is likely to appear multiple times because the benchmark
  310. // is run multiple times, but at least it will be seen. This is not a big deal
  311. // because benchmarks rarely print, but just in case, we trim it if it's too long.
  312. const maxNewlines = 10
  313. for nlCount, j := 0, 0; j < len(b.output); j++ {
  314. if b.output[j] == '\n' {
  315. nlCount++
  316. if nlCount >= maxNewlines {
  317. b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
  318. break
  319. }
  320. }
  321. }
  322. }
  323. // A PB is used by RunParallel for running parallel benchmarks.
  324. type PB struct {
  325. globalN *uint64 // shared between all worker goroutines iteration counter
  326. grain uint64 // acquire that many iterations from globalN at once
  327. cache uint64 // local cache of acquired iterations
  328. bN uint64 // total number of iterations to execute (b.N)
  329. }
  330. // Next reports whether there are more iterations to execute.
  331. func (pb *PB) Next() bool {
  332. if pb.cache == 0 {
  333. n := atomic.AddUint64(pb.globalN, pb.grain)
  334. if n <= pb.bN {
  335. pb.cache = pb.grain
  336. } else if n < pb.bN+pb.grain {
  337. pb.cache = pb.bN + pb.grain - n
  338. } else {
  339. return false
  340. }
  341. }
  342. pb.cache--
  343. return true
  344. }
  345. // RunParallel runs a benchmark in parallel.
  346. // It creates multiple goroutines and distributes b.N iterations among them.
  347. // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
  348. // non-CPU-bound benchmarks, call SetParallelism before RunParallel.
  349. // RunParallel is usually used with the go test -cpu flag.
  350. //
  351. // The body function will be run in each goroutine. It should set up any
  352. // goroutine-local state and then iterate until pb.Next returns false.
  353. // It should not use the StartTimer, StopTimer, or ResetTimer functions,
  354. // because they have global effect.
  355. func (b *B) RunParallel(body func(*PB)) {
  356. // Calculate grain size as number of iterations that take ~100µs.
  357. // 100µs is enough to amortize the overhead and provide sufficient
  358. // dynamic load balancing.
  359. grain := uint64(0)
  360. if b.previousN > 0 && b.previousDuration > 0 {
  361. grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
  362. }
  363. if grain < 1 {
  364. grain = 1
  365. }
  366. // We expect the inner loop and function call to take at least 10ns,
  367. // so do not do more than 100µs/10ns=1e4 iterations.
  368. if grain > 1e4 {
  369. grain = 1e4
  370. }
  371. n := uint64(0)
  372. numProcs := b.parallelism * runtime.GOMAXPROCS(0)
  373. var wg sync.WaitGroup
  374. wg.Add(numProcs)
  375. for p := 0; p < numProcs; p++ {
  376. go func() {
  377. defer wg.Done()
  378. pb := &PB{
  379. globalN: &n,
  380. grain: grain,
  381. bN: uint64(b.N),
  382. }
  383. body(pb)
  384. }()
  385. }
  386. wg.Wait()
  387. if n <= uint64(b.N) && !b.Failed() {
  388. b.Fatal("RunParallel: body exited without pb.Next() == false")
  389. }
  390. }
  391. // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
  392. // There is usually no need to call SetParallelism for CPU-bound benchmarks.
  393. // If p is less than 1, this call will have no effect.
  394. func (b *B) SetParallelism(p int) {
  395. if p >= 1 {
  396. b.parallelism = p
  397. }
  398. }
  399. // Benchmark benchmarks a single function. Useful for creating
  400. // custom benchmarks that do not use the "go test" command.
  401. func Benchmark(f func(b *B)) BenchmarkResult {
  402. b := &B{
  403. common: common{
  404. signal: make(chan interface{}),
  405. },
  406. benchmark: InternalBenchmark{"", f},
  407. }
  408. return b.run()
  409. }