Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 1 | // Copyright 2009 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package testing |
| 6 | |
| 7 | import ( |
| 8 | "flag" |
| 9 | "fmt" |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 10 | "internal/race" |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 11 | "os" |
| 12 | "runtime" |
| 13 | "sync" |
| 14 | "sync/atomic" |
| 15 | "time" |
| 16 | ) |
| 17 | |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 18 | var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`") |
| 19 | var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`") |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 20 | var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") |
| 21 | |
| 22 | // Global lock to ensure only one benchmark runs at a time. |
| 23 | var benchmarkLock sync.Mutex |
| 24 | |
| 25 | // Used for every benchmark for measuring memory. |
| 26 | var memStats runtime.MemStats |
| 27 | |
| 28 | // An internal type but exported because it is cross-package; part of the implementation |
| 29 | // of the "go test" command. |
| 30 | type InternalBenchmark struct { |
| 31 | Name string |
| 32 | F func(b *B) |
| 33 | } |
| 34 | |
| 35 | // B is a type passed to Benchmark functions to manage benchmark |
| 36 | // timing and to specify the number of iterations to run. |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 37 | // |
| 38 | // A benchmark ends when its Benchmark function returns or calls any of the methods |
| 39 | // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called |
| 40 | // only from the goroutine running the Benchmark function. |
| 41 | // The other reporting methods, such as the variations of Log and Error, |
| 42 | // may be called simultaneously from multiple goroutines. |
| 43 | // |
| 44 | // Like in tests, benchmark logs are accumulated during execution |
| 45 | // and dumped to standard error when done. Unlike in tests, benchmark logs |
| 46 | // are always printed, so as not to hide output whose existence may be |
| 47 | // affecting benchmark results. |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 48 | type B struct { |
| 49 | common |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 50 | context *benchContext |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 51 | N int |
| 52 | previousN int // number of iterations in the previous run |
| 53 | previousDuration time.Duration // total duration of the previous run |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 54 | benchFunc func(b *B) |
| 55 | benchTime time.Duration |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 56 | bytes int64 |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 57 | missingBytes bool // one of the subbenchmarks does not have bytes set. |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 58 | timerOn bool |
| 59 | showAllocResult bool |
| 60 | result BenchmarkResult |
| 61 | parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines |
| 62 | // The initial states of memStats.Mallocs and memStats.TotalAlloc. |
| 63 | startAllocs uint64 |
| 64 | startBytes uint64 |
| 65 | // The net total of this test after being run. |
| 66 | netAllocs uint64 |
| 67 | netBytes uint64 |
| 68 | } |
| 69 | |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 70 | // StartTimer starts timing a test. This function is called automatically |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 71 | // before a benchmark starts, but it can also used to resume timing after |
| 72 | // a call to StopTimer. |
| 73 | func (b *B) StartTimer() { |
| 74 | if !b.timerOn { |
| 75 | runtime.ReadMemStats(&memStats) |
| 76 | b.startAllocs = memStats.Mallocs |
| 77 | b.startBytes = memStats.TotalAlloc |
| 78 | b.start = time.Now() |
| 79 | b.timerOn = true |
| 80 | } |
| 81 | } |
| 82 | |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 83 | // StopTimer stops timing a test. This can be used to pause the timer |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 84 | // while performing complex initialization that you don't |
| 85 | // want to measure. |
| 86 | func (b *B) StopTimer() { |
| 87 | if b.timerOn { |
| 88 | b.duration += time.Now().Sub(b.start) |
| 89 | runtime.ReadMemStats(&memStats) |
| 90 | b.netAllocs += memStats.Mallocs - b.startAllocs |
| 91 | b.netBytes += memStats.TotalAlloc - b.startBytes |
| 92 | b.timerOn = false |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | // ResetTimer zeros the elapsed benchmark time and memory allocation counters. |
| 97 | // It does not affect whether the timer is running. |
| 98 | func (b *B) ResetTimer() { |
| 99 | if b.timerOn { |
| 100 | runtime.ReadMemStats(&memStats) |
| 101 | b.startAllocs = memStats.Mallocs |
| 102 | b.startBytes = memStats.TotalAlloc |
| 103 | b.start = time.Now() |
| 104 | } |
| 105 | b.duration = 0 |
| 106 | b.netAllocs = 0 |
| 107 | b.netBytes = 0 |
| 108 | } |
| 109 | |
| 110 | // SetBytes records the number of bytes processed in a single operation. |
| 111 | // If this is called, the benchmark will report ns/op and MB/s. |
| 112 | func (b *B) SetBytes(n int64) { b.bytes = n } |
| 113 | |
| 114 | // ReportAllocs enables malloc statistics for this benchmark. |
| 115 | // It is equivalent to setting -test.benchmem, but it only affects the |
| 116 | // benchmark function that calls ReportAllocs. |
| 117 | func (b *B) ReportAllocs() { |
| 118 | b.showAllocResult = true |
| 119 | } |
| 120 | |
| 121 | func (b *B) nsPerOp() int64 { |
| 122 | if b.N <= 0 { |
| 123 | return 0 |
| 124 | } |
| 125 | return b.duration.Nanoseconds() / int64(b.N) |
| 126 | } |
| 127 | |
| 128 | // runN runs a single benchmark for the specified number of iterations. |
| 129 | func (b *B) runN(n int) { |
| 130 | benchmarkLock.Lock() |
| 131 | defer benchmarkLock.Unlock() |
| 132 | // Try to get a comparable environment for each run |
| 133 | // by clearing garbage from previous runs. |
| 134 | runtime.GC() |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 135 | b.raceErrors = -race.Errors() |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 136 | b.N = n |
| 137 | b.parallelism = 1 |
| 138 | b.ResetTimer() |
| 139 | b.StartTimer() |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 140 | b.benchFunc(b) |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 141 | b.StopTimer() |
| 142 | b.previousN = n |
| 143 | b.previousDuration = b.duration |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 144 | b.raceErrors += race.Errors() |
| 145 | if b.raceErrors > 0 { |
| 146 | b.Errorf("race detected during execution of benchmark") |
| 147 | } |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 148 | } |
| 149 | |
| 150 | func min(x, y int) int { |
| 151 | if x > y { |
| 152 | return y |
| 153 | } |
| 154 | return x |
| 155 | } |
| 156 | |
| 157 | func max(x, y int) int { |
| 158 | if x < y { |
| 159 | return y |
| 160 | } |
| 161 | return x |
| 162 | } |
| 163 | |
| 164 | // roundDown10 rounds a number down to the nearest power of 10. |
| 165 | func roundDown10(n int) int { |
| 166 | var tens = 0 |
| 167 | // tens = floor(log_10(n)) |
| 168 | for n >= 10 { |
| 169 | n = n / 10 |
| 170 | tens++ |
| 171 | } |
| 172 | // result = 10^tens |
| 173 | result := 1 |
| 174 | for i := 0; i < tens; i++ { |
| 175 | result *= 10 |
| 176 | } |
| 177 | return result |
| 178 | } |
| 179 | |
| 180 | // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. |
| 181 | func roundUp(n int) int { |
| 182 | base := roundDown10(n) |
| 183 | switch { |
| 184 | case n <= base: |
| 185 | return base |
| 186 | case n <= (2 * base): |
| 187 | return 2 * base |
| 188 | case n <= (3 * base): |
| 189 | return 3 * base |
| 190 | case n <= (5 * base): |
| 191 | return 5 * base |
| 192 | default: |
| 193 | return 10 * base |
| 194 | } |
| 195 | } |
| 196 | |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 197 | // run1 runs the first iteration of benchFunc. It returns whether more |
| 198 | // iterations of this benchmarks should be run. |
| 199 | func (b *B) run1() bool { |
| 200 | if ctx := b.context; ctx != nil { |
| 201 | // Extend maxLen, if needed. |
| 202 | if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen { |
| 203 | ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size. |
| 204 | } |
| 205 | } |
| 206 | go func() { |
| 207 | // Signal that we're done whether we return normally |
| 208 | // or by FailNow's runtime.Goexit. |
| 209 | defer func() { |
| 210 | b.signal <- true |
| 211 | }() |
| 212 | |
| 213 | b.runN(1) |
| 214 | }() |
| 215 | <-b.signal |
| 216 | if b.failed { |
| 217 | fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output) |
| 218 | return false |
| 219 | } |
| 220 | // Only print the output if we know we are not going to proceed. |
| 221 | // Otherwise it is printed in processBench. |
Dan Willemsen | 1d8b74d | 2017-01-19 15:06:31 -0800 | [diff] [blame^] | 222 | if atomic.LoadInt32(&b.hasSub) != 0 || b.finished { |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 223 | tag := "BENCH" |
| 224 | if b.skipped { |
| 225 | tag = "SKIP" |
| 226 | } |
| 227 | if b.chatty && (len(b.output) > 0 || b.finished) { |
| 228 | b.trimOutput() |
| 229 | fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output) |
| 230 | } |
| 231 | return false |
| 232 | } |
| 233 | return true |
| 234 | } |
| 235 | |
| 236 | // run executes the benchmark in a separate goroutine, including all of its |
| 237 | // subbenchmarks. b must not have subbenchmarks. |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 238 | func (b *B) run() BenchmarkResult { |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 239 | if b.context != nil { |
| 240 | // Running go test --test.bench |
| 241 | b.context.processBench(b) // Must call doBench. |
| 242 | } else { |
| 243 | // Running func Benchmark. |
| 244 | b.doBench() |
| 245 | } |
| 246 | return b.result |
| 247 | } |
| 248 | |
| 249 | func (b *B) doBench() BenchmarkResult { |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 250 | go b.launch() |
| 251 | <-b.signal |
| 252 | return b.result |
| 253 | } |
| 254 | |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 255 | // launch launches the benchmark function. It gradually increases the number |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 256 | // of benchmark iterations until the benchmark runs for the requested benchtime. |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 257 | // launch is run by the doBench function as a separate goroutine. |
| 258 | // run1 must have been called on b. |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 259 | func (b *B) launch() { |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 260 | // Signal that we're done whether we return normally |
| 261 | // or by FailNow's runtime.Goexit. |
| 262 | defer func() { |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 263 | b.signal <- true |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 264 | }() |
| 265 | |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 266 | // Run the benchmark for at least the specified amount of time. |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 267 | d := b.benchTime |
| 268 | for n := 1; !b.failed && b.duration < d && n < 1e9; { |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 269 | last := n |
| 270 | // Predict required iterations. |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 271 | n = int(d.Nanoseconds()) |
| 272 | if nsop := b.nsPerOp(); nsop != 0 { |
| 273 | n /= int(nsop) |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 274 | } |
| 275 | // Run more iterations than we think we'll need (1.2x). |
| 276 | // Don't grow too fast in case we had timing errors previously. |
| 277 | // Be sure to run at least one more than last time. |
| 278 | n = max(min(n+n/5, 100*last), last+1) |
| 279 | // Round up to something easy to read. |
| 280 | n = roundUp(n) |
| 281 | b.runN(n) |
| 282 | } |
| 283 | b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes} |
| 284 | } |
| 285 | |
| 286 | // The results of a benchmark run. |
| 287 | type BenchmarkResult struct { |
| 288 | N int // The number of iterations. |
| 289 | T time.Duration // The total time taken. |
| 290 | Bytes int64 // Bytes processed in one iteration. |
| 291 | MemAllocs uint64 // The total number of memory allocations. |
| 292 | MemBytes uint64 // The total number of bytes allocated. |
| 293 | } |
| 294 | |
| 295 | func (r BenchmarkResult) NsPerOp() int64 { |
| 296 | if r.N <= 0 { |
| 297 | return 0 |
| 298 | } |
| 299 | return r.T.Nanoseconds() / int64(r.N) |
| 300 | } |
| 301 | |
| 302 | func (r BenchmarkResult) mbPerSec() float64 { |
| 303 | if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { |
| 304 | return 0 |
| 305 | } |
| 306 | return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() |
| 307 | } |
| 308 | |
| 309 | func (r BenchmarkResult) AllocsPerOp() int64 { |
| 310 | if r.N <= 0 { |
| 311 | return 0 |
| 312 | } |
| 313 | return int64(r.MemAllocs) / int64(r.N) |
| 314 | } |
| 315 | |
| 316 | func (r BenchmarkResult) AllocedBytesPerOp() int64 { |
| 317 | if r.N <= 0 { |
| 318 | return 0 |
| 319 | } |
| 320 | return int64(r.MemBytes) / int64(r.N) |
| 321 | } |
| 322 | |
| 323 | func (r BenchmarkResult) String() string { |
| 324 | mbs := r.mbPerSec() |
| 325 | mb := "" |
| 326 | if mbs != 0 { |
| 327 | mb = fmt.Sprintf("\t%7.2f MB/s", mbs) |
| 328 | } |
| 329 | nsop := r.NsPerOp() |
| 330 | ns := fmt.Sprintf("%10d ns/op", nsop) |
| 331 | if r.N > 0 && nsop < 100 { |
| 332 | // The format specifiers here make sure that |
| 333 | // the ones digits line up for all three possible formats. |
| 334 | if nsop < 10 { |
| 335 | ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) |
| 336 | } else { |
| 337 | ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) |
| 338 | } |
| 339 | } |
| 340 | return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) |
| 341 | } |
| 342 | |
| 343 | func (r BenchmarkResult) MemString() string { |
| 344 | return fmt.Sprintf("%8d B/op\t%8d allocs/op", |
| 345 | r.AllocedBytesPerOp(), r.AllocsPerOp()) |
| 346 | } |
| 347 | |
Dan Willemsen | 6ff2325 | 2015-09-15 13:49:18 -0700 | [diff] [blame] | 348 | // benchmarkName returns full name of benchmark including procs suffix. |
| 349 | func benchmarkName(name string, n int) string { |
| 350 | if n != 1 { |
| 351 | return fmt.Sprintf("%s-%d", name, n) |
| 352 | } |
| 353 | return name |
| 354 | } |
| 355 | |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 356 | type benchContext struct { |
| 357 | match *matcher |
| 358 | |
| 359 | maxLen int // The largest recorded benchmark name. |
| 360 | extLen int // Maximum extension length. |
| 361 | } |
| 362 | |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 363 | // An internal function but exported because it is cross-package; part of the implementation |
| 364 | // of the "go test" command. |
| 365 | func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) { |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 366 | runBenchmarks(matchString, benchmarks) |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 367 | } |
| 368 | |
Dan Willemsen | bbdf664 | 2017-01-13 22:57:23 -0800 | [diff] [blame] | 369 | func runBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool { |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 370 | // If no flag was specified, don't run benchmarks. |
| 371 | if len(*matchBenchmarks) == 0 { |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 372 | return true |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 373 | } |
Dan Willemsen | 6ff2325 | 2015-09-15 13:49:18 -0700 | [diff] [blame] | 374 | // Collect matching benchmarks and determine longest name. |
| 375 | maxprocs := 1 |
| 376 | for _, procs := range cpuList { |
| 377 | if procs > maxprocs { |
| 378 | maxprocs = procs |
| 379 | } |
| 380 | } |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 381 | ctx := &benchContext{ |
| 382 | match: newMatcher(matchString, *matchBenchmarks, "-test.bench"), |
| 383 | extLen: len(benchmarkName("", maxprocs)), |
| 384 | } |
Dan Willemsen | 6ff2325 | 2015-09-15 13:49:18 -0700 | [diff] [blame] | 385 | var bs []InternalBenchmark |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 386 | for _, Benchmark := range benchmarks { |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 387 | if _, matched := ctx.match.fullName(nil, Benchmark.Name); matched { |
Dan Willemsen | 6ff2325 | 2015-09-15 13:49:18 -0700 | [diff] [blame] | 388 | bs = append(bs, Benchmark) |
| 389 | benchName := benchmarkName(Benchmark.Name, maxprocs) |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 390 | if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen { |
| 391 | ctx.maxLen = l |
Dan Willemsen | 6ff2325 | 2015-09-15 13:49:18 -0700 | [diff] [blame] | 392 | } |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 393 | } |
Dan Willemsen | 6ff2325 | 2015-09-15 13:49:18 -0700 | [diff] [blame] | 394 | } |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 395 | main := &B{ |
| 396 | common: common{ |
| 397 | name: "Main", |
| 398 | w: os.Stdout, |
| 399 | chatty: *chatty, |
| 400 | }, |
| 401 | benchFunc: func(b *B) { |
| 402 | for _, Benchmark := range bs { |
| 403 | b.Run(Benchmark.Name, Benchmark.F) |
| 404 | } |
| 405 | }, |
| 406 | benchTime: *benchTime, |
| 407 | context: ctx, |
| 408 | } |
| 409 | main.runN(1) |
| 410 | return !main.failed |
| 411 | } |
| 412 | |
| 413 | // processBench runs bench b for the configured CPU counts and prints the results. |
| 414 | func (ctx *benchContext) processBench(b *B) { |
| 415 | for i, procs := range cpuList { |
| 416 | runtime.GOMAXPROCS(procs) |
| 417 | benchName := benchmarkName(b.name, procs) |
| 418 | fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName) |
| 419 | // Recompute the running time for all but the first iteration. |
| 420 | if i > 0 { |
| 421 | b = &B{ |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 422 | common: common{ |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 423 | signal: make(chan bool), |
| 424 | name: b.name, |
| 425 | w: b.w, |
| 426 | chatty: b.chatty, |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 427 | }, |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 428 | benchFunc: b.benchFunc, |
| 429 | benchTime: b.benchTime, |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 430 | } |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 431 | b.run1() |
| 432 | } |
| 433 | r := b.doBench() |
| 434 | if b.failed { |
| 435 | // The output could be very long here, but probably isn't. |
| 436 | // We print it all, regardless, because we don't want to trim the reason |
| 437 | // the benchmark failed. |
| 438 | fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output) |
| 439 | continue |
| 440 | } |
| 441 | results := r.String() |
| 442 | if *benchmarkMemory || b.showAllocResult { |
| 443 | results += "\t" + r.MemString() |
| 444 | } |
| 445 | fmt.Fprintln(b.w, results) |
| 446 | // Unlike with tests, we ignore the -chatty flag and always print output for |
| 447 | // benchmarks since the output generation time will skew the results. |
| 448 | if len(b.output) > 0 { |
| 449 | b.trimOutput() |
| 450 | fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output) |
| 451 | } |
| 452 | if p := runtime.GOMAXPROCS(-1); p != procs { |
| 453 | fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p) |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 454 | } |
| 455 | } |
| 456 | } |
| 457 | |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 458 | // Run benchmarks f as a subbenchmark with the given name. It reports |
| 459 | // whether there were any failures. |
| 460 | // |
| 461 | // A subbenchmark is like any other benchmark. A benchmark that calls Run at |
| 462 | // least once will not be measured itself and will be called once with N=1. |
Dan Willemsen | 1d8b74d | 2017-01-19 15:06:31 -0800 | [diff] [blame^] | 463 | // |
| 464 | // Run may be called simultaneously from multiple goroutines, but all such |
| 465 | // calls must happen before the outer benchmark function for b returns. |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 466 | func (b *B) Run(name string, f func(b *B)) bool { |
| 467 | // Since b has subbenchmarks, we will no longer run it as a benchmark itself. |
| 468 | // Release the lock and acquire it on exit to ensure locks stay paired. |
Dan Willemsen | 1d8b74d | 2017-01-19 15:06:31 -0800 | [diff] [blame^] | 469 | atomic.StoreInt32(&b.hasSub, 1) |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 470 | benchmarkLock.Unlock() |
| 471 | defer benchmarkLock.Lock() |
| 472 | |
| 473 | benchName, ok := b.name, true |
| 474 | if b.context != nil { |
| 475 | benchName, ok = b.context.match.fullName(&b.common, name) |
| 476 | } |
| 477 | if !ok { |
| 478 | return true |
| 479 | } |
| 480 | sub := &B{ |
| 481 | common: common{ |
| 482 | signal: make(chan bool), |
| 483 | name: benchName, |
| 484 | parent: &b.common, |
| 485 | level: b.level + 1, |
| 486 | w: b.w, |
| 487 | chatty: b.chatty, |
| 488 | }, |
| 489 | benchFunc: f, |
| 490 | benchTime: b.benchTime, |
| 491 | context: b.context, |
| 492 | } |
| 493 | if sub.run1() { |
| 494 | sub.run() |
| 495 | } |
| 496 | b.add(sub.result) |
| 497 | return !sub.failed |
| 498 | } |
| 499 | |
| 500 | // add simulates running benchmarks in sequence in a single iteration. It is |
| 501 | // used to give some meaningful results in case func Benchmark is used in |
| 502 | // combination with Run. |
| 503 | func (b *B) add(other BenchmarkResult) { |
| 504 | r := &b.result |
| 505 | // The aggregated BenchmarkResults resemble running all subbenchmarks as |
| 506 | // in sequence in a single benchmark. |
| 507 | r.N = 1 |
| 508 | r.T += time.Duration(other.NsPerOp()) |
| 509 | if other.Bytes == 0 { |
| 510 | // Summing Bytes is meaningless in aggregate if not all subbenchmarks |
| 511 | // set it. |
| 512 | b.missingBytes = true |
| 513 | r.Bytes = 0 |
| 514 | } |
| 515 | if !b.missingBytes { |
| 516 | r.Bytes += other.Bytes |
| 517 | } |
| 518 | r.MemAllocs += uint64(other.AllocsPerOp()) |
| 519 | r.MemBytes += uint64(other.AllocedBytesPerOp()) |
| 520 | } |
| 521 | |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 522 | // trimOutput shortens the output from a benchmark, which can be very long. |
| 523 | func (b *B) trimOutput() { |
| 524 | // The output is likely to appear multiple times because the benchmark |
| 525 | // is run multiple times, but at least it will be seen. This is not a big deal |
| 526 | // because benchmarks rarely print, but just in case, we trim it if it's too long. |
| 527 | const maxNewlines = 10 |
| 528 | for nlCount, j := 0, 0; j < len(b.output); j++ { |
| 529 | if b.output[j] == '\n' { |
| 530 | nlCount++ |
| 531 | if nlCount >= maxNewlines { |
| 532 | b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) |
| 533 | break |
| 534 | } |
| 535 | } |
| 536 | } |
| 537 | } |
| 538 | |
| 539 | // A PB is used by RunParallel for running parallel benchmarks. |
| 540 | type PB struct { |
| 541 | globalN *uint64 // shared between all worker goroutines iteration counter |
| 542 | grain uint64 // acquire that many iterations from globalN at once |
| 543 | cache uint64 // local cache of acquired iterations |
| 544 | bN uint64 // total number of iterations to execute (b.N) |
| 545 | } |
| 546 | |
| 547 | // Next reports whether there are more iterations to execute. |
| 548 | func (pb *PB) Next() bool { |
| 549 | if pb.cache == 0 { |
| 550 | n := atomic.AddUint64(pb.globalN, pb.grain) |
| 551 | if n <= pb.bN { |
| 552 | pb.cache = pb.grain |
| 553 | } else if n < pb.bN+pb.grain { |
| 554 | pb.cache = pb.bN + pb.grain - n |
| 555 | } else { |
| 556 | return false |
| 557 | } |
| 558 | } |
| 559 | pb.cache-- |
| 560 | return true |
| 561 | } |
| 562 | |
| 563 | // RunParallel runs a benchmark in parallel. |
| 564 | // It creates multiple goroutines and distributes b.N iterations among them. |
| 565 | // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for |
| 566 | // non-CPU-bound benchmarks, call SetParallelism before RunParallel. |
| 567 | // RunParallel is usually used with the go test -cpu flag. |
| 568 | // |
| 569 | // The body function will be run in each goroutine. It should set up any |
| 570 | // goroutine-local state and then iterate until pb.Next returns false. |
| 571 | // It should not use the StartTimer, StopTimer, or ResetTimer functions, |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 572 | // because they have global effect. It should also not call Run. |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 573 | func (b *B) RunParallel(body func(*PB)) { |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 574 | if b.N == 0 { |
| 575 | return // Nothing to do when probing. |
| 576 | } |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 577 | // Calculate grain size as number of iterations that take ~100µs. |
| 578 | // 100µs is enough to amortize the overhead and provide sufficient |
| 579 | // dynamic load balancing. |
| 580 | grain := uint64(0) |
| 581 | if b.previousN > 0 && b.previousDuration > 0 { |
| 582 | grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) |
| 583 | } |
| 584 | if grain < 1 { |
| 585 | grain = 1 |
| 586 | } |
| 587 | // We expect the inner loop and function call to take at least 10ns, |
| 588 | // so do not do more than 100µs/10ns=1e4 iterations. |
| 589 | if grain > 1e4 { |
| 590 | grain = 1e4 |
| 591 | } |
| 592 | |
| 593 | n := uint64(0) |
| 594 | numProcs := b.parallelism * runtime.GOMAXPROCS(0) |
| 595 | var wg sync.WaitGroup |
| 596 | wg.Add(numProcs) |
| 597 | for p := 0; p < numProcs; p++ { |
| 598 | go func() { |
| 599 | defer wg.Done() |
| 600 | pb := &PB{ |
| 601 | globalN: &n, |
| 602 | grain: grain, |
| 603 | bN: uint64(b.N), |
| 604 | } |
| 605 | body(pb) |
| 606 | }() |
| 607 | } |
| 608 | wg.Wait() |
| 609 | if n <= uint64(b.N) && !b.Failed() { |
| 610 | b.Fatal("RunParallel: body exited without pb.Next() == false") |
| 611 | } |
| 612 | } |
| 613 | |
| 614 | // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. |
| 615 | // There is usually no need to call SetParallelism for CPU-bound benchmarks. |
| 616 | // If p is less than 1, this call will have no effect. |
| 617 | func (b *B) SetParallelism(p int) { |
| 618 | if p >= 1 { |
| 619 | b.parallelism = p |
| 620 | } |
| 621 | } |
| 622 | |
| 623 | // Benchmark benchmarks a single function. Useful for creating |
| 624 | // custom benchmarks that do not use the "go test" command. |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 625 | // |
| 626 | // If f calls Run, the result will be an estimate of running all its |
| 627 | // subbenchmarks that don't call Run in sequence in a single benchmark. |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 628 | func Benchmark(f func(b *B)) BenchmarkResult { |
| 629 | b := &B{ |
| 630 | common: common{ |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 631 | signal: make(chan bool), |
| 632 | w: discard{}, |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 633 | }, |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 634 | benchFunc: f, |
| 635 | benchTime: *benchTime, |
| 636 | } |
| 637 | if !b.run1() { |
| 638 | return BenchmarkResult{} |
Colin Cross | 7bb052a | 2015-02-03 12:59:37 -0800 | [diff] [blame] | 639 | } |
| 640 | return b.run() |
| 641 | } |
Dan Willemsen | 0c15709 | 2016-07-08 13:57:52 -0700 | [diff] [blame] | 642 | |
| 643 | type discard struct{} |
| 644 | |
| 645 | func (discard) Write(b []byte) (n int, err error) { return len(b), nil } |