blob: ed7273afb6e50c6742f10af41b1d7c907cbbcca7 [file] [log] [blame]
Yann Colletd38063f2018-11-13 11:01:59 -08001/*
Nick Terrellac58c8d2020-03-26 15:19:05 -07002 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
Yann Colletd38063f2018-11-13 11:01:59 -08003 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12
13/* *************************************
14* Includes
15***************************************/
Yann Colletd38063f2018-11-13 11:01:59 -080016#include <stdlib.h> /* malloc, free */
17#include <string.h> /* memset */
Yann Colletd38063f2018-11-13 11:01:59 -080018#include <assert.h> /* assert */
19
Yann Collet59a71162019-04-10 12:37:03 -070020#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
Yann Colletd38063f2018-11-13 11:01:59 -080021#include "benchfn.h"
22
23
24/* *************************************
25* Constants
26***************************************/
/* Duration of one second, in two units.
 * SEC_TO_MICRO is not defined in this file (presumably provided by timefn.h). */
#define TIMELOOP_MICROSEC SEC_TO_MICRO        /* 1 second */
#define TIMELOOP_NANOSEC  (1*1000000000ULL)   /* 1 second */

/* Size suffixes : meant to follow a number, e.g. `4 KB` expands to `4 *(1 <<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
33
34
35/* *************************************
Yann Collet59a71162019-04-10 12:37:03 -070036* Debug errors
Yann Colletd38063f2018-11-13 11:01:59 -080037***************************************/
/* DEBUGOUTPUT() prints to stderr only when DEBUG is defined and >= 1;
 * otherwise it expands to an empty statement.
 * All statement-like macros below are wrapped in do { } while (0),
 * so that `MACRO(...);` is exactly one statement
 * and can be used safely as the body of an unbraced if / else. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>   /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)   do { } while (0)
#endif


/* error without displaying :
 * returns `retValue` from the enclosing function.
 * The location and the printf-style message are only emitted in debug builds. */
#define RETURN_QUIET_ERROR(retValue, ...) do {         \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);     \
    DEBUGOUTPUT("Error : ");                           \
    DEBUGOUTPUT(__VA_ARGS__);                          \
    DEBUGOUTPUT(" \n");                                \
    return retValue;                                   \
} while (0)

/* Abort execution if a condition is not met.
 * Unlike assert(), the check does not depend on NDEBUG :
 * only the diagnostic message depends on DEBUG. */
#define CONTROL(c)  do { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } while (0)
58
Yann Colletd38063f2018-11-13 11:01:59 -080059
60/* *************************************
61* Benchmarking an arbitrary function
62***************************************/
63
64int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
65{
Yann Collet9867cdb2018-11-13 12:01:17 -080066 return outcome.error_tag_never_ever_use_directly == 0;
Yann Colletd38063f2018-11-13 11:01:59 -080067}
68
69/* warning : this function will stop program execution if outcome is invalid !
70 * check outcome validity first, using BMK_isValid_runResult() */
71BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
72{
Yann Collet944e2e92019-06-21 15:58:55 -070073 CONTROL(outcome.error_tag_never_ever_use_directly == 0);
Yann Collet9867cdb2018-11-13 12:01:17 -080074 return outcome.internal_never_ever_use_directly;
Yann Colletd38063f2018-11-13 11:01:59 -080075}
76
Yann Collet9867cdb2018-11-13 12:01:17 -080077size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
78{
Yann Collet944e2e92019-06-21 15:58:55 -070079 CONTROL(outcome.error_tag_never_ever_use_directly != 0);
Yann Collet9867cdb2018-11-13 12:01:17 -080080 return outcome.error_result_never_ever_use_directly;
81}
82
83static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
Yann Colletd38063f2018-11-13 11:01:59 -080084{
85 BMK_runOutcome_t b;
86 memset(&b, 0, sizeof(b));
Yann Collet9867cdb2018-11-13 12:01:17 -080087 b.error_tag_never_ever_use_directly = 1;
88 b.error_result_never_ever_use_directly = errorResult;
Yann Colletd38063f2018-11-13 11:01:59 -080089 return b;
90}
91
92static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
93{
94 BMK_runOutcome_t outcome;
Yann Collet9867cdb2018-11-13 12:01:17 -080095 outcome.error_tag_never_ever_use_directly = 0;
96 outcome.internal_never_ever_use_directly = runTime;
Yann Colletd38063f2018-11-13 11:01:59 -080097 return outcome;
98}
99
100
101/* initFn will be measured once, benchFn will be measured `nbLoops` times */
102/* initFn is optional, provide NULL if none */
Yann Collet9867cdb2018-11-13 12:01:17 -0800103/* benchFn must return a size_t value that errorFn can interpret */
Yann Colletd38063f2018-11-13 11:01:59 -0800104/* takes # of blocks and list of size & stuff for each. */
105/* can report result of benchFn for each block into blockResult. */
106/* blockResult is optional, provide NULL if this information is not required */
Yann Collet9867cdb2018-11-13 12:01:17 -0800107/* note : time per loop can be reported as zero if run time < timer resolution */
Yann Colletb830ccc2018-11-13 13:05:39 -0800108BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
109 unsigned nbLoops)
Yann Colletd38063f2018-11-13 11:01:59 -0800110{
111 size_t dstSize = 0;
Yann Collet9867cdb2018-11-13 12:01:17 -0800112 nbLoops += !nbLoops; /* minimum nbLoops is 1 */
Yann Colletd38063f2018-11-13 11:01:59 -0800113
114 /* init */
115 { size_t i;
Yann Colletb830ccc2018-11-13 13:05:39 -0800116 for(i = 0; i < p.blockCount; i++) {
117 memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
Yann Collet59a71162019-04-10 12:37:03 -0700118 } }
Yann Colletd38063f2018-11-13 11:01:59 -0800119
120 /* benchmark */
121 { UTIL_time_t const clockStart = UTIL_getTime();
122 unsigned loopNb, blockNb;
Yann Colletb830ccc2018-11-13 13:05:39 -0800123 if (p.initFn != NULL) p.initFn(p.initPayload);
Yann Colletd38063f2018-11-13 11:01:59 -0800124 for (loopNb = 0; loopNb < nbLoops; loopNb++) {
Yann Colletb830ccc2018-11-13 13:05:39 -0800125 for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
126 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
127 p.dstBuffers[blockNb], p.dstCapacities[blockNb],
128 p.benchPayload);
Yann Colletd38063f2018-11-13 11:01:59 -0800129 if (loopNb == 0) {
Yann Colletb830ccc2018-11-13 13:05:39 -0800130 if (p.blockResults != NULL) p.blockResults[blockNb] = res;
131 if ((p.errorFn != NULL) && (p.errorFn(res))) {
Yann Collet9867cdb2018-11-13 12:01:17 -0800132 RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
Yann Colletd38063f2018-11-13 11:01:59 -0800133 "Function benchmark failed on block %u (of size %u) with error %i",
Yann Colletededcfc2018-12-21 16:19:44 -0800134 blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
Yann Colletd38063f2018-11-13 11:01:59 -0800135 }
136 dstSize += res;
Yann Colletd38063f2018-11-13 11:01:59 -0800137 } }
138 } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
139
Yann Collet59a71162019-04-10 12:37:03 -0700140 { PTime const totalTime = UTIL_clockSpanNano(clockStart);
Yann Colletd38063f2018-11-13 11:01:59 -0800141 BMK_runTime_t rt;
Yann Colletf75ad2e2019-01-25 14:22:25 -0800142 rt.nanoSecPerRun = (double)totalTime / nbLoops;
Yann Colletd38063f2018-11-13 11:01:59 -0800143 rt.sumOfReturn = dstSize;
144 return BMK_setValid_runTime(rt);
145 } }
146}
147
148
149/* ==== Benchmarking any function, providing intermediate results ==== */
150
151struct BMK_timedFnState_s {
Yann Collet59a71162019-04-10 12:37:03 -0700152 PTime timeSpent_ns;
153 PTime timeBudget_ns;
154 PTime runBudget_ns;
Yann Colletd38063f2018-11-13 11:01:59 -0800155 BMK_runTime_t fastestRun;
156 unsigned nbLoops;
157 UTIL_time_t coolTime;
158}; /* typedef'd to BMK_timedFnState_t within bench.h */
159
160BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
161{
162 BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
163 if (r == NULL) return NULL; /* malloc() error */
164 BMK_resetTimedFnState(r, total_ms, run_ms);
165 return r;
166}
167
Yann Collet526ec642019-04-10 16:05:02 -0700168void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
Yann Colletd38063f2018-11-13 11:01:59 -0800169
Yann Collet9703a592019-04-10 15:54:55 -0700170BMK_timedFnState_t*
171BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
Yann Colletf75ad2e2019-01-25 14:22:25 -0800172{
Yann Collet526ec642019-04-10 16:05:02 -0700173 typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
174 typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
175 size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
Yann Colletf75ad2e2019-01-25 14:22:25 -0800176 BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
Yann Collet526ec642019-04-10 16:05:02 -0700177 if (buffer == NULL) return NULL;
Yann Colletf75ad2e2019-01-25 14:22:25 -0800178 if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
Yann Collet526ec642019-04-10 16:05:02 -0700179 if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
Yann Colletf75ad2e2019-01-25 14:22:25 -0800180 BMK_resetTimedFnState(r, total_ms, run_ms);
181 return r;
182}
183
Yann Colletd38063f2018-11-13 11:01:59 -0800184void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
185{
186 if (!total_ms) total_ms = 1 ;
187 if (!run_ms) run_ms = 1;
188 if (run_ms > total_ms) run_ms = total_ms;
189 timedFnState->timeSpent_ns = 0;
Yann Collet59a71162019-04-10 12:37:03 -0700190 timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
191 timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
Yann Colletf75ad2e2019-01-25 14:22:25 -0800192 timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
Yann Colletd38063f2018-11-13 11:01:59 -0800193 timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
194 timedFnState->nbLoops = 1;
195 timedFnState->coolTime = UTIL_getTime();
196}
197
198/* Tells if nb of seconds set in timedFnState for all runs is spent.
199 * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
200int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
201{
202 return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
203}
204
205
/* MIN() : smallest of 2 values.
 * warning : arguments may be evaluated twice, do not pass expressions with side effects.
 * Any previous definition is discarded first. */
#undef MIN
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )

#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)   /* 0.5 seconds */
210
Yann Colletb830ccc2018-11-13 13:05:39 -0800211BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
212 BMK_benchParams_t p)
Yann Colletd38063f2018-11-13 11:01:59 -0800213{
Yann Collet59a71162019-04-10 12:37:03 -0700214 PTime const runBudget_ns = cont->runBudget_ns;
215 PTime const runTimeMin_ns = runBudget_ns / 2;
Yann Colletd38063f2018-11-13 11:01:59 -0800216 int completed = 0;
217 BMK_runTime_t bestRunTime = cont->fastestRun;
218
219 while (!completed) {
Yann Collet59a71162019-04-10 12:37:03 -0700220 BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
Yann Colletd38063f2018-11-13 11:01:59 -0800221
222 if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
Yann Collet9867cdb2018-11-13 12:01:17 -0800223 return runResult;
Yann Colletd38063f2018-11-13 11:01:59 -0800224 }
225
226 { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
Yann Colletf75ad2e2019-01-25 14:22:25 -0800227 double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
Yann Colletd38063f2018-11-13 11:01:59 -0800228
Yann Colletf75ad2e2019-01-25 14:22:25 -0800229 cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
Yann Colletd38063f2018-11-13 11:01:59 -0800230
231 /* estimate nbLoops for next run to last approximately 1 second */
232 if (loopDuration_ns > (runBudget_ns / 50)) {
Yann Colletf75ad2e2019-01-25 14:22:25 -0800233 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
Yann Collet59a71162019-04-10 12:37:03 -0700234 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
Yann Colletd38063f2018-11-13 11:01:59 -0800235 } else {
236 /* previous run was too short : blindly increase workload by x multiplier */
237 const unsigned multiplier = 10;
238 assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
239 cont->nbLoops *= multiplier;
240 }
241
242 if(loopDuration_ns < runTimeMin_ns) {
243 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
244 assert(completed == 0);
245 continue;
246 } else {
247 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
248 bestRunTime = newRunTime;
249 }
250 completed = 1;
251 }
252 }
253 } /* while (!completed) */
254
255 return BMK_setValid_runTime(bestRunTime);
256}