Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 1 | /* |
Nick Terrell | ac58c8d | 2020-03-26 15:19:05 -0700 | [diff] [blame] | 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | * You may select, at your option, one of the above-listed licenses. |
| 9 | */ |
| 10 | |
| 11 | |
| 12 | |
| 13 | /* ************************************* |
| 14 | * Includes |
| 15 | ***************************************/ |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 16 | #include <stdlib.h> /* malloc, free */ |
| 17 | #include <string.h> /* memset */ |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 18 | #include <assert.h> /* assert */ |
| 19 | |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 20 | #include "timefn.h" /* UTIL_time_t, UTIL_getTime */ |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 21 | #include "benchfn.h" |
| 22 | |
| 23 | |
| 24 | /* ************************************* |
| 25 | * Constants |
| 26 | ***************************************/ |
/* Reference durations : both represent 1 second, in different units */
#define TIMELOOP_MICROSEC   SEC_TO_MICRO          /* 1 second */
#define TIMELOOP_NANOSEC    (1*1000000000ULL)     /* 1 second */

/* Size suffixes, meant to be written after a number : `4 KB` => `4 *(1 <<10)` */
#define KB  *(1 <<10)
#define MB  *(1 <<20)
#define GB  *(1U<<30)
| 33 | |
| 34 | |
| 35 | /* ************************************* |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 36 | * Debug errors |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 37 | ***************************************/ |
/* DEBUGOUTPUT() :
 * printf-like trace sent to stderr, only active when DEBUG >= 1.
 * In all other builds, it expands to nothing.
 * The active variant is wrapped into `do { } while (0)`, so that
 * `DEBUGOUTPUT(...);` always behaves as exactly one statement,
 * including in an unbraced `if (...) DEBUGOUTPUT(...); else ...`. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>   /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)
#endif
| 45 | |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 46 | |
/* RETURN_QUIET_ERROR() :
 * Leave the enclosing function, returning `retValue`.
 * The location (file, line) and the printf-like message passed as variadic
 * arguments are only traced through DEBUGOUTPUT() : nothing is displayed
 * when DEBUGOUTPUT() is compiled out.
 * Wrapped into `do { } while (0)`, so that `RETURN_QUIET_ERROR(...);`
 * is a single statement, safe to use in an unbraced if / else. */
#define RETURN_QUIET_ERROR(retValue, ...) do {           \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);       \
    DEBUGOUTPUT("Error : ");                             \
    DEBUGOUTPUT(__VA_ARGS__);                            \
    DEBUGOUTPUT(" \n");                                  \
    return retValue;                                     \
} while (0)
| 55 | |
/* CONTROL() :
 * Abort execution if condition `c` is not met.
 * Unlike assert(), the check remains active whatever the build mode;
 * the failed condition is only traced through DEBUGOUTPUT().
 * Wrapped into `do { } while (0)`, so that `CONTROL(c);` is a single statement,
 * safe to use in an unbraced if / else. */
#define CONTROL(c)  do { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } while (0)
| 58 | |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 59 | |
| 60 | /* ************************************* |
| 61 | * Benchmarking an arbitrary function |
| 62 | ***************************************/ |
| 63 | |
| 64 | int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) |
| 65 | { |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 66 | return outcome.error_tag_never_ever_use_directly == 0; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 67 | } |
| 68 | |
| 69 | /* warning : this function will stop program execution if outcome is invalid ! |
| 70 | * check outcome validity first, using BMK_isValid_runResult() */ |
| 71 | BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) |
| 72 | { |
Yann Collet | 944e2e9 | 2019-06-21 15:58:55 -0700 | [diff] [blame] | 73 | CONTROL(outcome.error_tag_never_ever_use_directly == 0); |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 74 | return outcome.internal_never_ever_use_directly; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 75 | } |
| 76 | |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 77 | size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) |
| 78 | { |
Yann Collet | 944e2e9 | 2019-06-21 15:58:55 -0700 | [diff] [blame] | 79 | CONTROL(outcome.error_tag_never_ever_use_directly != 0); |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 80 | return outcome.error_result_never_ever_use_directly; |
| 81 | } |
| 82 | |
| 83 | static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 84 | { |
| 85 | BMK_runOutcome_t b; |
| 86 | memset(&b, 0, sizeof(b)); |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 87 | b.error_tag_never_ever_use_directly = 1; |
| 88 | b.error_result_never_ever_use_directly = errorResult; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 89 | return b; |
| 90 | } |
| 91 | |
| 92 | static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) |
| 93 | { |
| 94 | BMK_runOutcome_t outcome; |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 95 | outcome.error_tag_never_ever_use_directly = 0; |
| 96 | outcome.internal_never_ever_use_directly = runTime; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 97 | return outcome; |
| 98 | } |
| 99 | |
| 100 | |
| 101 | /* initFn will be measured once, benchFn will be measured `nbLoops` times */ |
| 102 | /* initFn is optional, provide NULL if none */ |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 103 | /* benchFn must return a size_t value that errorFn can interpret */ |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 104 | /* takes # of blocks and list of size & stuff for each. */ |
| 105 | /* can report result of benchFn for each block into blockResult. */ |
| 106 | /* blockResult is optional, provide NULL if this information is not required */ |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 107 | /* note : time per loop can be reported as zero if run time < timer resolution */ |
Yann Collet | b830ccc | 2018-11-13 13:05:39 -0800 | [diff] [blame] | 108 | BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, |
| 109 | unsigned nbLoops) |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 110 | { |
| 111 | size_t dstSize = 0; |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 112 | nbLoops += !nbLoops; /* minimum nbLoops is 1 */ |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 113 | |
| 114 | /* init */ |
| 115 | { size_t i; |
Yann Collet | b830ccc | 2018-11-13 13:05:39 -0800 | [diff] [blame] | 116 | for(i = 0; i < p.blockCount; i++) { |
| 117 | memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 118 | } } |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 119 | |
| 120 | /* benchmark */ |
| 121 | { UTIL_time_t const clockStart = UTIL_getTime(); |
| 122 | unsigned loopNb, blockNb; |
Yann Collet | b830ccc | 2018-11-13 13:05:39 -0800 | [diff] [blame] | 123 | if (p.initFn != NULL) p.initFn(p.initPayload); |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 124 | for (loopNb = 0; loopNb < nbLoops; loopNb++) { |
Yann Collet | b830ccc | 2018-11-13 13:05:39 -0800 | [diff] [blame] | 125 | for (blockNb = 0; blockNb < p.blockCount; blockNb++) { |
| 126 | size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], |
| 127 | p.dstBuffers[blockNb], p.dstCapacities[blockNb], |
| 128 | p.benchPayload); |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 129 | if (loopNb == 0) { |
Yann Collet | b830ccc | 2018-11-13 13:05:39 -0800 | [diff] [blame] | 130 | if (p.blockResults != NULL) p.blockResults[blockNb] = res; |
| 131 | if ((p.errorFn != NULL) && (p.errorFn(res))) { |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 132 | RETURN_QUIET_ERROR(BMK_runOutcome_error(res), |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 133 | "Function benchmark failed on block %u (of size %u) with error %i", |
Yann Collet | ededcfc | 2018-12-21 16:19:44 -0800 | [diff] [blame] | 134 | blockNb, (unsigned)p.srcSizes[blockNb], (int)res); |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 135 | } |
| 136 | dstSize += res; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 137 | } } |
| 138 | } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ |
| 139 | |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 140 | { PTime const totalTime = UTIL_clockSpanNano(clockStart); |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 141 | BMK_runTime_t rt; |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 142 | rt.nanoSecPerRun = (double)totalTime / nbLoops; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 143 | rt.sumOfReturn = dstSize; |
| 144 | return BMK_setValid_runTime(rt); |
| 145 | } } |
| 146 | } |
| 147 | |
| 148 | |
| 149 | /* ==== Benchmarking any function, providing intermediate results ==== */ |
| 150 | |
| 151 | struct BMK_timedFnState_s { |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 152 | PTime timeSpent_ns; |
| 153 | PTime timeBudget_ns; |
| 154 | PTime runBudget_ns; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 155 | BMK_runTime_t fastestRun; |
| 156 | unsigned nbLoops; |
| 157 | UTIL_time_t coolTime; |
| 158 | }; /* typedef'd to BMK_timedFnState_t within bench.h */ |
| 159 | |
| 160 | BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) |
| 161 | { |
| 162 | BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); |
| 163 | if (r == NULL) return NULL; /* malloc() error */ |
| 164 | BMK_resetTimedFnState(r, total_ms, run_ms); |
| 165 | return r; |
| 166 | } |
| 167 | |
Yann Collet | 526ec64 | 2019-04-10 16:05:02 -0700 | [diff] [blame] | 168 | void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 169 | |
Yann Collet | 9703a59 | 2019-04-10 15:54:55 -0700 | [diff] [blame] | 170 | BMK_timedFnState_t* |
| 171 | BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms) |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 172 | { |
Yann Collet | 526ec64 | 2019-04-10 16:05:02 -0700 | [diff] [blame] | 173 | typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */ |
| 174 | typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */ |
| 175 | size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */ |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 176 | BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer; |
Yann Collet | 526ec64 | 2019-04-10 16:05:02 -0700 | [diff] [blame] | 177 | if (buffer == NULL) return NULL; |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 178 | if (size < sizeof(struct BMK_timedFnState_s)) return NULL; |
Yann Collet | 526ec64 | 2019-04-10 16:05:02 -0700 | [diff] [blame] | 179 | if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */ |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 180 | BMK_resetTimedFnState(r, total_ms, run_ms); |
| 181 | return r; |
| 182 | } |
| 183 | |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 184 | void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) |
| 185 | { |
| 186 | if (!total_ms) total_ms = 1 ; |
| 187 | if (!run_ms) run_ms = 1; |
| 188 | if (run_ms > total_ms) run_ms = total_ms; |
| 189 | timedFnState->timeSpent_ns = 0; |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 190 | timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000; |
| 191 | timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000; |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 192 | timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 193 | timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); |
| 194 | timedFnState->nbLoops = 1; |
| 195 | timedFnState->coolTime = UTIL_getTime(); |
| 196 | } |
| 197 | |
| 198 | /* Tells if nb of seconds set in timedFnState for all runs is spent. |
| 199 | * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ |
| 200 | int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) |
| 201 | { |
| 202 | return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); |
| 203 | } |
| 204 | |
| 205 | |
/* MIN() : smallest of 2 values. warning : each argument may be evaluated twice. */
#undef MIN
#define MIN(a,b)   ( ((a) < (b)) ? (a) : (b) )

#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)   /* 0.5 seconds */
| 210 | |
Yann Collet | b830ccc | 2018-11-13 13:05:39 -0800 | [diff] [blame] | 211 | BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, |
| 212 | BMK_benchParams_t p) |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 213 | { |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 214 | PTime const runBudget_ns = cont->runBudget_ns; |
| 215 | PTime const runTimeMin_ns = runBudget_ns / 2; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 216 | int completed = 0; |
| 217 | BMK_runTime_t bestRunTime = cont->fastestRun; |
| 218 | |
| 219 | while (!completed) { |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 220 | BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops); |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 221 | |
| 222 | if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ |
Yann Collet | 9867cdb | 2018-11-13 12:01:17 -0800 | [diff] [blame] | 223 | return runResult; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 224 | } |
| 225 | |
| 226 | { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 227 | double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 228 | |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 229 | cont->timeSpent_ns += (unsigned long long)loopDuration_ns; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 230 | |
| 231 | /* estimate nbLoops for next run to last approximately 1 second */ |
| 232 | if (loopDuration_ns > (runBudget_ns / 50)) { |
Yann Collet | f75ad2e | 2019-01-25 14:22:25 -0800 | [diff] [blame] | 233 | double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); |
Yann Collet | 59a7116 | 2019-04-10 12:37:03 -0700 | [diff] [blame] | 234 | cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; |
Yann Collet | d38063f | 2018-11-13 11:01:59 -0800 | [diff] [blame] | 235 | } else { |
| 236 | /* previous run was too short : blindly increase workload by x multiplier */ |
| 237 | const unsigned multiplier = 10; |
| 238 | assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ |
| 239 | cont->nbLoops *= multiplier; |
| 240 | } |
| 241 | |
| 242 | if(loopDuration_ns < runTimeMin_ns) { |
| 243 | /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ |
| 244 | assert(completed == 0); |
| 245 | continue; |
| 246 | } else { |
| 247 | if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { |
| 248 | bestRunTime = newRunTime; |
| 249 | } |
| 250 | completed = 1; |
| 251 | } |
| 252 | } |
| 253 | } /* while (!completed) */ |
| 254 | |
| 255 | return BMK_setValid_runTime(bestRunTime); |
| 256 | } |