programs/benchfn.c - platform/external/zstd - Gitiles

 /*
  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
  * You may select, at your option, one of the above-listed licenses.
  */


 /* *************************************
 *  Includes
 ***************************************/
 #include <stdlib.h>      /* malloc, free */
 #include <string.h>      /* memset */
 #include <assert.h>      /* assert */

 #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
 #include "benchfn.h"


 /* *************************************
 *  Constants
 ***************************************/
 #define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
 #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */

 #define KB *(1 <<10)
 #define MB *(1 <<20)
 #define GB *(1U<<30)


 /* *************************************
 *  Debug errors
 ***************************************/
 #if defined(DEBUG) && (DEBUG >= 1)
 #  include <stdio.h>       /* fprintf */
 #  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
 #  define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
 #else
 #  define DEBUGOUTPUT(...)
 #endif


 /* error without displaying */
 #define RETURN_QUIET_ERROR(retValue, ...) {           \
     DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
     DEBUGOUTPUT("Error : ");                          \
     DEBUGOUTPUT(__VA_ARGS__);                         \
     DEBUGOUTPUT(" \n");                               \
     return retValue;                                  \
 }

 /* Abort execution if a condition is not met */
 #define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }


 /* *************************************
 *  Benchmarking an arbitrary function
 ***************************************/

 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
 {
     return outcome.error_tag_never_ever_use_directly == 0;
 }

 /* warning : this function will stop program execution if outcome is invalid !
  *           check outcome validity first, using BMK_isValid_runResult() */
 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
 {
     CONTROL(outcome.error_tag_never_ever_use_directly == 0);
     return outcome.internal_never_ever_use_directly;
 }

 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
 {
     CONTROL(outcome.error_tag_never_ever_use_directly != 0);
     return outcome.error_result_never_ever_use_directly;
 }

 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
 {
     BMK_runOutcome_t b;
     memset(&b, 0, sizeof(b));
     b.error_tag_never_ever_use_directly = 1;
     b.error_result_never_ever_use_directly = errorResult;
     return b;
 }

 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
 {
     BMK_runOutcome_t outcome;
     outcome.error_tag_never_ever_use_directly = 0;
     outcome.internal_never_ever_use_directly = runTime;
     return outcome;
 }


 /* initFn will be measured once, benchFn will be measured `nbLoops` times */
 /* initFn is optional, provide NULL if none */
 /* benchFn must return a size_t value that errorFn can interpret */
 /* takes # of blocks and list of size & stuff for each. */
 /* can report result of benchFn for each block into blockResult. */
 /* blockResult is optional, provide NULL if this information is not required */
 /* note : time per loop can be reported as zero if run time < timer resolution */
 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
                                    unsigned nbLoops)
 {
     size_t dstSize = 0;
     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */

     /* init */
     {   size_t i;
         for(i = 0; i < p.blockCount; i++) {
             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
     }   }

     /* benchmark */
     {   UTIL_time_t const clockStart = UTIL_getTime();
         unsigned loopNb, blockNb;
         if (p.initFn != NULL) p.initFn(p.initPayload);
         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
                                    p.benchPayload);
                 if (loopNb == 0) {
                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
                             "Function benchmark failed on block %u (of size %u) with error %i",
                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
                     }
                     dstSize += res;
             }   }
         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */

         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
             BMK_runTime_t rt;
             rt.nanoSecPerRun = (double)totalTime / nbLoops;
             rt.sumOfReturn = dstSize;
             return BMK_setValid_runTime(rt);
     }   }
 }


 /* ====  Benchmarking any function, providing intermediate results  ==== */

 struct BMK_timedFnState_s {
     PTime timeSpent_ns;
     PTime timeBudget_ns;
     PTime runBudget_ns;
     BMK_runTime_t fastestRun;
     unsigned nbLoops;
     UTIL_time_t coolTime;
 };  /* typedef'd to BMK_timedFnState_t within bench.h */

 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
 {
     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
     if (r == NULL) return NULL;   /* malloc() error */
     BMK_resetTimedFnState(r, total_ms, run_ms);
     return r;
 }

 void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }

 BMK_timedFnState_t*
 BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
 {
     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
     if (buffer == NULL) return NULL;
     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
     BMK_resetTimedFnState(r, total_ms, run_ms);
     return r;
 }

 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
 {
     if (!total_ms) total_ms = 1 ;
     if (!run_ms) run_ms = 1;
     if (run_ms > total_ms) run_ms = total_ms;
     timedFnState->timeSpent_ns = 0;
     timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
     timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
     timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
     timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
     timedFnState->nbLoops = 1;
     timedFnState->coolTime = UTIL_getTime();
 }

 /* Tells if nb of seconds set in timedFnState for all runs is spent.
  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
 {
     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
 }


 #undef MIN
 #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )

 #define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */

 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
                                   BMK_benchParams_t p)
 {
     PTime const runBudget_ns = cont->runBudget_ns;
     PTime const runTimeMin_ns = runBudget_ns / 2;
     int completed = 0;
     BMK_runTime_t bestRunTime = cont->fastestRun;

     while (!completed) {
         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);

         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
             return runResult;
         }

         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;

             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;

             /* estimate nbLoops for next run to last approximately 1 second */
             if (loopDuration_ns > (runBudget_ns / 50)) {
                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
                 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
             } else {
                 /* previous run was too short : blindly increase workload by x multiplier */
                 const unsigned multiplier = 10;
                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
                 cont->nbLoops *= multiplier;
             }

             if(loopDuration_ns < runTimeMin_ns) {
                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
                 assert(completed == 0);
                 continue;
             } else {
                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
                     bestRunTime = newRunTime;
                 }
                 completed = 1;
             }
         }
     }   /* while (!completed) */

     return BMK_setValid_runTime(bestRunTime);
 }
	/*
	* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
	* All rights reserved.
	*
	* This source code is licensed under both the BSD-style license (found in the
	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
	* in the COPYING file in the root directory of this source tree).
	* You may select, at your option, one of the above-listed licenses.
	*/



	/* *************************************
	* Includes
	***************************************/
	#include <stdlib.h> /* malloc, free */
	#include <string.h> /* memset */
	#include <assert.h> /* assert */

	#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
	#include "benchfn.h"


	/* *************************************
	* Constants
	***************************************/
	#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
	#define TIMELOOP_NANOSEC (11000000000ULL) / 1 second */

	#define KB *(1 <<10)
	#define MB *(1 <<20)
	#define GB *(1U<<30)


	/* *************************************
	* Debug errors
	***************************************/
	#if defined(DEBUG) && (DEBUG >= 1)
	# include <stdio.h> /* fprintf */
	# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
	# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
	#else
	# define DEBUGOUTPUT(...)
	#endif


	/* error without displaying */
	#define RETURN_QUIET_ERROR(retValue, ...) { \
	DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
	DEBUGOUTPUT("Error : "); \
	DEBUGOUTPUT(__VA_ARGS__); \
	DEBUGOUTPUT(" \n"); \
	return retValue; \
	}

	/* Abort execution if a condition is not met */
	#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }


	/* *************************************
	* Benchmarking an arbitrary function
	***************************************/

	int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
	{
	return outcome.error_tag_never_ever_use_directly == 0;
	}

	/* warning : this function will stop program execution if outcome is invalid !
	* check outcome validity first, using BMK_isValid_runResult() */
	BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
	{
	CONTROL(outcome.error_tag_never_ever_use_directly == 0);
	return outcome.internal_never_ever_use_directly;
	}

	size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
	{
	CONTROL(outcome.error_tag_never_ever_use_directly != 0);
	return outcome.error_result_never_ever_use_directly;
	}

	static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
	{
	BMK_runOutcome_t b;
	memset(&b, 0, sizeof(b));
	b.error_tag_never_ever_use_directly = 1;
	b.error_result_never_ever_use_directly = errorResult;
	return b;
	}

	static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
	{
	BMK_runOutcome_t outcome;
	outcome.error_tag_never_ever_use_directly = 0;
	outcome.internal_never_ever_use_directly = runTime;
	return outcome;
	}


	/* initFn will be measured once, benchFn will be measured `nbLoops` times */
	/* initFn is optional, provide NULL if none */
	/* benchFn must return a size_t value that errorFn can interpret */
	/* takes # of blocks and list of size & stuff for each. */
	/* can report result of benchFn for each block into blockResult. */
	/* blockResult is optional, provide NULL if this information is not required */
	/* note : time per loop can be reported as zero if run time < timer resolution */
	BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
	unsigned nbLoops)
	{
	size_t dstSize = 0;
	nbLoops += !nbLoops; /* minimum nbLoops is 1 */

	/* init */
	{ size_t i;
	for(i = 0; i < p.blockCount; i++) {
	memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
	} }

	/* benchmark */
	{ UTIL_time_t const clockStart = UTIL_getTime();
	unsigned loopNb, blockNb;
	if (p.initFn != NULL) p.initFn(p.initPayload);
	for (loopNb = 0; loopNb < nbLoops; loopNb++) {
	for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
	size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
	p.dstBuffers[blockNb], p.dstCapacities[blockNb],
	p.benchPayload);
	if (loopNb == 0) {
	if (p.blockResults != NULL) p.blockResults[blockNb] = res;
	if ((p.errorFn != NULL) && (p.errorFn(res))) {
	RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
	"Function benchmark failed on block %u (of size %u) with error %i",
	blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
	}
	dstSize += res;
	} }
	} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */

	{ PTime const totalTime = UTIL_clockSpanNano(clockStart);
	BMK_runTime_t rt;
	rt.nanoSecPerRun = (double)totalTime / nbLoops;
	rt.sumOfReturn = dstSize;
	return BMK_setValid_runTime(rt);
	} }
	}


	/* ==== Benchmarking any function, providing intermediate results ==== */

	struct BMK_timedFnState_s {
	PTime timeSpent_ns;
	PTime timeBudget_ns;
	PTime runBudget_ns;
	BMK_runTime_t fastestRun;
	unsigned nbLoops;
	UTIL_time_t coolTime;
	}; /* typedef'd to BMK_timedFnState_t within bench.h */

	BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
	{
	BMK_timedFnState_t* const r = (BMK_timedFnState_t)malloc(sizeof(r));
	if (r == NULL) return NULL; /* malloc() error */
	BMK_resetTimedFnState(r, total_ms, run_ms);
	return r;
	}

	void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }

	BMK_timedFnState_t*
	BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
	{
	typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
	typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
	size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
	BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
	if (buffer == NULL) return NULL;
	if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
	if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
	BMK_resetTimedFnState(r, total_ms, run_ms);
	return r;
	}

	void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
	{
	if (!total_ms) total_ms = 1 ;
	if (!run_ms) run_ms = 1;
	if (run_ms > total_ms) run_ms = total_ms;
	timedFnState->timeSpent_ns = 0;
	timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
	timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
	timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
	timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
	timedFnState->nbLoops = 1;
	timedFnState->coolTime = UTIL_getTime();
	}

	/* Tells if nb of seconds set in timedFnState for all runs is spent.
	* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
	int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
	{
	return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
	}


	#undef MIN
	#define MIN(a,b) ( (a) < (b) ? (a) : (b) )

	#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */

	BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
	BMK_benchParams_t p)
	{
	PTime const runBudget_ns = cont->runBudget_ns;
	PTime const runTimeMin_ns = runBudget_ns / 2;
	int completed = 0;
	BMK_runTime_t bestRunTime = cont->fastestRun;

	while (!completed) {
	BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);

	if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
	return runResult;
	}

	{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
	double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;

	cont->timeSpent_ns += (unsigned long long)loopDuration_ns;

	/* estimate nbLoops for next run to last approximately 1 second */
	if (loopDuration_ns > (runBudget_ns / 50)) {
	double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
	cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
	} else {
	/* previous run was too short : blindly increase workload by x multiplier */
	const unsigned multiplier = 10;
	assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
	cont->nbLoops *= multiplier;
	}

	if(loopDuration_ns < runTimeMin_ns) {
	/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
	assert(completed == 0);
	continue;
	} else {
	if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
	bestRunTime = newRunTime;
	}
	completed = 1;
	}
	}
	} /* while (!completed) */

	return BMK_setValid_runTime(bestRunTime);
	}