Blame - SpeedTest.cpp - fp2-dev/platform/external/chromium_org/third_party/smhasher/src

blob: d91f6e4751b3d01993970e9a5269a6b9c3c440a1 [file] [log] [blame]

tanjent@gmail.com	f3b7897	2012-03-01 03:38:55 +0000	[diff] [blame]	1	#include "SpeedTest.h"
				2
				3	#include "Random.h"
				4
				5	#include <stdio.h> // for printf
				6	#include <memory.h> // for memset
				7	#include <math.h> // for sqrt
				8	#include <algorithm> // for sort
				9
				10	//-----------------------------------------------------------------------------
				11	// We view our timing values as a series of random variables V that has been
				12	// contaminated with occasional outliers due to cache misses, thread
				13	// preemption, etcetera. To filter out the outliers, we search for the largest
				14	// subset of V such that all its values are within three standard deviations
				15	// of the mean.
				16
				17	double CalcMean ( std::vector<double> & v )
				18	{
				19	double mean = 0;
				20
				21	for(int i = 0; i < (int)v.size(); i++)
				22	{
				23	mean += v[i];
				24	}
				25
				26	mean /= double(v.size());
				27
				28	return mean;
				29	}
				30
				31	double CalcMean ( std::vector<double> & v, int a, int b )
				32	{
				33	double mean = 0;
				34
				35	for(int i = a; i <= b; i++)
				36	{
				37	mean += v[i];
				38	}
				39
				40	mean /= (b-a+1);
				41
				42	return mean;
				43	}
				44
				45	double CalcStdv ( std::vector<double> & v, int a, int b )
				46	{
				47	double mean = CalcMean(v,a,b);
				48
				49	double stdv = 0;
				50
				51	for(int i = a; i <= b; i++)
				52	{
				53	double x = v[i] - mean;
				54
				55	stdv += x*x;
				56	}
				57
				58	stdv = sqrt(stdv / (b-a+1));
				59
				60	return stdv;
				61	}
				62
				63	// Return true if the largest value in v[0,len) is more than three
				64	// standard deviations from the mean
				65
				66	bool ContainsOutlier ( std::vector<double> & v, size_t len )
				67	{
				68	double mean = 0;
				69
				70	for(size_t i = 0; i < len; i++)
				71	{
				72	mean += v[i];
				73	}
				74
				75	mean /= double(len);
				76
				77	double stdv = 0;
				78
				79	for(size_t i = 0; i < len; i++)
				80	{
				81	double x = v[i] - mean;
				82	stdv += x*x;
				83	}
				84
				85	stdv = sqrt(stdv / double(len));
				86
				87	double cutoff = mean + stdv*3;
				88
				89	return v[len-1] > cutoff;
				90	}
				91
				92	// Do a binary search to find the largest subset of v that does not contain
				93	// outliers.
				94
				95	void FilterOutliers ( std::vector<double> & v )
				96	{
				97	std::sort(v.begin(),v.end());
				98
				99	size_t len = 0;
				100
				101	for(size_t x = 0x40000000; x; x = x >> 1 )
				102	{
				103	if((len \| x) >= v.size()) continue;
				104
				105	if(!ContainsOutlier(v,len \| x))
				106	{
				107	len \|= x;
				108	}
				109	}
				110
				111	v.resize(len);
				112	}
				113
				114	// Iteratively tighten the set to find a subset that does not contain
				115	// outliers. I'm not positive this works correctly in all cases.
				116
				117	void FilterOutliers2 ( std::vector<double> & v )
				118	{
				119	std::sort(v.begin(),v.end());
				120
				121	int a = 0;
				122	int b = (int)(v.size() - 1);
				123
				124	for(int i = 0; i < 10; i++)
				125	{
				126	//printf("%d %d\n",a,b);
				127
				128	double mean = CalcMean(v,a,b);
				129	double stdv = CalcStdv(v,a,b);
				130
				131	double cutA = mean - stdv*3;
				132	double cutB = mean + stdv*3;
				133
				134	while((a < b) && (v[a] < cutA)) a++;
				135	while((b > a) && (v[b] > cutB)) b--;
				136	}
				137
				138	std::vector<double> v2;
				139
				140	v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
				141
				142	v.swap(v2);
				143	}
				144
				145	//-----------------------------------------------------------------------------
				146	// We really want the rdtsc() calls to bracket the function call as tightly
				147	// as possible, but that's hard to do portably. We'll try and get as close as
				148	// possible by marking the function as NEVER_INLINE (to keep the optimizer from
				149	// moving it) and marking the timing variables as "volatile register".
				150
				151	NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
				152	{
				153	volatile register int64_t begin,end;
				154
				155	uint32_t temp[16];
				156
				157	begin = rdtsc();
				158
				159	hash(key,len,seed,temp);
				160
				161	end = rdtsc();
				162
				163	return end-begin;
				164	}
				165
				166	//-----------------------------------------------------------------------------
				167
				168	double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
				169	{
				170	Rand r(seed);
				171
				172	uint8_t * buf = new uint8_t[blocksize + 512];
				173
				174	uint64_t t1 = reinterpret_cast<uint64_t>(buf);
				175
				176	t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
				177	t1 += align;
				178
				179	uint8_t * block = reinterpret_cast<uint8_t*>(t1);
				180
				181	r.rand_p(block,blocksize);
				182
				183	//----------
				184
				185	std::vector<double> times;
				186	times.reserve(trials);
				187
				188	for(int itrial = 0; itrial < trials; itrial++)
				189	{
				190	r.rand_p(block,blocksize);
				191
				192	double t = (double)timehash(hash,block,blocksize,itrial);
				193
				194	if(t > 0) times.push_back(t);
				195	}
				196
				197	//----------
				198
				199	std::sort(times.begin(),times.end());
				200
				201	FilterOutliers(times);
				202
				203	delete [] buf;
				204
				205	return CalcMean(times);
				206	}
				207
				208	//-----------------------------------------------------------------------------
				209	// 256k blocks seem to give the best results.
				210
				211	void BulkSpeedTest ( pfHash hash, uint32_t seed )
				212	{
				213	const int trials = 2999;
				214	const int blocksize = 256 * 1024;
				215
				216	printf("Bulk speed test - %d-byte keys\n",blocksize);
				217
				218	for(int align = 0; align < 8; align++)
				219	{
				220	double cycles = SpeedTest(hash,seed,trials,blocksize,align);
				221
				222	double bestbpc = double(blocksize)/cycles;
				223
				224	double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
				225	printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
				226	}
				227	}
				228
				229	//-----------------------------------------------------------------------------
				230
				231	void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /outCycles/ )
				232	{
				233	const int trials = 999999;
				234
				235	if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
				236
				237	double cycles = SpeedTest(hash,seed,trials,keysize,0);
				238
				239	printf("%8.2f cycles/hash\n",cycles);
				240	}
				241
				242	//-----------------------------------------------------------------------------