Blame - MurmurHash2.cpp - fp2-dev/platform/external/chromium_org/third_party/smhasher/src

blob: cd1e53a9b92620d2656eae423017f81be1fb3ad1 [file] [log] [blame]

tanjent@gmail.com	f3b7897	2012-03-01 03:38:55 +0000	[diff] [blame]	1	//-----------------------------------------------------------------------------
				2	// MurmurHash2 was written by Austin Appleby, and is placed in the public
				3	// domain. The author hereby disclaims copyright to this source code.
				4
				5	// Note - This code makes a few assumptions about how your machine behaves -
				6
				7	// 1. We can read a 4-byte value from any address without crashing
				8	// 2. sizeof(int) == 4
				9
				10	// And it has a few limitations -
				11
				12	// 1. It will not work incrementally.
				13	// 2. It will not produce the same results on little-endian and big-endian
				14	// machines.
				15
				16	#include "MurmurHash2.h"
				17
				18	//-----------------------------------------------------------------------------
				19	// Platform-specific functions and macros
				20
				21	// Microsoft Visual Studio
				22
				23	#if defined(_MSC_VER)
				24
				25	#define BIG_CONSTANT(x) (x)
				26
				27	// Other compilers
				28
				29	#else // defined(_MSC_VER)
				30
				31	#define BIG_CONSTANT(x) (x##LLU)
				32
				33	#endif // !defined(_MSC_VER)
				34
				35	//-----------------------------------------------------------------------------
				36
				37	uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
				38	{
				39	// 'm' and 'r' are mixing constants generated offline.
				40	// They're not really 'magic', they just happen to work well.
				41
				42	const uint32_t m = 0x5bd1e995;
				43	const int r = 24;
				44
				45	// Initialize the hash to a 'random' value
				46
				47	uint32_t h = seed ^ len;
				48
				49	// Mix 4 bytes at a time into the hash
				50
				51	const unsigned char * data = (const unsigned char *)key;
				52
				53	while(len >= 4)
				54	{
				55	uint32_t k = (uint32_t)data;
				56
				57	k *= m;
				58	k ^= k >> r;
				59	k *= m;
				60
				61	h *= m;
				62	h ^= k;
				63
				64	data += 4;
				65	len -= 4;
				66	}
				67
				68	// Handle the last few bytes of the input array
				69
				70	switch(len)
				71	{
				72	case 3: h ^= data[2] << 16;
				73	case 2: h ^= data[1] << 8;
				74	case 1: h ^= data[0];
				75	h *= m;
				76	};
				77
				78	// Do a few final mixes of the hash to ensure the last few
				79	// bytes are well-incorporated.
				80
				81	h ^= h >> 13;
				82	h *= m;
				83	h ^= h >> 15;
				84
				85	return h;
				86	}
				87
				88	//-----------------------------------------------------------------------------
				89	// MurmurHash2, 64-bit versions, by Austin Appleby
				90
				91	// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
				92	// and endian-ness issues if used across multiple platforms.
				93
				94	// 64-bit hash for 64-bit platforms
				95
				96	uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
				97	{
				98	const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
				99	const int r = 47;
				100
				101	uint64_t h = seed ^ (len * m);
				102
				103	const uint64_t * data = (const uint64_t *)key;
				104	const uint64_t * end = data + (len/8);
				105
				106	while(data != end)
				107	{
				108	uint64_t k = *data++;
				109
				110	k *= m;
				111	k ^= k >> r;
				112	k *= m;
				113
				114	h ^= k;
				115	h *= m;
				116	}
				117
				118	const unsigned char * data2 = (const unsigned char*)data;
				119
				120	switch(len & 7)
				121	{
				122	case 7: h ^= uint64_t(data2[6]) << 48;
				123	case 6: h ^= uint64_t(data2[5]) << 40;
				124	case 5: h ^= uint64_t(data2[4]) << 32;
				125	case 4: h ^= uint64_t(data2[3]) << 24;
				126	case 3: h ^= uint64_t(data2[2]) << 16;
				127	case 2: h ^= uint64_t(data2[1]) << 8;
				128	case 1: h ^= uint64_t(data2[0]);
				129	h *= m;
				130	};
				131
				132	h ^= h >> r;
				133	h *= m;
				134	h ^= h >> r;
				135
				136	return h;
				137	}
				138
				139
				140	// 64-bit hash for 32-bit platforms
				141
				142	uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
				143	{
				144	const uint32_t m = 0x5bd1e995;
				145	const int r = 24;
				146
				147	uint32_t h1 = uint32_t(seed) ^ len;
				148	uint32_t h2 = uint32_t(seed >> 32);
				149
				150	const uint32_t * data = (const uint32_t *)key;
				151
				152	while(len >= 8)
				153	{
				154	uint32_t k1 = *data++;
				155	k1 = m; k1 ^= k1 >> r; k1 = m;
				156	h1 *= m; h1 ^= k1;
				157	len -= 4;
				158
				159	uint32_t k2 = *data++;
				160	k2 = m; k2 ^= k2 >> r; k2 = m;
				161	h2 *= m; h2 ^= k2;
				162	len -= 4;
				163	}
				164
				165	if(len >= 4)
				166	{
				167	uint32_t k1 = *data++;
				168	k1 = m; k1 ^= k1 >> r; k1 = m;
				169	h1 *= m; h1 ^= k1;
				170	len -= 4;
				171	}
				172
				173	switch(len)
				174	{
				175	case 3: h2 ^= ((unsigned char*)data)[2] << 16;
				176	case 2: h2 ^= ((unsigned char*)data)[1] << 8;
				177	case 1: h2 ^= ((unsigned char*)data)[0];
				178	h2 *= m;
				179	};
				180
				181	h1 ^= h2 >> 18; h1 *= m;
				182	h2 ^= h1 >> 22; h2 *= m;
				183	h1 ^= h2 >> 17; h1 *= m;
				184	h2 ^= h1 >> 19; h2 *= m;
				185
				186	uint64_t h = h1;
				187
				188	h = (h << 32) \| h2;
				189
				190	return h;
				191	}
				192
				193	//-----------------------------------------------------------------------------
				194	// MurmurHash2A, by Austin Appleby
				195
				196	// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
				197	// construction. Bulk speed should be identical to Murmur2, small-key speed
				198	// will be 10%-20% slower due to the added overhead at the end of the hash.
				199
				200	// This variant fixes a minor issue where null keys were more likely to
				201	// collide with each other than expected, and also makes the function
				202	// more amenable to incremental implementations.
				203
				204	#define mmix(h,k) { k = m; k ^= k >> r; k = m; h *= m; h ^= k; }
				205
				206	uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
				207	{
				208	const uint32_t m = 0x5bd1e995;
				209	const int r = 24;
				210	uint32_t l = len;
				211
				212	const unsigned char * data = (const unsigned char *)key;
				213
				214	uint32_t h = seed;
				215
				216	while(len >= 4)
				217	{
				218	uint32_t k = (uint32_t)data;
				219
				220	mmix(h,k);
				221
				222	data += 4;
				223	len -= 4;
				224	}
				225
				226	uint32_t t = 0;
				227
				228	switch(len)
				229	{
				230	case 3: t ^= data[2] << 16;
				231	case 2: t ^= data[1] << 8;
				232	case 1: t ^= data[0];
				233	};
				234
				235	mmix(h,t);
				236	mmix(h,l);
				237
				238	h ^= h >> 13;
				239	h *= m;
				240	h ^= h >> 15;
				241
				242	return h;
				243	}
				244
				245	//-----------------------------------------------------------------------------
				246	// CMurmurHash2A, by Austin Appleby
				247
				248	// This is a sample implementation of MurmurHash2A designed to work
				249	// incrementally.
				250
				251	// Usage -
				252
				253	// CMurmurHash2A hasher
				254	// hasher.Begin(seed);
				255	// hasher.Add(data1,size1);
				256	// hasher.Add(data2,size2);
				257	// ...
				258	// hasher.Add(dataN,sizeN);
				259	// uint32_t hash = hasher.End()
				260
				261	class CMurmurHash2A
				262	{
				263	public:
				264
				265	void Begin ( uint32_t seed = 0 )
				266	{
				267	m_hash = seed;
				268	m_tail = 0;
				269	m_count = 0;
				270	m_size = 0;
				271	}
				272
				273	void Add ( const unsigned char * data, int len )
				274	{
				275	m_size += len;
				276
				277	MixTail(data,len);
				278
				279	while(len >= 4)
				280	{
				281	uint32_t k = (uint32_t)data;
				282
				283	mmix(m_hash,k);
				284
				285	data += 4;
				286	len -= 4;
				287	}
				288
				289	MixTail(data,len);
				290	}
				291
				292	uint32_t End ( void )
				293	{
				294	mmix(m_hash,m_tail);
				295	mmix(m_hash,m_size);
				296
				297	m_hash ^= m_hash >> 13;
				298	m_hash *= m;
				299	m_hash ^= m_hash >> 15;
				300
				301	return m_hash;
				302	}
				303
				304	private:
				305
				306	static const uint32_t m = 0x5bd1e995;
				307	static const int r = 24;
				308
				309	void MixTail ( const unsigned char * & data, int & len )
				310	{
				311	while( len && ((len<4) \|\| m_count) )
				312	{
				313	m_tail \|= (data++) << (m_count 8);
				314
				315	m_count++;
				316	len--;
				317
				318	if(m_count == 4)
				319	{
				320	mmix(m_hash,m_tail);
				321	m_tail = 0;
				322	m_count = 0;
				323	}
				324	}
				325	}
				326
				327	uint32_t m_hash;
				328	uint32_t m_tail;
				329	uint32_t m_count;
				330	uint32_t m_size;
				331	};
				332
				333	//-----------------------------------------------------------------------------
				334	// MurmurHashNeutral2, by Austin Appleby
				335
				336	// Same as MurmurHash2, but endian- and alignment-neutral.
				337	// Half the speed though, alas.
				338
				339	uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
				340	{
				341	const uint32_t m = 0x5bd1e995;
				342	const int r = 24;
				343
				344	uint32_t h = seed ^ len;
				345
				346	const unsigned char * data = (const unsigned char *)key;
				347
				348	while(len >= 4)
				349	{
				350	uint32_t k;
				351
				352	k = data[0];
				353	k \|= data[1] << 8;
				354	k \|= data[2] << 16;
				355	k \|= data[3] << 24;
				356
				357	k *= m;
				358	k ^= k >> r;
				359	k *= m;
				360
				361	h *= m;
				362	h ^= k;
				363
				364	data += 4;
				365	len -= 4;
				366	}
				367
				368	switch(len)
				369	{
				370	case 3: h ^= data[2] << 16;
				371	case 2: h ^= data[1] << 8;
				372	case 1: h ^= data[0];
				373	h *= m;
				374	};
				375
				376	h ^= h >> 13;
				377	h *= m;
				378	h ^= h >> 15;
				379
				380	return h;
				381	}
				382
				383	//-----------------------------------------------------------------------------
				384	// MurmurHashAligned2, by Austin Appleby
				385
				386	// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
				387	// on certain platforms.
				388
				389	// Performance will be lower than MurmurHash2
				390
				391	#define MIX(h,k,m) { k = m; k ^= k >> r; k = m; h *= m; h ^= k; }
				392
				393
				394	uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
				395	{
				396	const uint32_t m = 0x5bd1e995;
				397	const int r = 24;
				398
				399	const unsigned char * data = (const unsigned char *)key;
				400
				401	uint32_t h = seed ^ len;
				402
				403	int align = (uint64_t)data & 3;
				404
				405	if(align && (len >= 4))
				406	{
				407	// Pre-load the temp registers
				408
				409	uint32_t t = 0, d = 0;
				410
				411	switch(align)
				412	{
				413	case 1: t \|= data[2] << 16;
				414	case 2: t \|= data[1] << 8;
				415	case 3: t \|= data[0];
				416	}
				417
				418	t <<= (8 * align);
				419
				420	data += 4-align;
				421	len -= 4-align;
				422
				423	int sl = 8 * (4-align);
				424	int sr = 8 * align;
				425
				426	// Mix
				427
				428	while(len >= 4)
				429	{
				430	d = (uint32_t )data;
				431	t = (t >> sr) \| (d << sl);
				432
				433	uint32_t k = t;
				434
				435	MIX(h,k,m);
				436
				437	t = d;
				438
				439	data += 4;
				440	len -= 4;
				441	}
				442
				443	// Handle leftover data in temp registers
				444
				445	d = 0;
				446
				447	if(len >= align)
				448	{
				449	switch(align)
				450	{
				451	case 3: d \|= data[2] << 16;
				452	case 2: d \|= data[1] << 8;
				453	case 1: d \|= data[0];
				454	}
				455
				456	uint32_t k = (t >> sr) \| (d << sl);
				457	MIX(h,k,m);
				458
				459	data += align;
				460	len -= align;
				461
				462	//----------
				463	// Handle tail bytes
				464
				465	switch(len)
				466	{
				467	case 3: h ^= data[2] << 16;
				468	case 2: h ^= data[1] << 8;
				469	case 1: h ^= data[0];
				470	h *= m;
				471	};
				472	}
				473	else
				474	{
				475	switch(len)
				476	{
				477	case 3: d \|= data[2] << 16;
				478	case 2: d \|= data[1] << 8;
				479	case 1: d \|= data[0];
				480	case 0: h ^= (t >> sr) \| (d << sl);
				481	h *= m;
				482	}
				483	}
				484
				485	h ^= h >> 13;
				486	h *= m;
				487	h ^= h >> 15;
				488
				489	return h;
				490	}
				491	else
				492	{
				493	while(len >= 4)
				494	{
				495	uint32_t k = (uint32_t )data;
				496
				497	MIX(h,k,m);
				498
				499	data += 4;
				500	len -= 4;
				501	}
				502
				503	//----------
				504	// Handle tail bytes
				505
				506	switch(len)
				507	{
				508	case 3: h ^= data[2] << 16;
				509	case 2: h ^= data[1] << 8;
				510	case 1: h ^= data[0];
				511	h *= m;
				512	};
				513
				514	h ^= h >> 13;
				515	h *= m;
				516	h ^= h >> 15;
				517
				518	return h;
				519	}
				520	}
				521
				522	//-----------------------------------------------------------------------------
				523