/*-----------------------------------------------------------------------------
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain.
 *
 * This implementation was written by Shane Day, and is also public domain.
 *
 * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
 * with support for progressive processing.
 */

/*-----------------------------------------------------------------------------

If you want to understand the MurmurHash algorithm you would be much better
off reading the original source. Just point your browser at:
http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp


What does this version provide?

1. Progressive data feeding. Useful when the entire payload to be hashed
does not fit in memory or when the data is streamed through the application.
Also useful when hashing a number of strings with a common prefix: a partial
hash of the prefix can be generated once and reused for each suffix (see the
example sketched just after this comment block).

2. Portability. Plain old C, so it should compile on any old compiler. It is
neutral with respect to both CPU endianness and access alignment, while still
avoiding inefficient code where CPU capabilities allow.

3. Drop in. I personally like nice self-contained public domain code that is
easy to pilfer, without loads of refactoring to fit the existing application
code & makefile structure, and without mucking around with licence files.
Just copy PMurHash.h and PMurHash.c and you're ready to go.


How does it work?

We can only process entire 32-bit chunks of input, except for the very end,
which may be shorter. So along with the partial hash we need to give back to
the caller a carry containing up to 3 bytes that we were unable to process.
This carry also needs to record how many bytes it holds. I use the low 2 bits
as a count (0..3), and the carry bytes are shifted into the high byte in
stream order.
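
For example, after a stream has been fed 6 bytes b0..b5, one 32-bit block
(b0..b3) has been mixed into the hash and two bytes remain pending: the carry
then holds b5 in its top byte, b4 in the byte below that, and the count 2 in
its low 2 bits. Any other bits are stale and are ignored when the carry is
eventually consumed.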

To handle endianness I simply use a macro that reads a uint32_t and define
that macro to be a direct read on little endian machines, a read and swap
on big endian machines, or a byte-by-byte read if the endianness is unknown.
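Either way, the four input bytes 0x01 0x02 0x03 0x04 must end up combined into
the 32-bit value 0x04030201, exactly as a little endian CPU would read them.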

-----------------------------------------------------------------------------*/
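
/* Example of progressive use (a minimal sketch, assuming only the MH_UINT32
 * type and the prototypes declared in PMurHash.h). A common prefix is hashed
 * once, the partial state is saved, and each suffix is hashed from that state:
 *
 *   MH_UINT32 h = 0, carry = 0;
 *   PMurHash32_Process(&h, &carry, "http://", 7);
 *   MH_UINT32 ph = h, pcarry = carry;
 *
 *   PMurHash32_Process(&h, &carry, "example.com", 11);
 *   MH_UINT32 hash1 = PMurHash32_Result(h, carry, 7+11);
 *
 *   h = ph; carry = pcarry;
 *   PMurHash32_Process(&h, &carry, "another.org", 11);
 *   MH_UINT32 hash2 = PMurHash32_Result(h, carry, 7+11);
 *
 * hash1 matches PMurHash32(0, "http://example.com", 18), which is the value the
 * original Murmur3A would produce for that string with seed 0.
 */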


#include "PMurHash.h"

/* I used ugly type names in the header to avoid potential conflicts with
 * application or system typedefs & defines. Since I'm not including any more
 * headers below here I can rename these so that the code reads like C99 */
#undef uint32_t
#define uint32_t MH_UINT32
#undef uint8_t
#define uint8_t MH_UINT8

/* MSVC warnings we choose to ignore */
#if defined(_MSC_VER)
  #pragma warning(disable: 4127) /* conditional expression is constant */
#endif

/*-----------------------------------------------------------------------------
 * Endianness, misalignment capabilities and util macros
 *
 * The following 3 macros are defined in this section. The other macros defined
 * are only needed to help derive these 3.
 *
 * READ_UINT32(x)   Read a little endian unsigned 32-bit int
 * UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
 * ROTL32(x,r)      Rotate x left by r bits
 */

/* Convention is to define __BYTE_ORDER == to one of these values */
#if !defined(__BIG_ENDIAN)
  #define __BIG_ENDIAN 4321
#endif
#if !defined(__LITTLE_ENDIAN)
  #define __LITTLE_ENDIAN 1234
#endif

/* I386 */
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
  #define __BYTE_ORDER __LITTLE_ENDIAN
  #define UNALIGNED_SAFE
#endif

/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
 * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
#if !defined(__BYTE_ORDER)
  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
    #define __BYTE_ORDER __LITTLE_ENDIAN
  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
    #define __BYTE_ORDER __BIG_ENDIAN
  #endif
#endif

/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianness */
#if !defined(__BYTE_ORDER)
  #if defined(__ARMEL__) || defined(__MIPSEL__)
    #define __BYTE_ORDER __LITTLE_ENDIAN
  #endif
  #if defined(__ARMEB__) || defined(__MIPSEB__)
    #define __BYTE_ORDER __BIG_ENDIAN
  #endif
#endif

/* Now find best way we can to READ_UINT32 */
#if __BYTE_ORDER==__LITTLE_ENDIAN
  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
  #define READ_UINT32(ptr)   (*((uint32_t*)(ptr)))
#elif __BYTE_ORDER==__BIG_ENDIAN
  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
    #define READ_UINT32(ptr)   (__builtin_bswap32(*((uint32_t*)(ptr))))
  #else
    /* Without a known fast bswap32 we're just as well off doing this */
    #define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
    #define UNALIGNED_SAFE
  #endif
#else
  /* Unknown endianness so last resort is to read individual bytes */
  #define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)

  /* Since we're not doing word-reads we can skip the messing about with realignment */
  #define UNALIGNED_SAFE
#endif

/* Find best way to ROTL32 */
#if defined(_MSC_VER)
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define ROTL32(x,r)  _rotl(x,r)
#else
  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
  #define ROTL32(x,r)  (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
#endif


/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

#define C1  (0xcc9e2d51)
#define C2  (0x1b873593)

/* This is the main processing body of the algorithm. It operates
 * on each full 32-bits of input. */
#define DOBLOCK(h1, k1) do{ \
    k1 *= C1; \
    k1 = ROTL32(k1,15); \
    k1 *= C2; \
    \
    h1 ^= k1; \
    h1 = ROTL32(h1,13); \
    h1 = h1*5+0xe6546b64; \
  }while(0)


/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
    int _i = cnt; \
    while(_i--) { \
      c = c>>8 | *ptr++<<24; \
      n++; len--; \
      if(n==4) { \
        DOBLOCK(h1, c); \
        n = 0; \
      } \
    } }while(0)

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed
 * if wanted. Both ph1 and pcarry are required arguments. */
void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
{
  uint32_t h1 = *ph1;
  uint32_t c = *pcarry;

  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 2 bits of c value */
  int n = c & 3;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */

  /* Consume any carry bytes */
  int i = (4-n) & 3;
  if(i && i <= len) {
    DOBYTES(i, h1, c, n, ptr, len);
  }

  /* Process 32-bit chunks */
  end = ptr + len/4*4;
  for( ; ptr < end ; ptr+=4) {
    uint32_t k1 = READ_UINT32(ptr);
    DOBLOCK(h1, k1);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */

  /* Consume enough so that the next data byte is word aligned */
  int i = -(long)ptr & 3;
  if(i && i <= len) {
    DOBYTES(i, h1, c, n, ptr, len);
  }

  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  end = ptr + len/4*4;
  switch(n) { /* how many bytes in c */
  case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = READ_UINT32(ptr);
      DOBLOCK(h1, k1);
    }
    break;
  case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = c>>24;
      c = READ_UINT32(ptr);
      k1 |= c<<8;
      DOBLOCK(h1, k1);
    }
    break;
  case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = c>>16;
      c = READ_UINT32(ptr);
      k1 |= c<<16;
      DOBLOCK(h1, k1);
    }
    break;
  case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = c>>8;
      c = READ_UINT32(ptr);
      k1 |= c<<24;
      DOBLOCK(h1, k1);
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
  len -= len/4*4;

  /* Append any remaining bytes into carry */
  DOBYTES(len, h1, c, n, ptr, len);

  /* Copy out new running hash and carry */
  *ph1 = h1;
  *pcarry = (c & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* Finalize a hash. To match the original Murmur3A the total_length must be provided */
uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
{
  uint32_t k1;
  int n = carry & 3;
  if(n) {
    k1 = carry >> (4-n)*8;
    k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;
  }
  h ^= total_length;

  /* fmix */
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

/*---------------------------------------------------------------------------*/

/* Murmur3A compatible all-at-once */
uint32_t PMurHash32(uint32_t seed, const void *key, int len)
{
  uint32_t h1=seed, carry=0;
  PMurHash32_Process(&h1, &carry, key, len);
  return PMurHash32_Result(h1, carry, len);
}

/*---------------------------------------------------------------------------*/

/* Provide an API suitable for smhasher */
void PMurHash32_test(const void *key, int len, uint32_t seed, void *out)
{
  uint32_t h1=seed, carry=0;
  const uint8_t *ptr = (uint8_t*)key;
  const uint8_t *end = ptr + len;

#if 0 /* Exercise the progressive processing */
  while(ptr < end) {
    //const uint8_t *mid = ptr + rand()%(end-ptr)+1;
    const uint8_t *mid = ptr + (rand()&0xF);
    mid = mid<end?mid:end;
    PMurHash32_Process(&h1, &carry, ptr, mid-ptr);
    ptr = mid;
  }
#else
  PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr));
#endif
  h1 = PMurHash32_Result(h1, carry, len);
  *(uint32_t*)out = h1;
}

/*---------------------------------------------------------------------------*/