tanjent@gmail.com | 6895dce | 2012-05-11 04:59:07 +0000 | [diff] [blame] | 1 | /*----------------------------------------------------------------------------- |
| 2 | * MurmurHash3 was written by Austin Appleby, and is placed in the public |
| 3 | * domain. |
| 4 | * |
| 5 | * This implementation was written by Shane Day, and is also public domain. |
| 6 | * |
| 7 | * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) |
| 8 | * with support for progressive processing. |
| 9 | */ |
| 10 | |
| 11 | /*----------------------------------------------------------------------------- |
| 12 | |
| 13 | If you want to understand the MurmurHash algorithm you would be much better |
| 14 | off reading the original source. Just point your browser at: |
| 15 | http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp |
| 16 | |
| 17 | |
| 18 | What this version provides? |
| 19 | |
| 20 | 1. Progressive data feeding. Useful when the entire payload to be hashed |
| 21 | does not fit in memory or when the data is streamed through the application. |
| 22 | Also useful when hashing a number of strings with a common prefix. A partial |
| 23 | hash of a prefix string can be generated and reused for each suffix string. |
| 24 | |
| 25 | 2. Portability. Plain old C so that it should compile on any old compiler. |
| 26 | Both CPU endian and access-alignment neutral, but avoiding inefficient code |
| 27 | when possible depending on CPU capabilities. |
| 28 | |
| 29 | 3. Drop in. I personally like nice self contained public domain code, making it |
| 30 | easy to pilfer without loads of refactoring to work properly in the existing |
| 31 | application code & makefile structure and mucking around with licence files. |
| 32 | Just copy PMurHash.h and PMurHash.c and you're ready to go. |
| 33 | |
| 34 | |
| 35 | How does it work? |
| 36 | |
| 37 | We can only process entire 32 bit chunks of input, except for the very end |
| 38 | that may be shorter. So along with the partial hash we need to give back to |
| 39 | the caller a carry containing up to 3 bytes that we were unable to process. |
| 40 | This carry also needs to record the number of bytes the carry holds. I use |
| 41 | the low 2 bits as a count (0..3) and the carry bytes are shifted into the |
| 42 | high byte in stream order. |
| 43 | |
| 44 | To handle endianess I simply use a macro that reads a uint32_t and define |
| 45 | that macro to be a direct read on little endian machines, a read and swap |
| 46 | on big endian machines, or a byte-by-byte read if the endianess is unknown. |
| 47 | |
| 48 | -----------------------------------------------------------------------------*/ |
| 49 | |
| 50 | |
| 51 | #include "PMurHash.h" |
| 52 | |
| 53 | /* I used ugly type names in the header to avoid potential conflicts with |
| 54 | * application or system typedefs & defines. Since I'm not including any more |
| 55 | * headers below here I can rename these so that the code reads like C99 */ |
| 56 | #undef uint32_t |
| 57 | #define uint32_t MH_UINT32 |
| 58 | #undef uint8_t |
| 59 | #define uint8_t MH_UINT8 |
| 60 | |
| 61 | /* MSVC warnings we choose to ignore */ |
| 62 | #if defined(_MSC_VER) |
| 63 | #pragma warning(disable: 4127) /* conditional expression is constant */ |
| 64 | #endif |
| 65 | |
| 66 | /*----------------------------------------------------------------------------- |
| 67 | * Endianess, misalignment capabilities and util macros |
| 68 | * |
| 69 | * The following 3 macros are defined in this section. The other macros defined |
| 70 | * are only needed to help derive these 3. |
| 71 | * |
| 72 | * READ_UINT32(x) Read a little endian unsigned 32-bit int |
| 73 | * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries |
| 74 | * ROTL32(x,r) Rotate x left by r bits |
| 75 | */ |
| 76 | |
| 77 | /* Convention is to define __BYTE_ORDER == to one of these values */ |
| 78 | #if !defined(__BIG_ENDIAN) |
| 79 | #define __BIG_ENDIAN 4321 |
| 80 | #endif |
| 81 | #if !defined(__LITTLE_ENDIAN) |
| 82 | #define __LITTLE_ENDIAN 1234 |
| 83 | #endif |
| 84 | |
| 85 | /* I386 */ |
| 86 | #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) |
| 87 | #define __BYTE_ORDER __LITTLE_ENDIAN |
| 88 | #define UNALIGNED_SAFE |
| 89 | #endif |
| 90 | |
| 91 | /* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __), |
| 92 | * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */ |
| 93 | #if !defined(__BYTE_ORDER) |
| 94 | #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1 |
| 95 | #define __BYTE_ORDER __LITTLE_ENDIAN |
| 96 | #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1 |
| 97 | #define __BYTE_ORDER __BIG_ENDIAN |
| 98 | #endif |
| 99 | #endif |
| 100 | |
| 101 | /* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */ |
| 102 | #if !defined(__BYTE_ORDER) |
| 103 | #if defined(__ARMEL__) || defined(__MIPSEL__) |
| 104 | #define __BYTE_ORDER __LITTLE_ENDIAN |
| 105 | #endif |
| 106 | #if defined(__ARMEB__) || defined(__MIPSEB__) |
| 107 | #define __BYTE_ORDER __BIG_ENDIAN |
| 108 | #endif |
| 109 | #endif |
| 110 | |
| 111 | /* Now find best way we can to READ_UINT32 */ |
| 112 | #if __BYTE_ORDER==__LITTLE_ENDIAN |
| 113 | /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ |
| 114 | #define READ_UINT32(ptr) (*((uint32_t*)(ptr))) |
| 115 | #elif __BYTE_ORDER==__BIG_ENDIAN |
| 116 | /* TODO: Add additional cases below where a compiler provided bswap32 is available */ |
| 117 | #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) |
| 118 | #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t*)(ptr)))) |
| 119 | #else |
| 120 | /* Without a known fast bswap32 we're just as well off doing this */ |
| 121 | #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) |
| 122 | #define UNALIGNED_SAFE |
| 123 | #endif |
| 124 | #else |
| 125 | /* Unknown endianess so last resort is to read individual bytes */ |
| 126 | #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) |
| 127 | |
| 128 | /* Since we're not doing word-reads we can skip the messing about with realignment */ |
| 129 | #define UNALIGNED_SAFE |
| 130 | #endif |
| 131 | |
| 132 | /* Find best way to ROTL32 */ |
| 133 | #if defined(_MSC_VER) |
| 134 | #include <stdlib.h> /* Microsoft put _rotl declaration in here */ |
| 135 | #define ROTL32(x,r) _rotl(x,r) |
| 136 | #else |
| 137 | /* gcc recognises this code and generates a rotate instruction for CPUs with one */ |
| 138 | #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) |
| 139 | #endif |
| 140 | |
| 141 | |
| 142 | /*----------------------------------------------------------------------------- |
| 143 | * Core murmurhash algorithm macros */ |
| 144 | |
| 145 | #define C1 (0xcc9e2d51) |
| 146 | #define C2 (0x1b873593) |
| 147 | |
| 148 | /* This is the main processing body of the algorithm. It operates |
| 149 | * on each full 32-bits of input. */ |
| 150 | #define DOBLOCK(h1, k1) do{ \ |
| 151 | k1 *= C1; \ |
| 152 | k1 = ROTL32(k1,15); \ |
| 153 | k1 *= C2; \ |
| 154 | \ |
| 155 | h1 ^= k1; \ |
| 156 | h1 = ROTL32(h1,13); \ |
| 157 | h1 = h1*5+0xe6546b64; \ |
| 158 | }while(0) |
| 159 | |
| 160 | |
| 161 | /* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ |
| 162 | /* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ |
| 163 | #define DOBYTES(cnt, h1, c, n, ptr, len) do{ \ |
| 164 | int _i = cnt; \ |
| 165 | while(_i--) { \ |
| 166 | c = c>>8 | *ptr++<<24; \ |
| 167 | n++; len--; \ |
| 168 | if(n==4) { \ |
| 169 | DOBLOCK(h1, c); \ |
| 170 | n = 0; \ |
| 171 | } \ |
| 172 | } }while(0) |
| 173 | |
| 174 | /*---------------------------------------------------------------------------*/ |
| 175 | |
| 176 | /* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed |
| 177 | * if wanted. Both ph1 and pcarry are required arguments. */ |
| 178 | void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len) |
| 179 | { |
| 180 | uint32_t h1 = *ph1; |
| 181 | uint32_t c = *pcarry; |
| 182 | |
| 183 | const uint8_t *ptr = (uint8_t*)key; |
| 184 | const uint8_t *end; |
| 185 | |
| 186 | /* Extract carry count from low 2 bits of c value */ |
| 187 | int n = c & 3; |
| 188 | |
| 189 | #if defined(UNALIGNED_SAFE) |
| 190 | /* This CPU handles unaligned word access */ |
| 191 | |
| 192 | /* Consume any carry bytes */ |
| 193 | int i = (4-n) & 3; |
| 194 | if(i && i <= len) { |
| 195 | DOBYTES(i, h1, c, n, ptr, len); |
| 196 | } |
| 197 | |
| 198 | /* Process 32-bit chunks */ |
| 199 | end = ptr + len/4*4; |
| 200 | for( ; ptr < end ; ptr+=4) { |
| 201 | uint32_t k1 = READ_UINT32(ptr); |
| 202 | DOBLOCK(h1, k1); |
| 203 | } |
| 204 | |
| 205 | #else /*UNALIGNED_SAFE*/ |
| 206 | /* This CPU does not handle unaligned word access */ |
| 207 | |
| 208 | /* Consume enough so that the next data byte is word aligned */ |
| 209 | int i = -(long)ptr & 3; |
| 210 | if(i && i <= len) { |
| 211 | DOBYTES(i, h1, c, n, ptr, len); |
| 212 | } |
| 213 | |
| 214 | /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ |
| 215 | end = ptr + len/4*4; |
| 216 | switch(n) { /* how many bytes in c */ |
| 217 | case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ |
| 218 | for( ; ptr < end ; ptr+=4) { |
| 219 | uint32_t k1 = READ_UINT32(ptr); |
| 220 | DOBLOCK(h1, k1); |
| 221 | } |
| 222 | break; |
| 223 | case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ |
| 224 | for( ; ptr < end ; ptr+=4) { |
| 225 | uint32_t k1 = c>>24; |
| 226 | c = READ_UINT32(ptr); |
| 227 | k1 |= c<<8; |
| 228 | DOBLOCK(h1, k1); |
| 229 | } |
| 230 | break; |
| 231 | case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ |
| 232 | for( ; ptr < end ; ptr+=4) { |
| 233 | uint32_t k1 = c>>16; |
| 234 | c = READ_UINT32(ptr); |
| 235 | k1 |= c<<16; |
| 236 | DOBLOCK(h1, k1); |
| 237 | } |
| 238 | break; |
| 239 | case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ |
| 240 | for( ; ptr < end ; ptr+=4) { |
| 241 | uint32_t k1 = c>>8; |
| 242 | c = READ_UINT32(ptr); |
| 243 | k1 |= c<<24; |
| 244 | DOBLOCK(h1, k1); |
| 245 | } |
| 246 | } |
| 247 | #endif /*UNALIGNED_SAFE*/ |
| 248 | |
| 249 | /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ |
| 250 | len -= len/4*4; |
| 251 | |
| 252 | /* Append any remaining bytes into carry */ |
| 253 | DOBYTES(len, h1, c, n, ptr, len); |
| 254 | |
| 255 | /* Copy out new running hash and carry */ |
| 256 | *ph1 = h1; |
| 257 | *pcarry = (c & ~0xff) | n; |
| 258 | } |
| 259 | |
| 260 | /*---------------------------------------------------------------------------*/ |
| 261 | |
| 262 | /* Finalize a hash. To match the original Murmur3A the total_length must be provided */ |
| 263 | uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length) |
| 264 | { |
| 265 | uint32_t k1; |
| 266 | int n = carry & 3; |
| 267 | if(n) { |
| 268 | k1 = carry >> (4-n)*8; |
| 269 | k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1; |
| 270 | } |
| 271 | h ^= total_length; |
| 272 | |
| 273 | /* fmix */ |
| 274 | h ^= h >> 16; |
| 275 | h *= 0x85ebca6b; |
| 276 | h ^= h >> 13; |
| 277 | h *= 0xc2b2ae35; |
| 278 | h ^= h >> 16; |
| 279 | |
| 280 | return h; |
| 281 | } |
| 282 | |
| 283 | /*---------------------------------------------------------------------------*/ |
| 284 | |
| 285 | /* Murmur3A compatable all-at-once */ |
| 286 | uint32_t PMurHash32(uint32_t seed, const void *key, int len) |
| 287 | { |
| 288 | uint32_t h1=seed, carry=0; |
| 289 | PMurHash32_Process(&h1, &carry, key, len); |
| 290 | return PMurHash32_Result(h1, carry, len); |
| 291 | } |
| 292 | |
| 293 | /*---------------------------------------------------------------------------*/ |
| 294 | |
| 295 | /* Provide an API suitable for smhasher */ |
| 296 | void PMurHash32_test(const void *key, int len, uint32_t seed, void *out) |
| 297 | { |
| 298 | uint32_t h1=seed, carry=0; |
| 299 | const uint8_t *ptr = (uint8_t*)key; |
| 300 | const uint8_t *end = ptr + len; |
| 301 | |
| 302 | #if 0 /* Exercise the progressive processing */ |
| 303 | while(ptr < end) { |
| 304 | //const uint8_t *mid = ptr + rand()%(end-ptr)+1; |
| 305 | const uint8_t *mid = ptr + (rand()&0xF); |
| 306 | mid = mid<end?mid:end; |
| 307 | PMurHash32_Process(&h1, &carry, ptr, mid-ptr); |
| 308 | ptr = mid; |
| 309 | } |
| 310 | #else |
| 311 | PMurHash32_Process(&h1, &carry, ptr, (int)(end-ptr)); |
| 312 | #endif |
| 313 | h1 = PMurHash32_Result(h1, carry, len); |
| 314 | *(uint32_t*)out = h1; |
| 315 | } |
| 316 | |
| 317 | /*---------------------------------------------------------------------------*/ |