tanjent@gmail.com | f3b7897 | 2012-03-01 03:38:55 +0000 | [diff] [blame] | 1 | // Spooky Hash |
| 2 | // A 128-bit noncryptographic hash, for checksums and table lookup |
| 3 | // By Bob Jenkins. Public domain. |
| 4 | // Oct 31 2010: published framework, disclaimer ShortHash isn't right |
| 5 | // Nov 7 2010: disabled ShortHash |
| 6 | // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again |
| 7 | |
| 8 | #include <memory.h> |
| 9 | #include "Spooky.h" |
| 10 | |
| 11 | #define ALLOW_UNALIGNED_READS 1 |
| 12 | |
| 13 | // |
| 14 | // short hash ... it could be used on any message, |
| 15 | // but it's used by Spooky just for short messages. |
| 16 | // |
| 17 | void SpookyHash::Short( |
| 18 | const void *message, |
| 19 | size_t length, |
| 20 | uint64 *hash1, |
| 21 | uint64 *hash2) |
| 22 | { |
| 23 | uint64 buf[sc_numVars]; |
| 24 | union |
| 25 | { |
| 26 | const uint8 *p8; |
| 27 | uint32 *p32; |
| 28 | uint64 *p64; |
| 29 | size_t i; |
| 30 | } u; |
| 31 | |
| 32 | u.p8 = (const uint8 *)message; |
| 33 | |
| 34 | if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) |
| 35 | { |
| 36 | memcpy(buf, message, length); |
| 37 | u.p64 = buf; |
| 38 | } |
| 39 | |
| 40 | size_t remainder = length%32; |
| 41 | uint64 a=*hash1; |
| 42 | uint64 b=*hash2; |
| 43 | uint64 c=sc_const; |
| 44 | uint64 d=sc_const; |
| 45 | |
| 46 | if (length > 15) |
| 47 | { |
| 48 | const uint64 *end = u.p64 + (length/32)*4; |
| 49 | |
| 50 | // handle all complete sets of 32 bytes |
| 51 | for (; u.p64 < end; u.p64 += 4) |
| 52 | { |
| 53 | c += u.p64[0]; |
| 54 | d += u.p64[1]; |
| 55 | ShortMix(a,b,c,d); |
| 56 | a += u.p64[2]; |
| 57 | b += u.p64[3]; |
| 58 | } |
| 59 | |
| 60 | //Handle the case of 16+ remaining bytes. |
| 61 | if (remainder >= 16) |
| 62 | { |
| 63 | c += u.p64[0]; |
| 64 | d += u.p64[1]; |
| 65 | ShortMix(a,b,c,d); |
| 66 | u.p64 += 2; |
| 67 | remainder -= 16; |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | // Handle the last 0..15 bytes, and its length |
| 72 | d = ((uint64)length) << 56; |
| 73 | switch (remainder) |
| 74 | { |
| 75 | case 15: |
| 76 | d += ((uint64)u.p8[14]) << 48; |
| 77 | case 14: |
| 78 | d += ((uint64)u.p8[13]) << 40; |
| 79 | case 13: |
| 80 | d += ((uint64)u.p8[12]) << 32; |
| 81 | case 12: |
| 82 | d += u.p32[2]; |
| 83 | c += u.p64[0]; |
| 84 | break; |
| 85 | case 11: |
| 86 | d += ((uint64)u.p8[10]) << 16; |
| 87 | case 10: |
| 88 | d += ((uint64)u.p8[9]) << 8; |
| 89 | case 9: |
| 90 | d += (uint64)u.p8[8]; |
| 91 | case 8: |
| 92 | c += u.p64[0]; |
| 93 | break; |
| 94 | case 7: |
| 95 | c += ((uint64)u.p8[6]) << 48; |
| 96 | case 6: |
| 97 | c += ((uint64)u.p8[5]) << 40; |
| 98 | case 5: |
| 99 | c += ((uint64)u.p8[4]) << 32; |
| 100 | case 4: |
| 101 | c += u.p32[0]; |
| 102 | break; |
| 103 | case 3: |
| 104 | c += ((uint64)u.p8[2]) << 16; |
| 105 | case 2: |
| 106 | c += ((uint64)u.p8[1]) << 8; |
| 107 | case 1: |
| 108 | c += (uint64)u.p8[0]; |
| 109 | break; |
| 110 | case 0: |
| 111 | c += sc_const; |
| 112 | d += sc_const; |
| 113 | } |
| 114 | ShortEnd(a,b,c,d); |
| 115 | *hash1 = a; |
| 116 | *hash2 = b; |
| 117 | } |
| 118 | |
| 119 | |
| 120 | |
| 121 | |
| 122 | // do the whole hash in one call |
| 123 | void SpookyHash::Hash128( |
| 124 | const void *message, |
| 125 | size_t length, |
| 126 | uint64 *hash1, |
| 127 | uint64 *hash2) |
| 128 | { |
| 129 | if (length < sc_bufSize) |
| 130 | { |
| 131 | Short(message, length, hash1, hash2); |
| 132 | return; |
| 133 | } |
| 134 | |
| 135 | uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; |
| 136 | uint64 buf[sc_numVars]; |
| 137 | uint64 *end; |
| 138 | union |
| 139 | { |
| 140 | const uint8 *p8; |
| 141 | uint64 *p64; |
| 142 | size_t i; |
| 143 | } u; |
| 144 | size_t remainder; |
| 145 | |
| 146 | h0=h3=h6=h9 = *hash1; |
| 147 | h1=h4=h7=h10 = *hash2; |
| 148 | h2=h5=h8=h11 = sc_const; |
| 149 | |
| 150 | u.p8 = (const uint8 *)message; |
| 151 | end = u.p64 + (length/sc_blockSize)*sc_numVars; |
| 152 | |
| 153 | // handle all whole sc_blockSize blocks of bytes |
| 154 | if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) |
| 155 | { |
| 156 | while (u.p64 < end) |
| 157 | { |
| 158 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 159 | u.p64 += sc_numVars; |
| 160 | } |
| 161 | } |
| 162 | else |
| 163 | { |
| 164 | while (u.p64 < end) |
| 165 | { |
| 166 | memcpy(buf, u.p64, sc_blockSize); |
| 167 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 168 | u.p64 += sc_numVars; |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | // handle the last partial block of sc_blockSize bytes |
| 173 | remainder = (length - ((const uint8 *)end-(const uint8 *)message)); |
| 174 | memcpy(buf, end, remainder); |
| 175 | memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); |
| 176 | ((uint8 *)buf)[sc_blockSize-1] = remainder; |
| 177 | Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 178 | |
| 179 | // do some final mixing |
| 180 | End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 181 | *hash1 = h0; |
| 182 | *hash2 = h1; |
| 183 | } |
| 184 | |
| 185 | |
| 186 | |
| 187 | // init spooky state |
| 188 | void SpookyHash::Init(uint64 seed1, uint64 seed2) |
| 189 | { |
| 190 | m_length = 0; |
| 191 | m_remainder = 0; |
| 192 | m_state[0] = seed1; |
| 193 | m_state[1] = seed2; |
| 194 | } |
| 195 | |
| 196 | |
| 197 | // add a message fragment to the state |
| 198 | void SpookyHash::Update(const void *message, size_t length) |
| 199 | { |
| 200 | uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; |
| 201 | size_t newLength = length + m_remainder; |
| 202 | uint8 remainder; |
| 203 | union |
| 204 | { |
| 205 | const uint8 *p8; |
| 206 | uint64 *p64; |
| 207 | size_t i; |
| 208 | } u; |
| 209 | const uint64 *end; |
| 210 | |
| 211 | // Is this message fragment too short? If it is, stuff it away. |
| 212 | if (newLength < sc_bufSize) |
| 213 | { |
| 214 | memcpy(&((uint8 *)m_data)[m_remainder], message, length); |
| 215 | m_length = length + m_length; |
| 216 | m_remainder = (uint8)newLength; |
| 217 | return; |
| 218 | } |
| 219 | |
| 220 | // init the variables |
| 221 | if (m_length < sc_bufSize) |
| 222 | { |
| 223 | h0=h3=h6=h9 = m_state[0]; |
| 224 | h1=h4=h7=h10 = m_state[1]; |
| 225 | h2=h5=h8=h11 = sc_const; |
| 226 | } |
| 227 | else |
| 228 | { |
| 229 | h0 = m_state[0]; |
| 230 | h1 = m_state[1]; |
| 231 | h2 = m_state[2]; |
| 232 | h3 = m_state[3]; |
| 233 | h4 = m_state[4]; |
| 234 | h5 = m_state[5]; |
| 235 | h6 = m_state[6]; |
| 236 | h7 = m_state[7]; |
| 237 | h8 = m_state[8]; |
| 238 | h9 = m_state[9]; |
| 239 | h10 = m_state[10]; |
| 240 | h11 = m_state[11]; |
| 241 | } |
| 242 | m_length = length + m_length; |
| 243 | |
| 244 | // if we've got anything stuffed away, use it now |
| 245 | if (m_remainder) |
| 246 | { |
| 247 | uint8 prefix = sc_bufSize-m_remainder; |
| 248 | memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); |
| 249 | u.p64 = m_data; |
| 250 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 251 | Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 252 | u.p8 = ((const uint8 *)message) + prefix; |
| 253 | length -= prefix; |
| 254 | } |
| 255 | else |
| 256 | { |
| 257 | u.p8 = (const uint8 *)message; |
| 258 | } |
| 259 | |
| 260 | // handle all whole blocks of sc_blockSize bytes |
| 261 | end = u.p64 + (length/sc_blockSize)*sc_numVars; |
| 262 | remainder = (uint8)(length-((const uint8 *)end-u.p8)); |
| 263 | if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) |
| 264 | { |
| 265 | while (u.p64 < end) |
| 266 | { |
| 267 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 268 | u.p64 += sc_numVars; |
| 269 | } |
| 270 | } |
| 271 | else |
| 272 | { |
| 273 | while (u.p64 < end) |
| 274 | { |
| 275 | memcpy(m_data, u.p8, sc_blockSize); |
| 276 | Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 277 | u.p64 += sc_numVars; |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | // stuff away the last few bytes |
| 282 | m_remainder = remainder; |
| 283 | memcpy(m_data, end, remainder); |
| 284 | |
| 285 | // stuff away the variables |
| 286 | m_state[0] = h0; |
| 287 | m_state[1] = h1; |
| 288 | m_state[2] = h2; |
| 289 | m_state[3] = h3; |
| 290 | m_state[4] = h4; |
| 291 | m_state[5] = h5; |
| 292 | m_state[6] = h6; |
| 293 | m_state[7] = h7; |
| 294 | m_state[8] = h8; |
| 295 | m_state[9] = h9; |
| 296 | m_state[10] = h10; |
| 297 | m_state[11] = h11; |
| 298 | } |
| 299 | |
| 300 | |
| 301 | // report the hash for the concatenation of all message fragments so far |
| 302 | void SpookyHash::Final(uint64 *hash1, uint64 *hash2) |
| 303 | { |
| 304 | // init the variables |
| 305 | if (m_length < sc_bufSize) |
| 306 | { |
| 307 | Short( m_data, m_length, hash1, hash2); |
| 308 | return; |
| 309 | } |
| 310 | |
| 311 | const uint64 *data = (const uint64 *)m_data; |
| 312 | uint8 remainder = m_remainder; |
| 313 | |
| 314 | uint64 h0 = m_state[0]; |
| 315 | uint64 h1 = m_state[1]; |
| 316 | uint64 h2 = m_state[2]; |
| 317 | uint64 h3 = m_state[3]; |
| 318 | uint64 h4 = m_state[4]; |
| 319 | uint64 h5 = m_state[5]; |
| 320 | uint64 h6 = m_state[6]; |
| 321 | uint64 h7 = m_state[7]; |
| 322 | uint64 h8 = m_state[8]; |
| 323 | uint64 h9 = m_state[9]; |
| 324 | uint64 h10 = m_state[10]; |
| 325 | uint64 h11 = m_state[11]; |
| 326 | |
| 327 | if (remainder >= sc_blockSize) |
| 328 | { |
| 329 | // m_data can contain two blocks; handle any whole first block |
| 330 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 331 | data += sc_numVars; |
| 332 | remainder -= sc_blockSize; |
| 333 | } |
| 334 | |
| 335 | // mix in the last partial block, and the length mod sc_blockSize |
| 336 | memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); |
| 337 | |
| 338 | ((uint8 *)data)[sc_blockSize-1] = remainder; |
| 339 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 340 | |
| 341 | // do some final mixing |
| 342 | End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); |
| 343 | |
| 344 | *hash1 = h0; |
| 345 | *hash2 = h1; |
| 346 | } |
| 347 | |