tanjent@gmail.com | f3b7897 | 2012-03-01 03:38:55 +0000 | [diff] [blame] | 1 | #include "KeysetTest.h" |
| 2 | |
| 3 | #include "Platform.h" |
| 4 | #include "Random.h" |
| 5 | |
| 6 | #include <map> |
| 7 | #include <set> |
| 8 | |
| 9 | //----------------------------------------------------------------------------- |
| 10 | // This should hopefully be a thorough and uambiguous test of whether a hash |
| 11 | // is correctly implemented on a given platform |
| 12 | |
| 13 | bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose ) |
| 14 | { |
| 15 | const int hashbytes = hashbits / 8; |
| 16 | |
| 17 | uint8_t * key = new uint8_t[256]; |
| 18 | uint8_t * hashes = new uint8_t[hashbytes * 256]; |
| 19 | uint8_t * final = new uint8_t[hashbytes]; |
| 20 | |
| 21 | memset(key,0,256); |
| 22 | memset(hashes,0,hashbytes*256); |
| 23 | memset(final,0,hashbytes); |
| 24 | |
| 25 | // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as |
| 26 | // the seed |
| 27 | |
| 28 | for(int i = 0; i < 256; i++) |
| 29 | { |
| 30 | key[i] = (uint8_t)i; |
| 31 | |
| 32 | hash(key,i,256-i,&hashes[i*hashbytes]); |
| 33 | } |
| 34 | |
| 35 | // Then hash the result array |
| 36 | |
| 37 | hash(hashes,hashbytes*256,0,final); |
| 38 | |
| 39 | // The first four bytes of that hash, interpreted as a little-endian integer, is our |
| 40 | // verification value |
| 41 | |
| 42 | uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24); |
| 43 | |
| 44 | delete [] key; |
| 45 | delete [] hashes; |
| 46 | delete [] final; |
| 47 | |
| 48 | //---------- |
| 49 | |
| 50 | if(expected != verification) |
| 51 | { |
| 52 | if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected); |
| 53 | return false; |
| 54 | } |
| 55 | else |
| 56 | { |
| 57 | if(verbose) printf("Verification value 0x%08X : Passed!\n",verification); |
| 58 | return true; |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | //---------------------------------------------------------------------------- |
| 63 | // Basic sanity checks - |
| 64 | |
| 65 | // A hash function should not be reading outside the bounds of the key. |
| 66 | |
| 67 | // Flipping a bit of a key should, with overwhelmingly high probability, |
| 68 | // result in a different hash. |
| 69 | |
| 70 | // Hashing the same key twice should always produce the same result. |
| 71 | |
| 72 | // The memory alignment of the key should not affect the hash result. |
| 73 | |
| 74 | bool SanityTest ( pfHash hash, const int hashbits ) |
| 75 | { |
| 76 | printf("Running sanity check 1"); |
| 77 | |
| 78 | Rand r(883741); |
| 79 | |
| 80 | bool result = true; |
| 81 | |
| 82 | const int hashbytes = hashbits/8; |
| 83 | const int reps = 10; |
tanjent@gmail.com | dd462f2 | 2012-03-01 06:06:01 +0000 | [diff] [blame] | 84 | const int keymax = 256; |
tanjent@gmail.com | f3b7897 | 2012-03-01 03:38:55 +0000 | [diff] [blame] | 85 | const int pad = 16; |
| 86 | const int buflen = keymax + pad*3; |
| 87 | |
| 88 | uint8_t * buffer1 = new uint8_t[buflen]; |
| 89 | uint8_t * buffer2 = new uint8_t[buflen]; |
| 90 | |
| 91 | uint8_t * hash1 = new uint8_t[hashbytes]; |
| 92 | uint8_t * hash2 = new uint8_t[hashbytes]; |
| 93 | |
| 94 | //---------- |
| 95 | |
| 96 | for(int irep = 0; irep < reps; irep++) |
| 97 | { |
| 98 | if(irep % (reps/10) == 0) printf("."); |
| 99 | |
| 100 | for(int len = 4; len <= keymax; len++) |
| 101 | { |
| 102 | for(int offset = pad; offset < pad*2; offset++) |
| 103 | { |
| 104 | uint8_t * key1 = &buffer1[pad]; |
| 105 | uint8_t * key2 = &buffer2[pad+offset]; |
| 106 | |
| 107 | r.rand_p(buffer1,buflen); |
| 108 | r.rand_p(buffer2,buflen); |
| 109 | |
| 110 | memcpy(key2,key1,len); |
| 111 | |
| 112 | hash(key1,len,0,hash1); |
| 113 | |
| 114 | for(int bit = 0; bit < (len * 8); bit++) |
| 115 | { |
| 116 | // Flip a bit, hash the key -> we should get a different result. |
| 117 | |
| 118 | flipbit(key2,len,bit); |
| 119 | hash(key2,len,0,hash2); |
| 120 | |
| 121 | if(memcmp(hash1,hash2,hashbytes) == 0) |
| 122 | { |
| 123 | result = false; |
| 124 | } |
| 125 | |
| 126 | // Flip it back, hash again -> we should get the original result. |
| 127 | |
| 128 | flipbit(key2,len,bit); |
| 129 | hash(key2,len,0,hash2); |
| 130 | |
| 131 | if(memcmp(hash1,hash2,hashbytes) != 0) |
| 132 | { |
| 133 | result = false; |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | if(result == false) |
| 141 | { |
| 142 | printf("*********FAIL*********\n"); |
| 143 | } |
| 144 | else |
| 145 | { |
| 146 | printf("PASS\n"); |
| 147 | } |
| 148 | |
tanjent@gmail.com | e813f9b | 2012-05-11 06:19:58 +0000 | [diff] [blame] | 149 | delete [] buffer1; |
| 150 | delete [] buffer2; |
| 151 | |
tanjent@gmail.com | f3b7897 | 2012-03-01 03:38:55 +0000 | [diff] [blame] | 152 | delete [] hash1; |
| 153 | delete [] hash2; |
| 154 | |
| 155 | return result; |
| 156 | } |
| 157 | |
| 158 | //---------------------------------------------------------------------------- |
| 159 | // Appending zero bytes to a key should always cause it to produce a different |
| 160 | // hash value |
| 161 | |
| 162 | void AppendedZeroesTest ( pfHash hash, const int hashbits ) |
| 163 | { |
| 164 | printf("Running sanity check 2"); |
| 165 | |
| 166 | Rand r(173994); |
| 167 | |
| 168 | const int hashbytes = hashbits/8; |
| 169 | |
| 170 | for(int rep = 0; rep < 100; rep++) |
| 171 | { |
| 172 | if(rep % 10 == 0) printf("."); |
| 173 | |
| 174 | unsigned char key[256]; |
| 175 | |
| 176 | memset(key,0,sizeof(key)); |
| 177 | |
| 178 | r.rand_p(key,32); |
| 179 | |
| 180 | uint32_t h1[16]; |
| 181 | uint32_t h2[16]; |
| 182 | |
| 183 | memset(h1,0,hashbytes); |
| 184 | memset(h2,0,hashbytes); |
| 185 | |
| 186 | for(int i = 0; i < 32; i++) |
| 187 | { |
| 188 | hash(key,32+i,0,h1); |
| 189 | |
| 190 | if(memcmp(h1,h2,hashbytes) == 0) |
| 191 | { |
| 192 | printf("\n*********FAIL*********\n"); |
| 193 | return; |
| 194 | } |
| 195 | |
| 196 | memcpy(h2,h1,hashbytes); |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | printf("PASS\n"); |
| 201 | } |
| 202 | |
| 203 | //----------------------------------------------------------------------------- |
| 204 | // Generate all keys of up to N bytes containing two non-zero bytes |
| 205 | |
| 206 | void TwoBytesKeygen ( int maxlen, KeyCallback & c ) |
| 207 | { |
| 208 | //---------- |
| 209 | // Compute # of keys |
| 210 | |
| 211 | int keycount = 0; |
| 212 | |
| 213 | for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2); |
| 214 | |
| 215 | keycount *= 255*255; |
| 216 | |
| 217 | for(int i = 2; i <= maxlen; i++) keycount += i*255; |
| 218 | |
| 219 | printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount); |
| 220 | |
| 221 | c.reserve(keycount); |
| 222 | |
| 223 | //---------- |
| 224 | // Add all keys with one non-zero byte |
| 225 | |
| 226 | uint8_t key[256]; |
| 227 | |
| 228 | memset(key,0,256); |
| 229 | |
| 230 | for(int keylen = 2; keylen <= maxlen; keylen++) |
| 231 | for(int byteA = 0; byteA < keylen; byteA++) |
| 232 | { |
| 233 | for(int valA = 1; valA <= 255; valA++) |
| 234 | { |
| 235 | key[byteA] = (uint8_t)valA; |
| 236 | |
| 237 | c(key,keylen); |
| 238 | } |
| 239 | |
| 240 | key[byteA] = 0; |
| 241 | } |
| 242 | |
| 243 | //---------- |
| 244 | // Add all keys with two non-zero bytes |
| 245 | |
| 246 | for(int keylen = 2; keylen <= maxlen; keylen++) |
| 247 | for(int byteA = 0; byteA < keylen-1; byteA++) |
| 248 | for(int byteB = byteA+1; byteB < keylen; byteB++) |
| 249 | { |
| 250 | for(int valA = 1; valA <= 255; valA++) |
| 251 | { |
| 252 | key[byteA] = (uint8_t)valA; |
| 253 | |
| 254 | for(int valB = 1; valB <= 255; valB++) |
| 255 | { |
| 256 | key[byteB] = (uint8_t)valB; |
| 257 | c(key,keylen); |
| 258 | } |
| 259 | |
| 260 | key[byteB] = 0; |
| 261 | } |
| 262 | |
| 263 | key[byteA] = 0; |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | //----------------------------------------------------------------------------- |
| 268 | |
| 269 | template< typename hashtype > |
| 270 | void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap ) |
| 271 | { |
| 272 | typedef CollisionMap<hashtype,ByteVec> cmap_t; |
| 273 | |
| 274 | for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it) |
| 275 | { |
| 276 | const hashtype & hash = (*it).first; |
| 277 | |
| 278 | printf("Hash - "); |
| 279 | printbytes(&hash,sizeof(hashtype)); |
| 280 | printf("\n"); |
| 281 | |
| 282 | std::vector<ByteVec> & keys = (*it).second; |
| 283 | |
| 284 | for(int i = 0; i < (int)keys.size(); i++) |
| 285 | { |
| 286 | ByteVec & key = keys[i]; |
| 287 | |
| 288 | printf("Key - "); |
| 289 | printbytes(&key[0],(int)key.size()); |
| 290 | printf("\n"); |
| 291 | } |
| 292 | printf("\n"); |
| 293 | } |
| 294 | |
| 295 | } |
| 296 | |
| 297 | // test code |
| 298 | |
| 299 | void ReportCollisions ( pfHash hash ) |
| 300 | { |
| 301 | printf("Hashing keyset\n"); |
| 302 | |
| 303 | std::vector<uint128_t> hashes; |
| 304 | |
| 305 | HashCallback<uint128_t> c(hash,hashes); |
| 306 | |
| 307 | TwoBytesKeygen(20,c); |
| 308 | |
| 309 | printf("%d hashes\n",(int)hashes.size()); |
| 310 | |
| 311 | printf("Finding collisions\n"); |
| 312 | |
| 313 | HashSet<uint128_t> collisions; |
| 314 | |
| 315 | FindCollisions(hashes,collisions,1000); |
| 316 | |
| 317 | printf("%d collisions\n",(int)collisions.size()); |
| 318 | |
| 319 | printf("Mapping collisions\n"); |
| 320 | |
| 321 | CollisionMap<uint128_t,ByteVec> cmap; |
| 322 | |
| 323 | CollisionCallback<uint128_t> c2(hash,collisions,cmap); |
| 324 | |
| 325 | TwoBytesKeygen(20,c2); |
| 326 | |
| 327 | printf("Dumping collisions\n"); |
| 328 | |
| 329 | DumpCollisionMap(cmap); |
| 330 | } |