blob: 707727718a8df4566393e0d03923fcf7adb8942f [file] [log] [blame]
tanjent@gmail.comf3b78972012-03-01 03:38:55 +00001#include "KeysetTest.h"
2
3#include "Platform.h"
4#include "Random.h"
5
6#include <map>
7#include <set>
8
9//-----------------------------------------------------------------------------
10// This should hopefully be a thorough and uambiguous test of whether a hash
11// is correctly implemented on a given platform
12
13bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
14{
15 const int hashbytes = hashbits / 8;
16
17 uint8_t * key = new uint8_t[256];
18 uint8_t * hashes = new uint8_t[hashbytes * 256];
19 uint8_t * final = new uint8_t[hashbytes];
20
21 memset(key,0,256);
22 memset(hashes,0,hashbytes*256);
23 memset(final,0,hashbytes);
24
25 // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
26 // the seed
27
28 for(int i = 0; i < 256; i++)
29 {
30 key[i] = (uint8_t)i;
31
32 hash(key,i,256-i,&hashes[i*hashbytes]);
33 }
34
35 // Then hash the result array
36
37 hash(hashes,hashbytes*256,0,final);
38
39 // The first four bytes of that hash, interpreted as a little-endian integer, is our
40 // verification value
41
42 uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
43
44 delete [] key;
45 delete [] hashes;
46 delete [] final;
47
48 //----------
49
50 if(expected != verification)
51 {
52 if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
53 return false;
54 }
55 else
56 {
57 if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
58 return true;
59 }
60}
61
62//----------------------------------------------------------------------------
63// Basic sanity checks -
64
65// A hash function should not be reading outside the bounds of the key.
66
67// Flipping a bit of a key should, with overwhelmingly high probability,
68// result in a different hash.
69
70// Hashing the same key twice should always produce the same result.
71
72// The memory alignment of the key should not affect the hash result.
73
74bool SanityTest ( pfHash hash, const int hashbits )
75{
76 printf("Running sanity check 1");
77
78 Rand r(883741);
79
80 bool result = true;
81
82 const int hashbytes = hashbits/8;
83 const int reps = 10;
tanjent@gmail.comdd462f22012-03-01 06:06:01 +000084 const int keymax = 256;
tanjent@gmail.comf3b78972012-03-01 03:38:55 +000085 const int pad = 16;
86 const int buflen = keymax + pad*3;
87
88 uint8_t * buffer1 = new uint8_t[buflen];
89 uint8_t * buffer2 = new uint8_t[buflen];
90
91 uint8_t * hash1 = new uint8_t[hashbytes];
92 uint8_t * hash2 = new uint8_t[hashbytes];
93
94 //----------
95
96 for(int irep = 0; irep < reps; irep++)
97 {
98 if(irep % (reps/10) == 0) printf(".");
99
100 for(int len = 4; len <= keymax; len++)
101 {
102 for(int offset = pad; offset < pad*2; offset++)
103 {
104 uint8_t * key1 = &buffer1[pad];
105 uint8_t * key2 = &buffer2[pad+offset];
106
107 r.rand_p(buffer1,buflen);
108 r.rand_p(buffer2,buflen);
109
110 memcpy(key2,key1,len);
111
112 hash(key1,len,0,hash1);
113
114 for(int bit = 0; bit < (len * 8); bit++)
115 {
116 // Flip a bit, hash the key -> we should get a different result.
117
118 flipbit(key2,len,bit);
119 hash(key2,len,0,hash2);
120
121 if(memcmp(hash1,hash2,hashbytes) == 0)
122 {
123 result = false;
124 }
125
126 // Flip it back, hash again -> we should get the original result.
127
128 flipbit(key2,len,bit);
129 hash(key2,len,0,hash2);
130
131 if(memcmp(hash1,hash2,hashbytes) != 0)
132 {
133 result = false;
134 }
135 }
136 }
137 }
138 }
139
140 if(result == false)
141 {
142 printf("*********FAIL*********\n");
143 }
144 else
145 {
146 printf("PASS\n");
147 }
148
tanjent@gmail.come813f9b2012-05-11 06:19:58 +0000149 delete [] buffer1;
150 delete [] buffer2;
151
tanjent@gmail.comf3b78972012-03-01 03:38:55 +0000152 delete [] hash1;
153 delete [] hash2;
154
155 return result;
156}
157
158//----------------------------------------------------------------------------
159// Appending zero bytes to a key should always cause it to produce a different
160// hash value
161
162void AppendedZeroesTest ( pfHash hash, const int hashbits )
163{
164 printf("Running sanity check 2");
165
166 Rand r(173994);
167
168 const int hashbytes = hashbits/8;
169
170 for(int rep = 0; rep < 100; rep++)
171 {
172 if(rep % 10 == 0) printf(".");
173
174 unsigned char key[256];
175
176 memset(key,0,sizeof(key));
177
178 r.rand_p(key,32);
179
180 uint32_t h1[16];
181 uint32_t h2[16];
182
183 memset(h1,0,hashbytes);
184 memset(h2,0,hashbytes);
185
186 for(int i = 0; i < 32; i++)
187 {
188 hash(key,32+i,0,h1);
189
190 if(memcmp(h1,h2,hashbytes) == 0)
191 {
192 printf("\n*********FAIL*********\n");
193 return;
194 }
195
196 memcpy(h2,h1,hashbytes);
197 }
198 }
199
200 printf("PASS\n");
201}
202
203//-----------------------------------------------------------------------------
204// Generate all keys of up to N bytes containing two non-zero bytes
205
206void TwoBytesKeygen ( int maxlen, KeyCallback & c )
207{
208 //----------
209 // Compute # of keys
210
211 int keycount = 0;
212
213 for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
214
215 keycount *= 255*255;
216
217 for(int i = 2; i <= maxlen; i++) keycount += i*255;
218
219 printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
220
221 c.reserve(keycount);
222
223 //----------
224 // Add all keys with one non-zero byte
225
226 uint8_t key[256];
227
228 memset(key,0,256);
229
230 for(int keylen = 2; keylen <= maxlen; keylen++)
231 for(int byteA = 0; byteA < keylen; byteA++)
232 {
233 for(int valA = 1; valA <= 255; valA++)
234 {
235 key[byteA] = (uint8_t)valA;
236
237 c(key,keylen);
238 }
239
240 key[byteA] = 0;
241 }
242
243 //----------
244 // Add all keys with two non-zero bytes
245
246 for(int keylen = 2; keylen <= maxlen; keylen++)
247 for(int byteA = 0; byteA < keylen-1; byteA++)
248 for(int byteB = byteA+1; byteB < keylen; byteB++)
249 {
250 for(int valA = 1; valA <= 255; valA++)
251 {
252 key[byteA] = (uint8_t)valA;
253
254 for(int valB = 1; valB <= 255; valB++)
255 {
256 key[byteB] = (uint8_t)valB;
257 c(key,keylen);
258 }
259
260 key[byteB] = 0;
261 }
262
263 key[byteA] = 0;
264 }
265}
266
267//-----------------------------------------------------------------------------
268
269template< typename hashtype >
270void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
271{
272 typedef CollisionMap<hashtype,ByteVec> cmap_t;
273
274 for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
275 {
276 const hashtype & hash = (*it).first;
277
278 printf("Hash - ");
279 printbytes(&hash,sizeof(hashtype));
280 printf("\n");
281
282 std::vector<ByteVec> & keys = (*it).second;
283
284 for(int i = 0; i < (int)keys.size(); i++)
285 {
286 ByteVec & key = keys[i];
287
288 printf("Key - ");
289 printbytes(&key[0],(int)key.size());
290 printf("\n");
291 }
292 printf("\n");
293 }
294
295}
296
297// test code
298
299void ReportCollisions ( pfHash hash )
300{
301 printf("Hashing keyset\n");
302
303 std::vector<uint128_t> hashes;
304
305 HashCallback<uint128_t> c(hash,hashes);
306
307 TwoBytesKeygen(20,c);
308
309 printf("%d hashes\n",(int)hashes.size());
310
311 printf("Finding collisions\n");
312
313 HashSet<uint128_t> collisions;
314
315 FindCollisions(hashes,collisions,1000);
316
317 printf("%d collisions\n",(int)collisions.size());
318
319 printf("Mapping collisions\n");
320
321 CollisionMap<uint128_t,ByteVec> cmap;
322
323 CollisionCallback<uint128_t> c2(hash,collisions,cmap);
324
325 TwoBytesKeygen(20,c2);
326
327 printf("Dumping collisions\n");
328
329 DumpCollisionMap(cmap);
330}