blob: dce54d26381bfa75432c7ecd4c4c5990aeaa2f4c [file] [log] [blame]
tanjent@gmail.comf3b78972012-03-01 03:38:55 +00001//-----------------------------------------------------------------------------
2// Keyset tests generate various sorts of difficult-to-hash keysets and compare
3// the distribution and collision frequency of the hash results against an
4// ideal random distribution
5
6// The sanity checks are also in this cpp/h
7
8#pragma once
9
10#include "Types.h"
11#include "Stats.h"
12#include "Random.h" // for rand_p
13
14#include <algorithm> // for std::swap
15#include <assert.h>
16
17//-----------------------------------------------------------------------------
18// Sanity tests
19
20bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
21bool SanityTest ( pfHash hash, const int hashbits );
22void AppendedZeroesTest ( pfHash hash, const int hashbits );
23
24//-----------------------------------------------------------------------------
25// Keyset 'Combination' - all possible combinations of input blocks
26
27template< typename hashtype >
28void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
29 uint32_t * blocks, int blockcount,
30 pfHash hash, std::vector<hashtype> & hashes )
31{
32 if(len == maxlen) return;
33
34 for(int i = 0; i < blockcount; i++)
35 {
36 key[len] = blocks[i];
37
38 //if(len == maxlen-1)
39 {
40 hashtype h;
41 hash(key,(len+1) * sizeof(uint32_t),0,&h);
42 hashes.push_back(h);
43 }
44
45 //else
46 {
47 CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
48 }
49 }
50}
51
52template< typename hashtype >
53bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
54{
55 printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
56
57 //----------
58
59 std::vector<hashtype> hashes;
60
61 uint32_t * key = new uint32_t[maxlen];
62
63 CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
64
65 delete [] key;
66
67 printf("%d keys\n",(int)hashes.size());
68
69 //----------
70
71 bool result = true;
72
73 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
74
75 printf("\n");
76
77 return result;
78}
79
80//----------------------------------------------------------------------------
81// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
82// consisting of all possible permutations of those blocks
83
84template< typename hashtype >
85void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
86{
87 if(k == blockcount-1)
88 {
89 hashtype h;
90
91 hash(blocks,blockcount * sizeof(uint32_t),0,&h);
92
93 hashes.push_back(h);
94
95 return;
96 }
97
98 for(int i = k; i < blockcount; i++)
99 {
100 std::swap(blocks[k],blocks[i]);
101
102 PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
103
104 std::swap(blocks[k],blocks[i]);
105 }
106}
107
108template< typename hashtype >
109bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
110{
111 printf("Keyset 'Permutation' - %d blocks - ",blockcount);
112
113 //----------
114
115 std::vector<hashtype> hashes;
116
117 PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
118
119 printf("%d keys\n",(int)hashes.size());
120
121 //----------
122
123 bool result = true;
124
125 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
126
127 printf("\n");
128
129 return result;
130}
131
132//-----------------------------------------------------------------------------
133// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
134
135template < typename keytype, typename hashtype >
136void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
137{
138 const int nbytes = sizeof(keytype);
139 const int nbits = nbytes * 8;
140
141 hashtype h;
142
143 for(int i = start; i < nbits; i++)
144 {
145 flipbit(&k,nbytes,i);
146
147 if(inclusive || (bitsleft == 1))
148 {
149 hash(&k,sizeof(keytype),0,&h);
150 hashes.push_back(h);
151 }
152
153 if(bitsleft > 1)
154 {
155 SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
156 }
157
158 flipbit(&k,nbytes,i);
159 }
160}
161
162//----------
163
164template < int keybits, typename hashtype >
165bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram )
166{
167 printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
168
169 typedef Blob<keybits> keytype;
170
171 std::vector<hashtype> hashes;
172
173 keytype k;
174 memset(&k,0,sizeof(k));
175
176 if(inclusive)
177 {
178 hashtype h;
179
180 hash(&k,sizeof(keytype),0,&h);
181
182 hashes.push_back(h);
183 }
184
185 SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
186
187 printf("%d keys\n",(int)hashes.size());
188
189 bool result = true;
190
191 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
192
193 printf("\n");
194
195 return result;
196}
197
198//-----------------------------------------------------------------------------
199// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
200// all possible keys with bits set in that window
201
202template < typename keytype, typename hashtype >
203bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
204{
205 const int keybits = sizeof(keytype) * 8;
206 const int keycount = 1 << windowbits;
207
208 std::vector<hashtype> hashes;
209 hashes.resize(keycount);
210
211 bool result = true;
212
213 int testcount = keybits;
214
215 printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
216
217 for(int j = 0; j <= testcount; j++)
218 {
219 int minbit = j;
220
221 keytype key;
222
223 for(int i = 0; i < keycount; i++)
224 {
225 key = i;
226 //key = key << minbit;
227
228 lrot(&key,sizeof(keytype),minbit);
229
230 hash(&key,sizeof(keytype),0,&hashes[i]);
231 }
232
233 printf("Window at %3d - ",j);
234
235 result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
236
237 //printf("\n");
238 }
239
240 return result;
241}
242
243//-----------------------------------------------------------------------------
244// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
245// bytes.
246
247// (This keyset type is designed to make MurmurHash2 fail)
248
249template < typename hashtype >
250bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
251{
252 printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
253
254 Rand r(483723);
255
256 std::vector<hashtype> hashes;
257 hashes.resize(keycount);
258
259 int keyLen = cycleLen * cycleReps;
260
261 uint8_t * cycle = new uint8_t[cycleLen + 16];
262 uint8_t * key = new uint8_t[keyLen];
263
264 //----------
265
266 for(int i = 0; i < keycount; i++)
267 {
268 r.rand_p(cycle,cycleLen);
269
270 *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
271
272 for(int j = 0; j < keyLen; j++)
273 {
274 key[j] = cycle[j % cycleLen];
275 }
276
277 hash(key,keyLen,0,&hashes[i]);
278 }
279
280 //----------
281
282 bool result = true;
283
284 result &= TestHashList(hashes,true,true,drawDiagram);
285 printf("\n");
286
287 delete [] cycle;
288 delete [] key;
289
290 return result;
291}
292
293//-----------------------------------------------------------------------------
294// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
295
296void TwoBytesKeygen ( int maxlen, KeyCallback & c );
297
298template < typename hashtype >
299bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
300{
301 std::vector<hashtype> hashes;
302
303 HashCallback<hashtype> c(hash,hashes);
304
305 TwoBytesKeygen(maxlen,c);
306
307 bool result = true;
308
309 result &= TestHashList(hashes,true,true,drawDiagram);
310 printf("\n");
311
312 return result;
313}
314
315//-----------------------------------------------------------------------------
316// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
317// where "core" consists of all possible combinations of the given character
318// set of length N.
319
320template < typename hashtype >
321bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
322{
323 const int prefixlen = (int)strlen(prefix);
324 const int suffixlen = (int)strlen(suffix);
325 const int corecount = (int)strlen(coreset);
326
327 const int keybytes = prefixlen + corelen + suffixlen;
328 const int keycount = (int)pow(double(corecount),double(corelen));
329
330 printf("Keyset 'Text' - keys of form \"%s[",prefix);
331 for(int i = 0; i < corelen; i++) printf("X");
332 printf("]%s\" - %d keys\n",suffix,keycount);
333
334 uint8_t * key = new uint8_t[keybytes+1];
335
336 key[keybytes] = 0;
337
338 memcpy(key,prefix,prefixlen);
339 memcpy(key+prefixlen+corelen,suffix,suffixlen);
340
341 //----------
342
343 std::vector<hashtype> hashes;
344 hashes.resize(keycount);
345
346 for(int i = 0; i < keycount; i++)
347 {
348 int t = i;
349
350 for(int j = 0; j < corelen; j++)
351 {
352 key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
353 }
354
355 hash(key,keybytes,0,&hashes[i]);
356 }
357
358 //----------
359
360 bool result = true;
361
362 result &= TestHashList(hashes,true,true,drawDiagram);
363
364 printf("\n");
365
366 delete [] key;
367
368 return result;
369}
370
371//-----------------------------------------------------------------------------
372// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
373
374// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
375
376template < typename hashtype >
377bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
378{
379 int keycount = 64*1024;
380
381 printf("Keyset 'Zeroes' - %d keys\n",keycount);
382
383 unsigned char * nullblock = new unsigned char[keycount];
384 memset(nullblock,0,keycount);
385
386 //----------
387
388 std::vector<hashtype> hashes;
389
390 hashes.resize(keycount);
391
392 for(int i = 0; i < keycount; i++)
393 {
394 hash(nullblock,i,0,&hashes[i]);
395 }
396
397 bool result = true;
398
399 result &= TestHashList(hashes,true,true,drawDiagram);
400
401 printf("\n");
402
403 delete [] nullblock;
404
405 return result;
406}
407
408//-----------------------------------------------------------------------------
409// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
410
411template < typename hashtype >
412bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
413{
414 printf("Keyset 'Seed' - %d keys\n",keycount);
415
416 const char * text = "The quick brown fox jumps over the lazy dog";
417 const int len = (int)strlen(text);
418
419 //----------
420
421 std::vector<hashtype> hashes;
422
423 hashes.resize(keycount);
424
425 for(int i = 0; i < keycount; i++)
426 {
427 hash(text,len,i,&hashes[i]);
428 }
429
430 bool result = true;
431
432 result &= TestHashList(hashes,true,true,drawDiagram);
433
434 printf("\n");
435
436 return result;
437}
438
439//-----------------------------------------------------------------------------