blob: f1bfeea2fb2d4ea94c0ef6acc280c44145e6e976 [file] [log] [blame]
tanjent@gmail.comf3b78972012-03-01 03:38:55 +00001//-----------------------------------------------------------------------------
2// Flipping a single bit of a key should cause an "avalanche" of changes in
3// the hash function's output. Ideally, each output bits should flip 50% of
4// the time - if the probability of an output bit flipping is not 50%, that bit
5// is "biased". Too much bias means that patterns applied to the input will
6// cause "echoes" of the patterns in the output, which in turn can cause the
7// hash function to fail to create an even, random distribution of hash values.
8
9
10#pragma once
11
12#include "Types.h"
13#include "Random.h"
14
15#include <vector>
16#include <stdio.h>
17#include <math.h>
18
19// Avalanche fails if a bit is biased by more than 1%
20
21#define AVALANCHE_FAIL 0.01
22
23double maxBias ( std::vector<int> & counts, int reps );
24
25//-----------------------------------------------------------------------------
26
27template < typename keytype, typename hashtype >
28void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
29{
30 const int keybytes = sizeof(keytype);
31 const int hashbytes = sizeof(hashtype);
32
33 const int keybits = keybytes * 8;
34 const int hashbits = hashbytes * 8;
35
36 keytype K;
37 hashtype A,B;
38
39 for(int irep = 0; irep < reps; irep++)
40 {
41 if(irep % (reps/10) == 0) printf(".");
42
43 r.rand_p(&K,keybytes);
44
45 hash(&K,keybytes,0,&A);
46
47 int * cursor = &counts[0];
48
49 for(int iBit = 0; iBit < keybits; iBit++)
50 {
51 flipbit(&K,keybytes,iBit);
52 hash(&K,keybytes,0,&B);
53 flipbit(&K,keybytes,iBit);
54
55 for(int iOut = 0; iOut < hashbits; iOut++)
56 {
57 int bitA = getbit(&A,hashbytes,iOut);
58 int bitB = getbit(&B,hashbytes,iOut);
59
60 (*cursor++) += (bitA ^ bitB);
61 }
62 }
63 }
64}
65
66//-----------------------------------------------------------------------------
67
68template < typename keytype, typename hashtype >
69bool AvalancheTest ( pfHash hash, const int reps )
70{
71 Rand r(48273);
72
73 const int keybytes = sizeof(keytype);
74 const int hashbytes = sizeof(hashtype);
75
76 const int keybits = keybytes * 8;
77 const int hashbits = hashbytes * 8;
78
79 printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
80
81 //----------
82
83 std::vector<int> bins(keybits*hashbits,0);
84
85 calcBias<keytype,hashtype>(hash,bins,reps,r);
86
87 //----------
88
89 bool result = true;
90
91 double b = maxBias(bins,reps);
92
93 printf(" worst bias is %f%%",b * 100.0);
94
95 if(b > AVALANCHE_FAIL)
96 {
97 printf(" !!!!! ");
98 result = false;
99 }
100
101 printf("\n");
102
103 return result;
104}
105
106//----------------------------------------------------------------------------
107// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
108// not really all that useful.
109
110template< typename keytype, typename hashtype >
111void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
112{
113 Rand r(11938);
114
115 const int keybytes = sizeof(keytype);
116 const int hashbytes = sizeof(hashtype);
117 const int hashbits = hashbytes * 8;
118
119 std::vector<int> bins(hashbits*hashbits*4,0);
120
121 keytype key;
122 hashtype h1,h2;
123
124 for(int irep = 0; irep < reps; irep++)
125 {
126 if(verbose)
127 {
128 if(irep % (reps/10) == 0) printf(".");
129 }
130
131 r.rand_p(&key,keybytes);
132 hash(&key,keybytes,0,&h1);
133
134 flipbit(key,keybit);
135 hash(&key,keybytes,0,&h2);
136
137 hashtype d = h1 ^ h2;
138
139 for(int out1 = 0; out1 < hashbits; out1++)
140 for(int out2 = 0; out2 < hashbits; out2++)
141 {
142 if(out1 == out2) continue;
143
144 uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
145
146 bins[(out1 * hashbits + out2) * 4 + b]++;
147 }
148 }
149
150 if(verbose) printf("\n");
151
152 maxBias = 0;
153
154 for(int out1 = 0; out1 < hashbits; out1++)
155 {
156 for(int out2 = 0; out2 < hashbits; out2++)
157 {
158 if(out1 == out2)
159 {
160 if(verbose) printf("\\");
161 continue;
162 }
163
164 double bias = 0;
165
166 for(int b = 0; b < 4; b++)
167 {
168 double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
169 b2 = fabs(b2 * 2 - 1);
170
171 if(b2 > bias) bias = b2;
172 }
173
174 if(bias > maxBias)
175 {
176 maxBias = bias;
177 maxA = out1;
178 maxB = out2;
179 }
180
181 if(verbose)
182 {
183 if (bias < 0.01) printf(".");
184 else if(bias < 0.05) printf("o");
185 else if(bias < 0.33) printf("O");
186 else printf("X");
187 }
188 }
189
190 if(verbose) printf("\n");
191 }
192}
193
194//----------
195
196template< typename keytype, typename hashtype >
197bool BicTest ( pfHash hash, const int reps )
198{
199 const int keybytes = sizeof(keytype);
200 const int keybits = keybytes * 8;
201
202 double maxBias = 0;
203 int maxK = 0;
204 int maxA = 0;
205 int maxB = 0;
206
207 for(int i = 0; i < keybits; i++)
208 {
209 if(i % (keybits/10) == 0) printf(".");
210
211 double bias;
212 int a,b;
213
214 BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
215
216 if(bias > maxBias)
217 {
218 maxBias = bias;
219 maxK = i;
220 maxA = a;
221 maxB = b;
222 }
223 }
224
225 printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
226
227 // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
228
229 bool result = (maxBias < 0.05);
230
231 return result;
232}
233
234//-----------------------------------------------------------------------------
235// BIC test variant - store all intermediate data in a table, draw diagram
236// afterwards (much faster)
237
238template< typename keytype, typename hashtype >
239void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
240{
241 const int keybytes = sizeof(keytype);
242 const int keybits = keybytes * 8;
243 const int hashbytes = sizeof(hashtype);
244 const int hashbits = hashbytes * 8;
245 const int pagesize = hashbits*hashbits*4;
246
247 Rand r(11938);
248
249 double maxBias = 0;
250 int maxK = 0;
251 int maxA = 0;
252 int maxB = 0;
253
254 keytype key;
255 hashtype h1,h2;
256
257 std::vector<int> bins(keybits*pagesize,0);
258
259 for(int keybit = 0; keybit < keybits; keybit++)
260 {
261 if(keybit % (keybits/10) == 0) printf(".");
262
263 int * page = &bins[keybit*pagesize];
264
265 for(int irep = 0; irep < reps; irep++)
266 {
267 r.rand_p(&key,keybytes);
268 hash(&key,keybytes,0,&h1);
269 flipbit(key,keybit);
270 hash(&key,keybytes,0,&h2);
271
272 hashtype d = h1 ^ h2;
273
274 for(int out1 = 0; out1 < hashbits-1; out1++)
275 for(int out2 = out1+1; out2 < hashbits; out2++)
276 {
277 int * b = &page[(out1*hashbits+out2)*4];
278
279 uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
280
281 b[x]++;
282 }
283 }
284 }
285
286 printf("\n");
287
288 for(int out1 = 0; out1 < hashbits-1; out1++)
289 {
290 for(int out2 = out1+1; out2 < hashbits; out2++)
291 {
292 if(verbose) printf("(%3d,%3d) - ",out1,out2);
293
294 for(int keybit = 0; keybit < keybits; keybit++)
295 {
296 int * page = &bins[keybit*pagesize];
297 int * bins = &page[(out1*hashbits+out2)*4];
298
299 double bias = 0;
300
301 for(int b = 0; b < 4; b++)
302 {
303 double b2 = double(bins[b]) / double(reps / 2);
304 b2 = fabs(b2 * 2 - 1);
305
306 if(b2 > bias) bias = b2;
307 }
308
309 if(bias > maxBias)
310 {
311 maxBias = bias;
312 maxK = keybit;
313 maxA = out1;
314 maxB = out2;
315 }
316
317 if(verbose)
318 {
319 if (bias < 0.01) printf(".");
320 else if(bias < 0.05) printf("o");
321 else if(bias < 0.33) printf("O");
322 else printf("X");
323 }
324 }
325
326 // Finished keybit
327
328 if(verbose) printf("\n");
329 }
330
331 if(verbose)
332 {
333 for(int i = 0; i < keybits+12; i++) printf("-");
334 printf("\n");
335 }
336 }
337
338 printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
339}
340
341
342//-----------------------------------------------------------------------------
343// BIC test variant - iterate over output bits, then key bits. No temp storage,
344// but slooooow
345
346template< typename keytype, typename hashtype >
347void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
348{
349 const int keybytes = sizeof(keytype);
350 const int keybits = keybytes * 8;
351 const int hashbytes = sizeof(hashtype);
352 const int hashbits = hashbytes * 8;
353
354 Rand r(11938);
355
356 double maxBias = 0;
357 int maxK = 0;
358 int maxA = 0;
359 int maxB = 0;
360
361 keytype key;
362 hashtype h1,h2;
363
364 for(int out1 = 0; out1 < hashbits-1; out1++)
365 for(int out2 = out1+1; out2 < hashbits; out2++)
366 {
367 if(verbose) printf("(%3d,%3d) - ",out1,out2);
368
369 for(int keybit = 0; keybit < keybits; keybit++)
370 {
371 int bins[4] = { 0, 0, 0, 0 };
372
373 for(int irep = 0; irep < reps; irep++)
374 {
375 r.rand_p(&key,keybytes);
376 hash(&key,keybytes,0,&h1);
377 flipbit(key,keybit);
378 hash(&key,keybytes,0,&h2);
379
380 hashtype d = h1 ^ h2;
381
382 uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
383
384 bins[b]++;
385 }
386
387 double bias = 0;
388
389 for(int b = 0; b < 4; b++)
390 {
391 double b2 = double(bins[b]) / double(reps / 2);
392 b2 = fabs(b2 * 2 - 1);
393
394 if(b2 > bias) bias = b2;
395 }
396
397 if(bias > maxBias)
398 {
399 maxBias = bias;
400 maxK = keybit;
401 maxA = out1;
402 maxB = out2;
403 }
404
405 if(verbose)
406 {
407 if (bias < 0.05) printf(".");
408 else if(bias < 0.10) printf("o");
409 else if(bias < 0.50) printf("O");
410 else printf("X");
411 }
412 }
413
414 // Finished keybit
415
416 if(verbose) printf("\n");
417 }
418
419 printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
420}
421
422//-----------------------------------------------------------------------------