blob: 22bea53b1b726015a198c1fb576a4c85e0775e7f [file] [log] [blame]
#include "Tests.h"
#include "Stats.h" // for CountCollisions
#include "Random.h"
#include "Bitvec.h"
#include <string.h>
#include <math.h>
#include <set>
#include <vector>
#include <intrin.h>
#pragma warning(disable:4127)
#pragma warning(disable:4127) // warning C4985: 'ceil': attributes not present on previous declaration.
//-----------------------------------------------------------------------------
// 256k blocks seem to give the best results.
void BulkSpeedTest ( pfHash hash )
{
const int trials = 9999;
const int blocksize = 256 * 1024;
printf("Bulk speed test - %d-byte keys\n",blocksize);
char * block = new char[blocksize + 16];
rand_p(block,blocksize+16);
uint32_t temp[16];
for(int align = 0; align < 4; align++)
{
double bestbpc = 0;
for(int itrial = 0; itrial < trials; itrial++)
{
__int64 begin,end;
begin = __rdtsc();
hash(block + align,blocksize,itrial,temp);
end = __rdtsc();
blackhole(temp[0]);
double cycles = double(end-begin);
double bpc = double(blocksize) / cycles;
if(bpc > bestbpc) bestbpc = bpc;
}
double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
printf("Alignment %2d - %8.4f bytes/cycle, (%.2f MiB/sec @ 3 ghz)\n",align,bestbpc,bestbps);
}
printf("\n");
delete [] block;
}
//----------------------------------------------------------------------------
// Tests the Bit Independence Criteron. Not sure if this still works.
template< int _bits, int _reps, class hashtype >
void BicTest ( hashfunc<uint32_t> hash )
{
const int bits = _bits;
const int reps = _reps;
const int bytes = bits / 8;
const int hashbytes = sizeof(hashtype);
const int hashbits = hashbytes * 8;
int counts[bits][hashbits][hashbits];
memset(counts,0,sizeof(counts));
unsigned char k[bytes];
for(int irep = 0; irep < reps; irep++)
{
if(irep % 1000 == 0) printf(".");
rand_p(k,bytes);
unsigned int h1 = hash(k,bytes,0);
for(int in = 0; in < bits; in++)
{
flipbit(k,in);
unsigned int h2 = hash(k,bytes,0);
flipbit(k,in);
unsigned int h = h1 ^ h2;
for(int out1 = 0; out1 < hashbits; out1++)
for(int out2 = out1; out2 < hashbits; out2++)
{
int b1 = (h >> out1) & 1;
int b2 = (h >> out2) & 1;
int b = b1 ^ b2;
if(b1 ^ b2)
{
counts[in][out1][out2]++;
}
}
}
}
printf("\n");
int biases[4] = {0,0,0,0};
for(int i = 0; i < hashbits; i++)
{
for(int j = 0; j < hashbits; j++)
{
if(i == j)
{
printf("\\");
}
else if(i > j)
{
printf(" ");
}
else
{
double d = double(counts[16][i][j]) / double(reps);
int b = (int)(fabs(d * 2 - 1) * 100);
if(b == 0) printf(".");
else if(b < 5) printf("o");
else if(b < 33) printf("O");
else printf("X");
if(b == 0) biases[0]++;
else if(b < 5) biases[1]++;
else if(b < 33) biases[2]++;
else biases[3]++;
}
}
printf("\n");
}
printf("Bias distribution - %3d : %3d : %3d : %3d\n",biases[0],biases[1],biases[2],biases[3]);
printf("\n");
}
//----------------------------------------------------------------------------
// Bijection test = hash all possible 32-bit keys, see how many 32-bit values
// are missing. Each missing value indicates a collision.
void BijectionTest ( hashfunc<uint32_t> hash )
{
const int nbytes = 512 * 1024 * 1024;
unsigned char * block = new unsigned char[nbytes];
memset(block,0,nbytes);
printf("Testing bijection for 32-bit keys");
unsigned int k = 0;
do
{
unsigned int h = hash(&k,4,0);
setbit(block,nbytes,h);
if(k % 100000000 == 0) printf(".");
k++;
}
while(k != 0);
int missing = 0;
do
{
if(!getbit(block,nbytes,k)) missing++;
k++;
}
while(k != 0);
printf("\nMissing values - %d\n",missing);
delete [] block;
}
//----------------------------------------------------------------------------
// Loop counting
// Repeatedly hash the same 4-byte value over and over, and track how many
// loops are in the output.
void LoopTest ( hashfunc<uint32_t> hash )
{
const int nbytes = 512 * 1024 * 1024;
unsigned char * block = new unsigned char[nbytes];
memset(block,0,nbytes);
int loops = 0;
unsigned int start = 0;
while(1)
{
if(!getbit(block,nbytes,start))
{
loops++;
unsigned int r = 0;
unsigned int h = start;
printf("Testing loop starting at %u",start);
while(1)
{
setbit(block,nbytes,h);
r++;
h = hash(&h,4,0);
if(h == start) break;
if(r % 100000000 == 0) printf(".");
}
printf("\nStart point %u looped after %u\n",start,r);
}
start++;
if(start == 0) break;
}
printf("Total loops - %d\n",loops);
delete [] block;
}
//-----------------------------------------------------------------------------
// Trickle test, not really usable by itself. Used for testing the diffusion
// properties of a sequence of adds, xors, rotates, etc - replace the adds and
// xors with ors, compute how many iterations it takes to convert an input with
// a single 1 bit into all 1s.
// I was using it to find optimal rotation constants for various types of
// diffusion functions, but didn't end up using them
int trickle ( int r[4], int reps )
{
int worst = 1000;
for(int i = 0; i < 128; i++)
{
uint32_t t[4] = { 0, 0, 0, 0 };
setbit(t,4,i);
for(int j = 0; j < reps; j++)
{
t[0] |= t[1];
t[2] |= t[3];
t[1] |= t[0];
t[0] |= t[2];
t[2] |= t[1];
t[1] |= t[0];
t[3] |= t[0];
t[0] = _rotl(t[0],r[0]);
t[1] = _rotl(t[1],r[1]);
t[2] = _rotl(t[2],r[2]);
t[3] = _rotl(t[3],r[3]);
}
int p = popcount(t[0]) + popcount(t[1]) + popcount(t[2]) + popcount(t[3]);
if(p < worst) worst = p;
}
return worst;
}
void TrickleTest ( void )
{
int best = 0;
int r[4];
for(int i = 0; i < 1024*1024; i++)
{
r[0] = (i >> 0) & 31;
r[1] = (i >> 5) & 31;
r[2] = (i >> 10) & 31;
r[3] = (i >> 15) & 31;
//if(trickle(r,2) < 16) continue;
//if(trickle(r,3) < 40) continue;
//if(trickle(r,4) < 80) continue;
int worst = trickle(r,6);
if(worst >= best)
//if(i % 10000 == 0)
{
best = worst;
printf("\t{");
for(int i = 0; i < 4; i++)
{
printf("%2d, ",r[i]);
}
printf("}, // %3d\n",worst);
}
}
}
//----------------------------------------------------------------------------
// Alignment of the keys should not affect the hash value - if it does,
// something is broken.
void AlignmentTest ( pfHash hash, const int hashbits )
{
const int hashbytes = hashbits / 8;
printf("Testing alignment handling on small keys..........");
char bufs[16][64];
char * strings[16];
for(int i = 0; i < 16; i++)
{
uint32_t b = uint32_t(&bufs[i][0]);
b = (b+15)&(~15);
strings[i] = (char*)(b + i);
strcpy_s(strings[i],32,"DeadBeefDeadBeef");
}
uint32_t hash1[64];
uint32_t hash2[64];
for(int k = 1; k <= 16; k++)
for(int j = 0; j < 15; j++)
for(int i = j+1; i < 16; i++)
{
const char * s1 = strings[i];
const char * s2 = strings[j];
hash(s1,k,0,hash1);
hash(s2,k,0,hash2);
if(memcmp(hash1,hash2,hashbytes) != 0)
{
printf("*********FAIL*********\n");
return;
}
}
printf("PASS\n");
}
//----------------------------------------------------------------------------
// Appending zero bytes to a key should always cause it to produce a different
// hash value
void AppendedZeroesTest ( pfHash hash, const int hashbits )
{
const int hashbytes = hashbits/8;
printf("Testing zero-appending..........");
for(int rep = 0; rep < 100; rep++)
{
if(rep % 10 == 0) printf(".");
unsigned char key[256];
memset(key,0,sizeof(key));
rand_p(key,32);
uint32_t h1[16];
uint32_t h2[16];
memset(h1,0,hashbytes);
memset(h2,0,hashbytes);
for(int i = 0; i < 32; i++)
{
hash(key,32+i,0,h1);
if(memcmp(h1,h2,hashbytes) == 0)
{
printf("\n*********FAIL*********\n");
return;
}
memcpy(h2,h1,hashbytes);
}
}
printf("\nPASS\n");
}
//----------------------------------------------------------------------------
// Flipping a bit of a key should, with very high probability, result in a
// different hash.
bool TwiddleTest ( pfHash hash, const int hashbits )
{
bool result = true;
const int hashbytes = hashbits/8;
printf("Testing bit twiddling..........");
uint8_t key[256];
uint32_t h1[16];
uint32_t h2[16];
for(int len = 1; len < 16; len++)
{
for(int bit = 0; bit < (len * 8); bit++)
{
rand_p(key,len);
hash(key,len,0,h1);
flipbit(key,len,bit);
hash(key,len,0,h2);
if(memcmp(h1,h2,hashbytes) == 0)
{
//printf("X");
result = false;
}
else
{
//printf(".");
}
}
//printf("\n");
}
if(result == false)
printf("*********FAIL*********\n");
else
printf("PASS\n");
return result;
}
//-----------------------------------------------------------------------------
// Create a bunch of zero-byte keys of different lengths and check distribution.
// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
// The distribution table will have some random 1s in the bottom rows due to
// there not being enough keys for a good test.
void NullKeysetTest ( hashfunc<uint32_t> hash, bool drawDiagram )
{
int keycount = 64*1024;
unsigned char * nullblock = new unsigned char[keycount];
memset(nullblock,0,keycount);
//----------
std::vector<uint32_t> hashes;
hashes.resize(keycount);
//----------
printf("Collision test - Hash keyset 100 times, count collisions");
for(int i = 0; i < keycount; i++)
{
if(i % (keycount/10) == 0) printf(".");
uint32_t h = hash(nullblock,i,0);
hashes[i] = h;
}
testhashlist(hashes,true,true,drawDiagram);
delete [] nullblock;
}
//-----------------------------------------------------------------------------
// Simple collections of alphanumeric strings
template < typename hashtype >
void TextKeyTest2 ( hashfunc<hashtype> hash, bool drawDiagram )
{
const char * s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456790123456789";
const int nbytes = 7;
const int nbits = 56;
const int nkeys = 1024*1024;
printf("Testing %d 7-character text keys - \n",nkeys,nbits);
//----------
// generate keyset
typedef Blob<nbits> keytype;
std::vector<keytype> keys;
keys.reserve(nkeys);
keytype key;
uint8_t * bytes = (uint8_t*)&key;
for(int i = 0; i < nkeys; i++)
{
for(int j = 0; j < nbytes; j++)
{
int d = i >> (j * 3);
bytes[j] = s[d % 64];
}
keys.push_back(key);
}
//----------
testkeylist<keytype,hashtype>(hash,keys,true,true,drawDiagram);
}