//----------------------------------------------------------------------------- | |
// Keyset tests generate various sorts of difficult-to-hash keysets and compare | |
// the distribution and collision frequency of the hash results against an | |
// ideal random distribution | |
// The sanity checks are also in this cpp/h | |
#pragma once | |
#include "Types.h" | |
#include "Stats.h" | |
//----------------------------------------------------------------------------- | |
// Sanity tests | |
bool SanityTest ( pfHash hash, const int hashbits ); | |
void QuickBrownFox ( pfHash hash, const int hashbits ); | |
void AlignmentTest ( pfHash hash, const int hashbits ); | |
void AppendedZeroesTest ( pfHash hash, const int hashbits ); | |
//---------------------------------------------------------------------------- | |
// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys | |
// consisting of all possible permutations of those blocks | |
template< typename hashtype > | |
void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes ) | |
{ | |
if(k == blockcount-1) | |
{ | |
hashtype h; | |
hash(blocks,blockcount * sizeof(uint32_t),0,&h); | |
hashes.push_back(h); | |
return; | |
} | |
for(int i = k; i < blockcount; i++) | |
{ | |
swap(blocks[k],blocks[i]); | |
PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes); | |
swap(blocks[k],blocks[i]); | |
} | |
} | |
template< typename hashtype > | |
bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram ) | |
{ | |
printf("Keyset 'Permutation' - %d blocks - ",blockcount); | |
//---------- | |
std::vector<hashtype> hashes; | |
PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes); | |
printf("%d keys\n",(int)hashes.size()); | |
//---------- | |
bool result = true; | |
result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); | |
printf("\n"); | |
return result; | |
} | |
//----------------------------------------------------------------------------- | |
// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set | |
template < typename keytype, typename hashtype > | |
void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes ) | |
{ | |
const int nbytes = sizeof(keytype); | |
const int nbits = nbytes * 8; | |
hashtype h; | |
for(int i = start; i < nbits; i++) | |
{ | |
flipbit(&k,nbytes,i); | |
if(inclusive || (bitsleft == 1)) | |
{ | |
hash(&k,sizeof(keytype),0,&h); | |
hashes.push_back(h); | |
} | |
if(bitsleft > 1) | |
{ | |
SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes); | |
} | |
flipbit(&k,nbytes,i); | |
} | |
} | |
//---------- | |
template < int keybits, typename hashtype > | |
bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram ) | |
{ | |
printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits); | |
typedef Blob<keybits> keytype; | |
std::vector<hashtype> hashes; | |
keytype k; | |
memset(&k,0,sizeof(k)); | |
if(inclusive) | |
{ | |
hashtype h; | |
hash(&k,sizeof(keytype),0,&h); | |
hashes.push_back(h); | |
} | |
SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes); | |
printf("%d keys\n",(int)hashes.size()); | |
bool result = true; | |
result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); | |
printf("\n"); | |
return result; | |
} | |
//----------------------------------------------------------------------------- | |
// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate | |
// all possible keys with bits set in that window | |
template < typename keytype, typename hashtype > | |
bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram ) | |
{ | |
const int keybits = sizeof(keytype) * 8; | |
const int keycount = 1 << windowbits; | |
std::vector<hashtype> hashes; | |
hashes.resize(keycount); | |
bool result = true; | |
int testcount = (keybits-windowbits); | |
printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount); | |
for(int j = 0; j <= testcount; j++) | |
{ | |
int minbit = j; | |
keytype key; | |
for(int i = 0; i < keycount; i++) | |
{ | |
key = i; | |
key = key << minbit; | |
hash(&key,sizeof(keytype),0,&hashes[i]); | |
} | |
printf("Window at %3d - ",j); | |
result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram); | |
//printf("\n"); | |
} | |
return result; | |
} | |
//----------------------------------------------------------------------------- | |
// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M | |
// bytes. | |
// (This keyset type is designed to make MurmurHash2 fail) | |
template < typename hashtype > | |
bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram ) | |
{ | |
printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount); | |
std::vector<hashtype> hashes; | |
hashes.resize(keycount); | |
int keyLen = cycleLen * cycleReps; | |
uint8_t * cycle = new uint8_t[cycleLen + 16]; | |
uint8_t * key = new uint8_t[keyLen]; | |
//---------- | |
for(int i = 0; i < keycount; i++) | |
{ | |
rand_p(cycle,cycleLen); | |
*(uint32_t*)cycle = f3mix(i ^ 0x746a94f1); | |
for(int j = 0; j < keyLen; j++) | |
{ | |
key[j] = cycle[j % cycleLen]; | |
} | |
hash(key,keyLen,0,&hashes[i]); | |
} | |
//---------- | |
bool result = true; | |
result &= TestHashList(hashes,true,true,drawDiagram); | |
printf("\n"); | |
delete [] cycle; | |
delete [] key; | |
return result; | |
} | |
//----------------------------------------------------------------------------- | |
// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix", | |
// where "core" consists of all possible combinations of the given character | |
// set of length N. | |
template < typename hashtype > | |
bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram ) | |
{ | |
const int prefixlen = (int)strlen(prefix); | |
const int suffixlen = (int)strlen(suffix); | |
const int corecount = (int)strlen(coreset); | |
const int keybytes = prefixlen + corelen + suffixlen; | |
const int keycount = (int)pow(double(corecount),double(corelen)); | |
printf("Keyset 'Text' - keys of form \"%s[",prefix); | |
for(int i = 0; i < corelen; i++) printf("X"); | |
printf("]%s\" - %d keys\n",suffix,keycount); | |
uint8_t * key = new uint8_t[keybytes+1]; | |
key[keybytes] = 0; | |
memcpy(key,prefix,prefixlen); | |
memcpy(key+prefixlen+corelen,suffix,suffixlen); | |
//---------- | |
std::vector<hashtype> hashes; | |
hashes.resize(keycount); | |
for(int i = 0; i < keycount; i++) | |
{ | |
int t = i; | |
for(int j = 0; j < corelen; j++) | |
{ | |
key[prefixlen+j] = coreset[t % corecount]; t /= corecount; | |
} | |
hash(key,keybytes,0,&hashes[i]); | |
} | |
//---------- | |
bool result = true; | |
result &= TestHashList(hashes,true,true,drawDiagram); | |
printf("\n"); | |
delete [] key; | |
return result; | |
} | |
//----------------------------------------------------------------------------- | |
// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length | |
// We reuse one block of empty bytes, otherwise the RAM cost is enormous. | |
template < typename hashtype > | |
bool ZeroKeyTest ( pfHash hash, bool drawDiagram ) | |
{ | |
int keycount = 64*1024; | |
printf("Keyset 'Zeroes' - %d keys\n",keycount); | |
unsigned char * nullblock = new unsigned char[keycount]; | |
memset(nullblock,0,keycount); | |
//---------- | |
std::vector<hashtype> hashes; | |
hashes.resize(keycount); | |
for(int i = 0; i < keycount; i++) | |
{ | |
hash(nullblock,i,0,&hashes[i]); | |
} | |
bool result = true; | |
result &= TestHashList(hashes,true,true,drawDiagram); | |
printf("\n"); | |
delete [] nullblock; | |
return result; | |
} | |
//----------------------------------------------------------------------------- | |
// Keyset 'Seed' - hash "the quick brown fox..." using different seeds | |
template < typename hashtype > | |
bool SeedTest ( pfHash hash, int keycount, bool drawDiagram ) | |
{ | |
printf("Keyset 'Seed' - %d keys\n",keycount); | |
const char * text = "The quick brown fox jumps over the lazy dog"; | |
const int len = (int)strlen(text); | |
//---------- | |
std::vector<hashtype> hashes; | |
hashes.resize(keycount); | |
for(int i = 0; i < keycount; i++) | |
{ | |
hash(text,len,i,&hashes[i]); | |
} | |
bool result = true; | |
result &= TestHashList(hashes,true,true,drawDiagram); | |
printf("\n"); | |
return result; | |
} | |
//----------------------------------------------------------------------------- |