Merge branch chandlerc_dev

git-svn-id: http://smhasher.googlecode.com/svn/trunk@144 77a7d1d3-4c08-bdc2-d393-d5859734b01a
diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp
index 38aa452..f5ea0df 100644
--- a/AvalancheTest.cpp
+++ b/AvalancheTest.cpp
@@ -1,56 +1,56 @@
-#include "AvalancheTest.h"

-

-//-----------------------------------------------------------------------------

-

-void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )

-{

-  const char * symbols = ".123456789X";

-

-  for(int i = 0; i < y; i++)

-  {

-    printf("[");

-    for(int j = 0; j < x; j++)

-    {

-      int k = (y - i) -1;

-

-      int bin = bins[k + (j*y)];

-

-      double b = double(bin) / double(reps);

-      b = fabs(b*2 - 1);

-

-      b *= scale;

-

-      int s = (int)floor(b*10);

-

-      if(s > 10) s = 10;

-      if(s < 0) s = 0;

-

-      printf("%c",symbols[s]);

-    }

-

-    printf("]\n");

-  }

-}

-

-//----------------------------------------------------------------------------

-

-double maxBias ( std::vector<int> & counts, int reps )

-{

-  double worst = 0;

-

-  for(int i = 0; i < (int)counts.size(); i++)

-  {

-    double c = double(counts[i]) / double(reps);

-

-    double d = fabs(c * 2 - 1);

-      

-    if(d > worst)

-    {

-      worst = d;

-    }

-  }

-

-  return worst;

-}

-

-//-----------------------------------------------------------------------------

+#include "AvalancheTest.h"
+
+//-----------------------------------------------------------------------------
+
+void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
+{
+  const char * symbols = ".123456789X";
+
+  for(int i = 0; i < y; i++)
+  {
+    printf("[");
+    for(int j = 0; j < x; j++)
+    {
+      int k = (y - i) -1;
+
+      int bin = bins[k + (j*y)];
+
+      double b = double(bin) / double(reps);
+      b = fabs(b*2 - 1);
+
+      b *= scale;
+
+      int s = (int)floor(b*10);
+
+      if(s > 10) s = 10;
+      if(s < 0) s = 0;
+
+      printf("%c",symbols[s]);
+    }
+
+    printf("]\n");
+  }
+}
+
+//----------------------------------------------------------------------------
+
+double maxBias ( std::vector<int> & counts, int reps )
+{
+  double worst = 0;
+
+  for(int i = 0; i < (int)counts.size(); i++)
+  {
+    double c = double(counts[i]) / double(reps);
+
+    double d = fabs(c * 2 - 1);
+      
+    if(d > worst)
+    {
+      worst = d;
+    }
+  }
+
+  return worst;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/AvalancheTest.h b/AvalancheTest.h
index 4c23369..f1bfeea 100644
--- a/AvalancheTest.h
+++ b/AvalancheTest.h
@@ -1,422 +1,422 @@
-//-----------------------------------------------------------------------------

-// Flipping a single bit of a key should cause an "avalanche" of changes in

-// the hash function's output. Ideally, each output bits should flip 50% of

-// the time - if the probability of an output bit flipping is not 50%, that bit

-// is "biased". Too much bias means that patterns applied to the input will

-// cause "echoes" of the patterns in the output, which in turn can cause the

-// hash function to fail to create an even, random distribution of hash values.

-

-

-#pragma once

-

-#include "Types.h"

-#include "Random.h"

-

-#include <vector>

-#include <stdio.h>

-#include <math.h>

-

-// Avalanche fails if a bit is biased by more than 1%

-

-#define AVALANCHE_FAIL 0.01

-

-double maxBias ( std::vector<int> & counts, int reps );

-

-//-----------------------------------------------------------------------------

-

-template < typename keytype, typename hashtype >

-void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )

-{

-  const int keybytes = sizeof(keytype);

-  const int hashbytes = sizeof(hashtype);

-

-  const int keybits = keybytes * 8;

-  const int hashbits = hashbytes * 8;

-

-  keytype K;

-  hashtype A,B;

-

-  for(int irep = 0; irep < reps; irep++)

-  {

-    if(irep % (reps/10) == 0) printf(".");

-

-    r.rand_p(&K,keybytes);

-

-    hash(&K,keybytes,0,&A);

-

-    int * cursor = &counts[0];

-

-    for(int iBit = 0; iBit < keybits; iBit++)

-    {

-      flipbit(&K,keybytes,iBit);

-      hash(&K,keybytes,0,&B);

-      flipbit(&K,keybytes,iBit);

-

-      for(int iOut = 0; iOut < hashbits; iOut++)

-      {

-        int bitA = getbit(&A,hashbytes,iOut);

-        int bitB = getbit(&B,hashbytes,iOut);

-

-        (*cursor++) += (bitA ^ bitB);

-      }

-    }

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-template < typename keytype, typename hashtype >

-bool AvalancheTest ( pfHash hash, const int reps )

-{

-  Rand r(48273);

-  

-  const int keybytes = sizeof(keytype);

-  const int hashbytes = sizeof(hashtype);

-

-  const int keybits = keybytes * 8;

-  const int hashbits = hashbytes * 8;

-

-  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);

-

-  //----------

-

-  std::vector<int> bins(keybits*hashbits,0);

-

-  calcBias<keytype,hashtype>(hash,bins,reps,r);

-  

-  //----------

-

-  bool result = true;

-

-  double b = maxBias(bins,reps);

-

-  printf(" worst bias is %f%%",b * 100.0);

-

-  if(b > AVALANCHE_FAIL)

-  {

-    printf(" !!!!! ");

-    result = false;

-  }

-

-  printf("\n");

-

-  return result;

-}

-

-//----------------------------------------------------------------------------

-// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and

-// not really all that useful.

-

-template< typename keytype, typename hashtype >

-void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )

-{

-  Rand r(11938);

-  

-  const int keybytes = sizeof(keytype);

-  const int hashbytes = sizeof(hashtype);

-  const int hashbits = hashbytes * 8;

-

-  std::vector<int> bins(hashbits*hashbits*4,0);

-

-  keytype key;

-  hashtype h1,h2;

-

-  for(int irep = 0; irep < reps; irep++)

-  {

-    if(verbose)

-    {

-      if(irep % (reps/10) == 0) printf(".");

-    }

-

-    r.rand_p(&key,keybytes);

-    hash(&key,keybytes,0,&h1);

-

-    flipbit(key,keybit);

-    hash(&key,keybytes,0,&h2);

-

-    hashtype d = h1 ^ h2;

-

-    for(int out1 = 0; out1 < hashbits; out1++)

-    for(int out2 = 0; out2 < hashbits; out2++)

-    {

-      if(out1 == out2) continue;

-

-      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);

-

-      bins[(out1 * hashbits + out2) * 4 + b]++;

-    }

-  }

-

-  if(verbose) printf("\n");

-

-  maxBias = 0;

-

-  for(int out1 = 0; out1 < hashbits; out1++)

-  {

-    for(int out2 = 0; out2 < hashbits; out2++)

-    {

-      if(out1 == out2)

-      {

-        if(verbose) printf("\\");

-        continue;

-      }

-

-      double bias = 0;

-

-      for(int b = 0; b < 4; b++)

-      {

-        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);

-        b2 = fabs(b2 * 2 - 1);

-

-        if(b2 > bias) bias = b2;

-      }

-

-      if(bias > maxBias)

-      {

-        maxBias = bias;

-        maxA = out1;

-        maxB = out2;

-      }

-

-      if(verbose) 

-      {

-        if     (bias < 0.01) printf(".");

-        else if(bias < 0.05) printf("o");

-        else if(bias < 0.33) printf("O");

-        else                 printf("X");

-      }

-    }

-

-    if(verbose) printf("\n");

-  }

-}

-

-//----------

-

-template< typename keytype, typename hashtype >

-bool BicTest ( pfHash hash, const int reps )

-{

-  const int keybytes = sizeof(keytype);

-  const int keybits = keybytes * 8;

-

-  double maxBias = 0;

-  int maxK = 0;

-  int maxA = 0;

-  int maxB = 0;

-

-  for(int i = 0; i < keybits; i++)

-  {

-    if(i % (keybits/10) == 0) printf(".");

-

-    double bias;

-    int a,b;

-    

-    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);

-

-    if(bias > maxBias)

-    {

-      maxBias = bias;

-      maxK = i;

-      maxA = a;

-      maxB = b;

-    }

-  }

-

-  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);

-

-  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.

-

-  bool result = (maxBias < 0.05);

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// BIC test variant - store all intermediate data in a table, draw diagram

-// afterwards (much faster)

-

-template< typename keytype, typename hashtype >

-void BicTest3 ( pfHash hash, const int reps, bool verbose = true )

-{

-  const int keybytes = sizeof(keytype);

-  const int keybits = keybytes * 8;

-  const int hashbytes = sizeof(hashtype);

-  const int hashbits = hashbytes * 8;

-  const int pagesize = hashbits*hashbits*4;

-

-  Rand r(11938);

-

-  double maxBias = 0;

-  int maxK = 0;

-  int maxA = 0;

-  int maxB = 0;

-

-  keytype key;

-  hashtype h1,h2;

-

-  std::vector<int> bins(keybits*pagesize,0);

-

-  for(int keybit = 0; keybit < keybits; keybit++)

-  {

-    if(keybit % (keybits/10) == 0) printf(".");

-

-    int * page = &bins[keybit*pagesize];

-

-    for(int irep = 0; irep < reps; irep++)

-    {

-      r.rand_p(&key,keybytes);

-      hash(&key,keybytes,0,&h1);

-      flipbit(key,keybit);

-      hash(&key,keybytes,0,&h2);

-

-      hashtype d = h1 ^ h2;

-

-      for(int out1 = 0; out1 < hashbits-1; out1++)

-      for(int out2 = out1+1; out2 < hashbits; out2++)

-      {

-        int * b = &page[(out1*hashbits+out2)*4];

-

-        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);

-

-        b[x]++;

-      }

-    }

-  }

-

-  printf("\n");

-

-  for(int out1 = 0; out1 < hashbits-1; out1++)

-  {

-    for(int out2 = out1+1; out2 < hashbits; out2++)

-    {

-      if(verbose) printf("(%3d,%3d) - ",out1,out2);

-

-      for(int keybit = 0; keybit < keybits; keybit++)

-      {

-        int * page = &bins[keybit*pagesize];

-        int * bins = &page[(out1*hashbits+out2)*4];

-

-        double bias = 0;

-

-        for(int b = 0; b < 4; b++)

-        {

-          double b2 = double(bins[b]) / double(reps / 2);

-          b2 = fabs(b2 * 2 - 1);

-

-          if(b2 > bias) bias = b2;

-        }

-

-        if(bias > maxBias)

-        {

-          maxBias = bias;

-          maxK = keybit;

-          maxA = out1;

-          maxB = out2;

-        }

-

-        if(verbose) 

-        {

-          if     (bias < 0.01) printf(".");

-          else if(bias < 0.05) printf("o");

-          else if(bias < 0.33) printf("O");

-          else                 printf("X");

-        }

-      }

-

-      // Finished keybit

-

-      if(verbose) printf("\n");

-    }

-

-    if(verbose)

-    {

-      for(int i = 0; i < keybits+12; i++) printf("-");

-      printf("\n");

-    }

-  }

-

-  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);

-}

-

-

-//-----------------------------------------------------------------------------

-// BIC test variant - iterate over output bits, then key bits. No temp storage,

-// but slooooow

-

-template< typename keytype, typename hashtype >

-void BicTest2 ( pfHash hash, const int reps, bool verbose = true )

-{

-  const int keybytes = sizeof(keytype);

-  const int keybits = keybytes * 8;

-  const int hashbytes = sizeof(hashtype);

-  const int hashbits = hashbytes * 8;

-

-  Rand r(11938);

-

-  double maxBias = 0;

-  int maxK = 0;

-  int maxA = 0;

-  int maxB = 0;

-

-  keytype key;

-  hashtype h1,h2;

-

-  for(int out1 = 0; out1 < hashbits-1; out1++)

-  for(int out2 = out1+1; out2 < hashbits; out2++)

-  {

-    if(verbose) printf("(%3d,%3d) - ",out1,out2);

-

-    for(int keybit = 0; keybit < keybits; keybit++)

-    {

-      int bins[4] = { 0, 0, 0, 0 };

-

-      for(int irep = 0; irep < reps; irep++)

-      {

-        r.rand_p(&key,keybytes);

-        hash(&key,keybytes,0,&h1);

-        flipbit(key,keybit);

-        hash(&key,keybytes,0,&h2);

-

-        hashtype d = h1 ^ h2;

-

-        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);

-

-        bins[b]++;

-      }

-

-      double bias = 0;

-

-      for(int b = 0; b < 4; b++)

-      {

-        double b2 = double(bins[b]) / double(reps / 2);

-        b2 = fabs(b2 * 2 - 1);

-

-        if(b2 > bias) bias = b2;

-      }

-

-      if(bias > maxBias)

-      {

-        maxBias = bias;

-        maxK = keybit;

-        maxA = out1;

-        maxB = out2;

-      }

-

-      if(verbose) 

-      {

-        if     (bias < 0.05) printf(".");

-        else if(bias < 0.10) printf("o");

-        else if(bias < 0.50) printf("O");

-        else                 printf("X");

-      }

-    }

-

-    // Finished keybit

-

-    if(verbose) printf("\n");

-  }

-

-  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);

-}

-

-//-----------------------------------------------------------------------------

+//-----------------------------------------------------------------------------
+// Flipping a single bit of a key should cause an "avalanche" of changes in
+// the hash function's output. Ideally, each output bit should flip 50% of
+// the time - if the probability of an output bit flipping is not 50%, that bit
+// is "biased". Too much bias means that patterns applied to the input will
+// cause "echoes" of the patterns in the output, which in turn can cause the
+// hash function to fail to create an even, random distribution of hash values.
+
+
+#pragma once
+
+#include "Types.h"
+#include "Random.h"
+
+#include <vector>
+#include <stdio.h>
+#include <math.h>
+
+// Avalanche fails if a bit is biased by more than 1%
+
+#define AVALANCHE_FAIL 0.01
+
+double maxBias ( std::vector<int> & counts, int reps );
+
+//-----------------------------------------------------------------------------
+
+template < typename keytype, typename hashtype >
+void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
+{
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
+
+  keytype K;
+  hashtype A,B;
+
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
+
+    r.rand_p(&K,keybytes);
+
+    hash(&K,keybytes,0,&A);
+
+    int * cursor = &counts[0];
+
+    for(int iBit = 0; iBit < keybits; iBit++)
+    {
+      flipbit(&K,keybytes,iBit);
+      hash(&K,keybytes,0,&B);
+      flipbit(&K,keybytes,iBit);
+
+      for(int iOut = 0; iOut < hashbits; iOut++)
+      {
+        int bitA = getbit(&A,hashbytes,iOut);
+        int bitB = getbit(&B,hashbytes,iOut);
+
+        (*cursor++) += (bitA ^ bitB);
+      }
+    }
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+template < typename keytype, typename hashtype >
+bool AvalancheTest ( pfHash hash, const int reps )
+{
+  Rand r(48273);
+  
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+
+  const int keybits = keybytes * 8;
+  const int hashbits = hashbytes * 8;
+
+  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
+
+  //----------
+
+  std::vector<int> bins(keybits*hashbits,0);
+
+  calcBias<keytype,hashtype>(hash,bins,reps,r);
+  
+  //----------
+
+  bool result = true;
+
+  double b = maxBias(bins,reps);
+
+  printf(" worst bias is %f%%",b * 100.0);
+
+  if(b > AVALANCHE_FAIL)
+  {
+    printf(" !!!!! ");
+    result = false;
+  }
+
+  printf("\n");
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
+// Tests the Bit Independence Criterion. Stricter than Avalanche, but slow and
+// not really all that useful.
+
+template< typename keytype, typename hashtype >
+void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
+{
+  Rand r(11938);
+  
+  const int keybytes = sizeof(keytype);
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  std::vector<int> bins(hashbits*hashbits*4,0);
+
+  keytype key;
+  hashtype h1,h2;
+
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(verbose)
+    {
+      if(irep % (reps/10) == 0) printf(".");
+    }
+
+    r.rand_p(&key,keybytes);
+    hash(&key,keybytes,0,&h1);
+
+    flipbit(key,keybit);
+    hash(&key,keybytes,0,&h2);
+
+    hashtype d = h1 ^ h2;
+
+    for(int out1 = 0; out1 < hashbits; out1++)
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2) continue;
+
+      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+      bins[(out1 * hashbits + out2) * 4 + b]++;
+    }
+  }
+
+  if(verbose) printf("\n");
+
+  maxBias = 0;
+
+  for(int out1 = 0; out1 < hashbits; out1++)
+  {
+    for(int out2 = 0; out2 < hashbits; out2++)
+    {
+      if(out1 == out2)
+      {
+        if(verbose) printf("\\");
+        continue;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.01) printf(".");
+        else if(bias < 0.05) printf("o");
+        else if(bias < 0.33) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    if(verbose) printf("\n");
+  }
+}
+
+//----------
+
+template< typename keytype, typename hashtype >
+bool BicTest ( pfHash hash, const int reps )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  for(int i = 0; i < keybits; i++)
+  {
+    if(i % (keybits/10) == 0) printf(".");
+
+    double bias;
+    int a,b;
+    
+    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
+
+    if(bias > maxBias)
+    {
+      maxBias = bias;
+      maxK = i;
+      maxA = a;
+      maxB = b;
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+
+  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
+
+  bool result = (maxBias < 0.05);
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// BIC test variant - store all intermediate data in a table, draw diagram
+// afterwards (much faster)
+
+template< typename keytype, typename hashtype >
+void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int pagesize = hashbits*hashbits*4;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+
+  std::vector<int> bins(keybits*pagesize,0);
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    if(keybit % (keybits/10) == 0) printf(".");
+
+    int * page = &bins[keybit*pagesize];
+
+    for(int irep = 0; irep < reps; irep++)
+    {
+      r.rand_p(&key,keybytes);
+      hash(&key,keybytes,0,&h1);
+      flipbit(key,keybit);
+      hash(&key,keybytes,0,&h2);
+
+      hashtype d = h1 ^ h2;
+
+      for(int out1 = 0; out1 < hashbits-1; out1++)
+      for(int out2 = out1+1; out2 < hashbits; out2++)
+      {
+        int * b = &page[(out1*hashbits+out2)*4];
+
+        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        b[x]++;
+      }
+    }
+  }
+
+  printf("\n");
+
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  {
+    for(int out2 = out1+1; out2 < hashbits; out2++)
+    {
+      if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+      for(int keybit = 0; keybit < keybits; keybit++)
+      {
+        int * page = &bins[keybit*pagesize];
+        int * bins = &page[(out1*hashbits+out2)*4];
+
+        double bias = 0;
+
+        for(int b = 0; b < 4; b++)
+        {
+          double b2 = double(bins[b]) / double(reps / 2);
+          b2 = fabs(b2 * 2 - 1);
+
+          if(b2 > bias) bias = b2;
+        }
+
+        if(bias > maxBias)
+        {
+          maxBias = bias;
+          maxK = keybit;
+          maxA = out1;
+          maxB = out2;
+        }
+
+        if(verbose) 
+        {
+          if     (bias < 0.01) printf(".");
+          else if(bias < 0.05) printf("o");
+          else if(bias < 0.33) printf("O");
+          else                 printf("X");
+        }
+      }
+
+      // Finished keybit
+
+      if(verbose) printf("\n");
+    }
+
+    if(verbose)
+    {
+      for(int i = 0; i < keybits+12; i++) printf("-");
+      printf("\n");
+    }
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+
+//-----------------------------------------------------------------------------
+// BIC test variant - iterate over output bits, then key bits. No temp storage,
+// but slooooow
+
+template< typename keytype, typename hashtype >
+void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+
+  Rand r(11938);
+
+  double maxBias = 0;
+  int maxK = 0;
+  int maxA = 0;
+  int maxB = 0;
+
+  keytype key;
+  hashtype h1,h2;
+
+  for(int out1 = 0; out1 < hashbits-1; out1++)
+  for(int out2 = out1+1; out2 < hashbits; out2++)
+  {
+    if(verbose) printf("(%3d,%3d) - ",out1,out2);
+
+    for(int keybit = 0; keybit < keybits; keybit++)
+    {
+      int bins[4] = { 0, 0, 0, 0 };
+
+      for(int irep = 0; irep < reps; irep++)
+      {
+        r.rand_p(&key,keybytes);
+        hash(&key,keybytes,0,&h1);
+        flipbit(key,keybit);
+        hash(&key,keybytes,0,&h2);
+
+        hashtype d = h1 ^ h2;
+
+        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
+
+        bins[b]++;
+      }
+
+      double bias = 0;
+
+      for(int b = 0; b < 4; b++)
+      {
+        double b2 = double(bins[b]) / double(reps / 2);
+        b2 = fabs(b2 * 2 - 1);
+
+        if(b2 > bias) bias = b2;
+      }
+
+      if(bias > maxBias)
+      {
+        maxBias = bias;
+        maxK = keybit;
+        maxA = out1;
+        maxB = out2;
+      }
+
+      if(verbose) 
+      {
+        if     (bias < 0.05) printf(".");
+        else if(bias < 0.10) printf("o");
+        else if(bias < 0.50) printf("O");
+        else                 printf("X");
+      }
+    }
+
+    // Finished keybit
+
+    if(verbose) printf("\n");
+  }
+
+  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitslice.cpp b/Bitslice.cpp
index 428e355..45a2249 100644
--- a/Bitslice.cpp
+++ b/Bitslice.cpp
@@ -1,127 +1,127 @@
-#include "Bitvec.h"

-#include <vector>

-#include <assert.h>

-

-// handle xnor

-

-typedef std::vector<uint32_t> slice;

-typedef std::vector<slice> slice_vec;

-

-int countbits ( slice & v )

-{

-  int c = 0;

-

-  for(size_t i = 0; i < v.size(); i++)

-  {

-    int d = countbits(v[i]);

-

-    c += d;

-  }

-

-  return c;

-}

-

-int countxor ( slice & a, slice & b )

-{

-  assert(a.size() == b.size());

-

-  int c = 0;

-

-  for(size_t i = 0; i < a.size(); i++)

-  {

-    int d = countbits(a[i] ^ b[i]);

-

-    c += d;

-  }

-

-  return c;

-}

-

-void xoreq ( slice & a, slice & b )

-{

-  assert(a.size() == b.size());

-

-  for(size_t i = 0; i < a.size(); i++)

-  {

-    a[i] ^= b[i];

-  }

-}

-

-//-----------------------------------------------------------------------------

-// Bitslice a hash set

-

-template< typename hashtype >

-void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )

-{

-  const int hashbytes = sizeof(hashtype);

-  const int hashbits = hashbytes * 8;

-  const int slicelen = ((int)hashes.size() + 31) / 32;

-

-  slices.clear();

-  slices.resize(hashbits);

-

-  for(int i = 0; i < (int)slices.size(); i++)

-  {

-    slices[i].resize(slicelen,0);

-  }

-

-  for(int j = 0; j < hashbits; j++)

-  {

-    void * sliceblob = &(slices[j][0]);

-

-    for(int i = 0; i < (int)hashes.size(); i++)

-    {

-      int b = getbit(hashes[i],j);

-

-      setbit(sliceblob,slicelen*4,i,b);

-    }

-  }

-}

-

-void FactorSlices ( slice_vec & slices )

-{

-  std::vector<int> counts(slices.size(),0);

-

-  for(size_t i = 0; i < slices.size(); i++)

-  {

-    counts[i] = countbits(slices[i]);

-  }

-

-  bool changed = true;

-

-  while(changed)

-  {

-    int bestA = -1;

-    int bestB = -1;

-

-    for(int j = 0; j < (int)slices.size()-1; j++)

-    {

-      for(int i = j+1; i < (int)slices.size(); i++)

-      {

-        int d = countxor(slices[i],slices[j]);

-

-        if((d < counts[i]) && (d < counts[j]))

-        {

-          if(counts[i] < counts[j])

-          {

-            bestA = j;

-            bestB = i;

-          }

-        }

-        else if(d < counts[i])

-        {

-          //bestA = 

-        }

-      }

-    }

-  }

-}

-

-

-void foo ( void )

-{

-  slice a;

-  slice_vec b;

-

-  Bitslice(a,b);

+#include "Bitvec.h"
+#include <vector>
+#include <assert.h>
+
+// handle xnor
+
+typedef std::vector<uint32_t> slice;
+typedef std::vector<slice> slice_vec;
+
+int countbits ( slice & v )
+{
+  int c = 0;
+
+  for(size_t i = 0; i < v.size(); i++)
+  {
+    int d = countbits(v[i]);
+
+    c += d;
+  }
+
+  return c;
+}
+
+int countxor ( slice & a, slice & b )
+{
+  assert(a.size() == b.size());
+
+  int c = 0;
+
+  for(size_t i = 0; i < a.size(); i++)
+  {
+    int d = countbits(a[i] ^ b[i]);
+
+    c += d;
+  }
+
+  return c;
+}
+
+void xoreq ( slice & a, slice & b )
+{
+  assert(a.size() == b.size());
+
+  for(size_t i = 0; i < a.size(); i++)
+  {
+    a[i] ^= b[i];
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bitslice a hash set
+
+template< typename hashtype >
+void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
+{
+  const int hashbytes = sizeof(hashtype);
+  const int hashbits = hashbytes * 8;
+  const int slicelen = ((int)hashes.size() + 31) / 32;
+
+  slices.clear();
+  slices.resize(hashbits);
+
+  for(int i = 0; i < (int)slices.size(); i++)
+  {
+    slices[i].resize(slicelen,0);
+  }
+
+  for(int j = 0; j < hashbits; j++)
+  {
+    void * sliceblob = &(slices[j][0]);
+
+    for(int i = 0; i < (int)hashes.size(); i++)
+    {
+      int b = getbit(hashes[i],j);
+
+      setbit(sliceblob,slicelen*4,i,b);
+    }
+  }
+}
+
+void FactorSlices ( slice_vec & slices )
+{
+  std::vector<int> counts(slices.size(),0);
+
+  for(size_t i = 0; i < slices.size(); i++)
+  {
+    counts[i] = countbits(slices[i]);
+  }
+
+  bool changed = true;
+
+  while(changed)
+  {
+    int bestA = -1;
+    int bestB = -1;
+
+    for(int j = 0; j < (int)slices.size()-1; j++)
+    {
+      for(int i = j+1; i < (int)slices.size(); i++)
+      {
+        int d = countxor(slices[i],slices[j]);
+
+        if((d < counts[i]) && (d < counts[j]))
+        {
+          if(counts[i] < counts[j])
+          {
+            bestA = j;
+            bestB = i;
+          }
+        }
+        else if(d < counts[i])
+        {
+          //bestA = 
+        }
+      }
+    }
+  }
+}
+
+
+void foo ( void )
+{
+  slice a;
+  slice_vec b;
+
+  Bitslice(a,b);
 }
\ No newline at end of file
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 6c74bcc..4855f8f 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -1,757 +1,757 @@
-#include "Bitvec.h"

-

-#include "Random.h"

-

-#include <assert.h>

-#include <stdio.h>

-

-#ifndef DEBUG

-#undef assert

-void assert ( bool )

-{

-}

-#endif

-

-//----------------------------------------------------------------------------

-

-void printbits ( const void * blob, int len )

-{

-  const uint8_t * data = (const uint8_t *)blob;

-

-  printf("[");

-  for(int i = 0; i < len; i++)

-  {

-    unsigned char byte = data[i];

-

-    int hi = (byte >> 4);

-    int lo = (byte & 0xF);

-

-    if(hi) printf("%01x",hi);

-    else   printf(".");

-

-    if(lo) printf("%01x",lo);

-    else   printf(".");

-

-    if(i != len-1) printf(" ");

-  }

-  printf("]");

-}

-

-void printbits2 ( const uint8_t * k, int nbytes )

-{

-  printf("[");

-

-  for(int i = nbytes-1; i >= 0; i--)

-  {

-    uint8_t b = k[i];

-

-    for(int j = 7; j >= 0; j--)

-    {

-      uint8_t c = (b & (1 << j)) ? '#' : ' ';

-

-      putc(c,stdout);

-    }

-  }

-  printf("]");

-}

-

-void printhex32 ( const void * blob, int len )

-{

-  assert((len & 3) == 0);

-

-  uint32_t * d = (uint32_t*)blob;

-

-  printf("{ ");

-

-  for(int i = 0; i < len/4; i++) 

-  {

-    printf("0x%08x, ",d[i]);

-  }

-

-  printf("}");

-}

-

-void printbytes ( const void * blob, int len )

-{

-  uint8_t * d = (uint8_t*)blob;

-

-  printf("{ ");

-

-  for(int i = 0; i < len; i++)

-  {

-    printf("0x%02x, ",d[i]);

-  }

-

-  printf(" };");

-}

-

-void printbytes2 ( const void * blob, int len )

-{

-  uint8_t * d = (uint8_t*)blob;

-

-  for(int i = 0; i < len; i++)

-  {

-    printf("%02x ",d[i]);

-  }

-}

-

-//-----------------------------------------------------------------------------

-// Bit-level manipulation

-

-// These two are from the "Bit Twiddling Hacks" webpage

-

-uint32_t popcount ( uint32_t v )

-{

-	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary

-	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp

-	uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count

-

-	return c;

-}

-

-uint32_t parity ( uint32_t v )

-{

-	v ^= v >> 1;

-	v ^= v >> 2;

-	v = (v & 0x11111111U) * 0x11111111U;

-	return (v >> 28) & 1;

-}

-

-//-----------------------------------------------------------------------------

-

-uint32_t getbit ( const void * block, int len, uint32_t bit )

-{

-  uint8_t * b = (uint8_t*)block;

-

-  int byte = bit >> 3;

-  bit = bit & 0x7;

-  

-  if(byte < len) return (b[byte] >> bit) & 1;

-

-  return 0;

-}

-

-uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )

-{

-  uint8_t * b = (uint8_t*)block;

-

-  int byte = bit >> 3;

-  bit = bit & 0x7;

-  

-  byte %= len;

-    

-  return (b[byte] >> bit) & 1;

-}

-

-void setbit ( void * block, int len, uint32_t bit )

-{

-  uint8_t * b = (uint8_t*)block;

-

-  int byte = bit >> 3;

-  bit = bit & 0x7;

-  

-  if(byte < len) b[byte] |= (1 << bit);

-}

-

-void setbit ( void * block, int len, uint32_t bit, uint32_t val )

-{

-  val ? setbit(block,len,bit) : clearbit(block,len,bit);

-}

-

-void clearbit ( void * block, int len, uint32_t bit )

-{

-  uint8_t * b = (uint8_t*)block;

-

-  int byte = bit >> 3;

-  bit = bit & 0x7;

-  

-  if(byte < len) b[byte] &= ~(1 << bit);

-}

-

-void flipbit ( void * block, int len, uint32_t bit )

-{

-  uint8_t * b = (uint8_t*)block;

-

-  int byte = bit >> 3;

-  bit = bit & 0x7;

-  

-  if(byte < len) b[byte] ^= (1 << bit);

-}

-

-// from the "Bit Twiddling Hacks" webpage

-

-int countbits ( uint32_t v )

-{

-  v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary

-  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp

-  int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count

-

-  return c;

-}

-

-//-----------------------------------------------------------------------------

-

-void lshift1 ( void * blob, int len, int c )

-{

-  int nbits = len*8;

-

-  for(int i = nbits-1; i >= 0; i--)

-  {

-    setbit(blob,len,i,getbit(blob,len,i-c));

-  }

-}

-

-

-void lshift8 ( void * blob, int nbytes, int c )

-{

-  uint8_t * k = (uint8_t*)blob;

-

-  if(c == 0) return;

-

-  int b = c >> 3;

-  c &= 7;

-

-  for(int i = nbytes-1; i >= b; i--)

-  {

-    k[i] = k[i-b];

-  }

-

-  for(int i = b-1; i >= 0; i--)

-  {

-    k[i] = 0;

-  }

-

-  if(c == 0) return;

-

-  for(int i = nbytes-1; i >= 0; i--)

-  {

-    uint8_t a = k[i];

-    uint8_t b = (i == 0) ? 0 : k[i-1];

-

-    k[i] = (a << c) | (b >> (8-c));

-  }

-}

-

-void lshift32 ( void * blob, int len, int c )

-{

-  assert((len & 3) == 0);

-

-  int nbytes  = len;

-  int ndwords = nbytes / 4;

-

-  uint32_t * k = reinterpret_cast<uint32_t*>(blob);

-

-  if(c == 0) return;

-

-  //----------

-

-  int b = c / 32;

-  c &= (32-1);

-

-  for(int i = ndwords-1; i >= b; i--)

-  {

-    k[i] = k[i-b];

-  }

-

-  for(int i = b-1; i >= 0; i--)

-  {

-    k[i] = 0;

-  }

-

-  if(c == 0) return;

-

-  for(int i = ndwords-1; i >= 0; i--)

-  {

-    uint32_t a = k[i];

-    uint32_t b = (i == 0) ? 0 : k[i-1];

-

-    k[i] = (a << c) | (b >> (32-c));

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-void rshift1 ( void * blob, int len, int c )

-{

-  int nbits = len*8;

-

-  for(int i = 0; i < nbits; i++)

-  {

-    setbit(blob,len,i,getbit(blob,len,i+c));

-  }

-}

-

-void rshift8 ( void * blob, int nbytes, int c )

-{

-  uint8_t * k = (uint8_t*)blob;

-

-  if(c == 0) return;

-

-  int b = c >> 3;

-  c &= 7;

-

-  for(int i = 0; i < nbytes-b; i++)

-  {

-    k[i] = k[i+b];

-  }

-

-  for(int i = nbytes-b; i < nbytes; i++)

-  {

-    k[i] = 0;

-  }

-

-  if(c == 0) return;

-

-  for(int i = 0; i < nbytes; i++)

-  {

-    uint8_t a = (i == nbytes-1) ? 0 : k[i+1];

-    uint8_t b = k[i];

-

-    k[i] = (a << (8-c) ) | (b >> c);

-  }

-}

-

-void rshift32 ( void * blob, int len, int c )

-{

-  assert((len & 3) == 0);

-

-  int nbytes  = len;

-  int ndwords = nbytes / 4;

-

-  uint32_t * k = (uint32_t*)blob;

-

-  //----------

-

-  if(c == 0) return;

-

-  int b = c / 32;

-  c &= (32-1);

-

-  for(int i = 0; i < ndwords-b; i++)

-  {

-    k[i] = k[i+b];

-  }

-

-  for(int i = ndwords-b; i < ndwords; i++)

-  {

-    k[i] = 0;

-  }

-

-  if(c == 0) return;

-

-  for(int i = 0; i < ndwords; i++)

-  {

-    uint32_t a = (i == ndwords-1) ? 0 : k[i+1];

-    uint32_t b = k[i];

-

-    k[i] = (a << (32-c) ) | (b >> c);

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-void lrot1 ( void * blob, int len, int c )

-{

-  int nbits = len * 8;

-

-  for(int i = 0; i < c; i++)

-  {

-    uint32_t bit = getbit(blob,len,nbits-1);

-

-    lshift1(blob,len,1);

-

-    setbit(blob,len,0,bit);

-  }

-}

-

-void lrot8 ( void * blob, int len, int c )

-{

-  int nbytes  = len;

-

-  uint8_t * k = (uint8_t*)blob;

-

-  if(c == 0) return;

-

-  //----------

-

-  int b = c / 8;

-  c &= (8-1);

-

-  for(int j = 0; j < b; j++)

-  {

-    uint8_t t = k[nbytes-1];

-

-    for(int i = nbytes-1; i > 0; i--)

-    {

-      k[i] = k[i-1];

-    }

-

-    k[0] = t;

-  }

-

-  uint8_t t = k[nbytes-1];

-

-  if(c == 0) return;

-

-  for(int i = nbytes-1; i >= 0; i--)

-  {

-    uint8_t a = k[i];

-    uint8_t b = (i == 0) ? t : k[i-1];

-

-    k[i] = (a << c) | (b >> (8-c));

-  }

-}

-

-void lrot32 ( void * blob, int len, int c )

-{

-  assert((len & 3) == 0);

-

-  int nbytes  = len;

-  int ndwords = nbytes/4;

-

-  uint32_t * k = (uint32_t*)blob;

-

-  if(c == 0) return;

-

-  //----------

-

-  int b = c / 32;

-  c &= (32-1);

-

-  for(int j = 0; j < b; j++)

-  {

-    uint32_t t = k[ndwords-1];

-

-    for(int i = ndwords-1; i > 0; i--)

-    {

-      k[i] = k[i-1];

-    }

-

-    k[0] = t;

-  }

-

-  uint32_t t = k[ndwords-1];

-

-  if(c == 0) return;

-

-  for(int i = ndwords-1; i >= 0; i--)

-  {

-    uint32_t a = k[i];

-    uint32_t b = (i == 0) ? t : k[i-1];

-

-    k[i] = (a << c) | (b >> (32-c));

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-void rrot1 ( void * blob, int len, int c )

-{

-  int nbits = len * 8;

-

-  for(int i = 0; i < c; i++)

-  {

-    uint32_t bit = getbit(blob,len,0);

-

-    rshift1(blob,len,1);

-

-    setbit(blob,len,nbits-1,bit);

-  }

-}

-

-void rrot8 ( void * blob, int len, int c )

-{

-  int nbytes  = len;

-

-  uint8_t * k = (uint8_t*)blob;

-

-  if(c == 0) return;

-

-  //----------

-

-  int b = c / 8;

-  c &= (8-1);

-

-  for(int j = 0; j < b; j++)

-  {

-    uint8_t t = k[0];

-

-    for(int i = 0; i < nbytes-1; i++)

-    {

-      k[i] = k[i+1];

-    }

-

-    k[nbytes-1] = t;

-  }

-

-  if(c == 0) return;

-

-  //----------

-

-  uint8_t t = k[0];

-

-  for(int i = 0; i < nbytes; i++)

-  {

-    uint8_t a = (i == nbytes-1) ? t : k[i+1];

-    uint8_t b = k[i];

-

-    k[i] = (a << (8-c)) | (b >> c);

-  }

-}

-

-void rrot32 ( void * blob, int len, int c )

-{

-  assert((len & 3) == 0);

-

-  int nbytes  = len;

-  int ndwords = nbytes/4;

-

-  uint32_t * k = (uint32_t*)blob;

-

-  if(c == 0) return;

-

-  //----------

-

-  int b = c / 32;

-  c &= (32-1);

-

-  for(int j = 0; j < b; j++)

-  {

-    uint32_t t = k[0];

-

-    for(int i = 0; i < ndwords-1; i++)

-    {

-      k[i] = k[i+1];

-    }

-

-    k[ndwords-1] = t;

-  }

-

-  if(c == 0) return;

-

-  //----------

-

-  uint32_t t = k[0];

-

-  for(int i = 0; i < ndwords; i++)

-  {

-    uint32_t a = (i == ndwords-1) ? t : k[i+1];

-    uint32_t b = k[i];

-

-    k[i] = (a << (32-c)) | (b >> c);

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-uint32_t window1 ( void * blob, int len, int start, int count )

-{

-  int nbits = len*8;

-  start %= nbits;

-

-  uint32_t t = 0;

-

-  for(int i = 0; i < count; i++)

-  {

-    setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));

-  }

-

-  return t;

-}

-

-uint32_t window8 ( void * blob, int len, int start, int count )

-{

-  int nbits = len*8;

-  start %= nbits;

-

-  uint32_t t = 0;

-  uint8_t * k = (uint8_t*)blob;

-

-  if(count == 0) return 0;

-

-  int c = start & (8-1);

-  int d = start / 8;

-

-  for(int i = 0; i < 4; i++)

-  {

-    int ia = (i + d + 1) % len;

-    int ib = (i + d + 0) % len;

-

-    uint32_t a = k[ia];

-    uint32_t b = k[ib];

-    

-    uint32_t m = (a << (8-c)) | (b >> c);

-

-    t |= (m << (8*i));

-

-  }

-

-  t &= ((1 << count)-1);

-

-  return t;

-}

-

-uint32_t window32 ( void * blob, int len, int start, int count )

-{

-  int nbits = len*8;

-  start %= nbits;

-

-  assert((len & 3) == 0);

-

-  int ndwords = len / 4;

-

-  uint32_t * k = (uint32_t*)blob;

-

-  if(count == 0) return 0;

-

-  int c = start & (32-1);

-  int d = start / 32;

-

-  if(c == 0) return (k[d] & ((1 << count) - 1));

-

-  int ia = (d + 1) % ndwords;

-  int ib = (d + 0) % ndwords;

-

-  uint32_t a = k[ia];

-  uint32_t b = k[ib];

-  

-  uint32_t t = (a << (32-c)) | (b >> c);

-

-  t &= ((1 << count)-1);

-

-  return t;

-}

-

-//-----------------------------------------------------------------------------

-

-bool test_shift ( void )

-{

-  Rand r(1123);

-

-  int nbits   = 64;

-  int nbytes  = nbits / 8;

-  int reps = 10000;

-

-  for(int j = 0; j < reps; j++)

-  {

-    if(j % (reps/10) == 0) printf(".");

-

-    uint64_t a = r.rand_u64();

-    uint64_t b;

-

-    for(int i = 0; i < nbits; i++)

-    {

-      b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));

-      b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));

-      b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));

-

-      b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));

-      b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));

-      b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));

-

-      b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));

-      b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));

-      b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));

-

-      b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));

-      b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));

-      b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));

-    }

-  }

-

-  printf("PASS\n");

-  return true;

-}

-

-//-----------------------------------------------------------------------------

-

-template < int nbits >

-bool test_window2 ( void )

-{

-  Rand r(83874);

-  

-  struct keytype

-  {

-    uint8_t bytes[nbits/8];

-  };

-

-  int nbytes = nbits / 8;

-  int reps = 10000;

-

-  for(int j = 0; j < reps; j++)

-  {

-    if(j % (reps/10) == 0) printf(".");

-

-    keytype k;

-

-    r.rand_p(&k,nbytes);

-

-    for(int start = 0; start < nbits; start++)

-    {

-      for(int count = 0; count < 32; count++)

-      {

-        uint32_t a = window1(&k,nbytes,start,count);

-        uint32_t b = window8(&k,nbytes,start,count);

-        uint32_t c = window(&k,nbytes,start,count);

-

-        assert(a == b);

-        assert(a == c);

-      }

-    }

-  }

-

-  printf("PASS %d\n",nbits);

-

-  return true;

-}

-

-bool test_window ( void )

-{

-  Rand r(48402);

-  

-  int reps = 10000;

-

-  for(int j = 0; j < reps; j++)

-  {

-    if(j % (reps/10) == 0) printf(".");

-

-    int nbits   = 64;

-    int nbytes  = nbits / 8;

-

-    uint64_t x = r.rand_u64();

-

-    for(int start = 0; start < nbits; start++)

-    {

-      for(int count = 0; count < 32; count++)

-      {

-        uint32_t a = (uint32_t)ROTR64(x,start);

-        a &= ((1 << count)-1);

-        

-        uint32_t b = window1 (&x,nbytes,start,count);

-        uint32_t c = window8 (&x,nbytes,start,count);

-        uint32_t d = window32(&x,nbytes,start,count);

-        uint32_t e = window  (x,start,count);

-

-        assert(a == b);

-        assert(a == c);

-        assert(a == d);

-        assert(a == e);

-      }

-    }

-  }

-

-  printf("PASS 64\n");

-

-  test_window2<8>();

-  test_window2<16>();

-  test_window2<24>();

-  test_window2<32>();

-  test_window2<40>();

-  test_window2<48>();

-  test_window2<56>();

-  test_window2<64>();

-

-  return true;

-}

-

-//-----------------------------------------------------------------------------

+#include "Bitvec.h"
+
+#include "Random.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#ifndef DEBUG
+#undef assert
+void assert ( bool )
+{
+}
+#endif
+
+//----------------------------------------------------------------------------
+
+void printbits ( const void * blob, int len )
+{
+  const uint8_t * data = (const uint8_t *)blob;
+
+  printf("[");
+  for(int i = 0; i < len; i++)
+  {
+    unsigned char byte = data[i];
+
+    int hi = (byte >> 4);
+    int lo = (byte & 0xF);
+
+    if(hi) printf("%01x",hi);
+    else   printf(".");
+
+    if(lo) printf("%01x",lo);
+    else   printf(".");
+
+    if(i != len-1) printf(" ");
+  }
+  printf("]");
+}
+
+void printbits2 ( const uint8_t * k, int nbytes )
+{
+  printf("[");
+
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t b = k[i];
+
+    for(int j = 7; j >= 0; j--)
+    {
+      uint8_t c = (b & (1 << j)) ? '#' : ' ';
+
+      putc(c,stdout);
+    }
+  }
+  printf("]");
+}
+
+void printhex32 ( const void * blob, int len )
+{
+  assert((len & 3) == 0);
+
+  uint32_t * d = (uint32_t*)blob;
+
+  printf("{ ");
+
+  for(int i = 0; i < len/4; i++) 
+  {
+    printf("0x%08x, ",d[i]);
+  }
+
+  printf("}");
+}
+
+void printbytes ( const void * blob, int len )
+{
+  uint8_t * d = (uint8_t*)blob;
+
+  printf("{ ");
+
+  for(int i = 0; i < len; i++)
+  {
+    printf("0x%02x, ",d[i]);
+  }
+
+  printf(" };");
+}
+
+void printbytes2 ( const void * blob, int len )
+{
+  uint8_t * d = (uint8_t*)blob;
+
+  for(int i = 0; i < len; i++)
+  {
+    printf("%02x ",d[i]);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Bit-level manipulation
+
+// These two are from the "Bit Twiddling Hacks" webpage
+
+uint32_t popcount ( uint32_t v )
+{
+	v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+	uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
+
+	return c;
+}
+
+uint32_t parity ( uint32_t v )
+{
+	v ^= v >> 1;
+	v ^= v >> 2;
+	v = (v & 0x11111111U) * 0x11111111U;
+	return (v >> 28) & 1;
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t getbit ( const void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) return (b[byte] >> bit) & 1;
+
+  return 0;
+}
+
+uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  byte %= len;
+    
+  return (b[byte] >> bit) & 1;
+}
+
+void setbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] |= (1 << bit);
+}
+
+void setbit ( void * block, int len, uint32_t bit, uint32_t val )
+{
+  val ? setbit(block,len,bit) : clearbit(block,len,bit);
+}
+
+void clearbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] &= ~(1 << bit);
+}
+
+void flipbit ( void * block, int len, uint32_t bit )
+{
+  uint8_t * b = (uint8_t*)block;
+
+  int byte = bit >> 3;
+  bit = bit & 0x7;
+  
+  if(byte < len) b[byte] ^= (1 << bit);
+}
+
+// from the "Bit Twiddling Hacks" webpage
+
+int countbits ( uint32_t v )
+{
+  v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+  int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
+
+  return c;
+}
+
+//-----------------------------------------------------------------------------
+
+void lshift1 ( void * blob, int len, int c )
+{
+  int nbits = len*8;
+
+  for(int i = nbits-1; i >= 0; i--)
+  {
+    setbit(blob,len,i,getbit(blob,len,i-c));
+  }
+}
+
+
+void lshift8 ( void * blob, int nbytes, int c )
+{
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  int b = c >> 3;
+  c &= 7;
+
+  for(int i = nbytes-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
+
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? 0 : k[i-1];
+
+    k[i] = (a << c) | (b >> (8-c));
+  }
+}
+
+void lshift32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
+
+  uint32_t * k = reinterpret_cast<uint32_t*>(blob);
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int i = ndwords-1; i >= b; i--)
+  {
+    k[i] = k[i-b];
+  }
+
+  for(int i = b-1; i >= 0; i--)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? 0 : k[i-1];
+
+    k[i] = (a << c) | (b >> (32-c));
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void rshift1 ( void * blob, int len, int c )
+{
+  int nbits = len*8;
+
+  for(int i = 0; i < nbits; i++)
+  {
+    setbit(blob,len,i,getbit(blob,len,i+c));
+  }
+}
+
+void rshift8 ( void * blob, int nbytes, int c )
+{
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  int b = c >> 3;
+  c &= 7;
+
+  for(int i = 0; i < nbytes-b; i++)
+  {
+    k[i] = k[i+b];
+  }
+
+  for(int i = nbytes-b; i < nbytes; i++)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
+    uint8_t b = k[i];
+
+    k[i] = (a << (8-c) ) | (b >> c);
+  }
+}
+
+void rshift32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes / 4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  //----------
+
+  if(c == 0) return;
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int i = 0; i < ndwords-b; i++)
+  {
+    k[i] = k[i+b];
+  }
+
+  for(int i = ndwords-b; i < ndwords; i++)
+  {
+    k[i] = 0;
+  }
+
+  if(c == 0) return;
+
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
+    uint32_t b = k[i];
+
+    k[i] = (a << (32-c) ) | (b >> c);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void lrot1 ( void * blob, int len, int c )
+{
+  int nbits = len * 8;
+
+  for(int i = 0; i < c; i++)
+  {
+    uint32_t bit = getbit(blob,len,nbits-1);
+
+    lshift1(blob,len,1);
+
+    setbit(blob,len,0,bit);
+  }
+}
+
+void lrot8 ( void * blob, int len, int c )
+{
+  int nbytes  = len;
+
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 8;
+  c &= (8-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[nbytes-1];
+
+    for(int i = nbytes-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
+
+    k[0] = t;
+  }
+
+  uint8_t t = k[nbytes-1];
+
+  if(c == 0) return;
+
+  for(int i = nbytes-1; i >= 0; i--)
+  {
+    uint8_t a = k[i];
+    uint8_t b = (i == 0) ? t : k[i-1];
+
+    k[i] = (a << c) | (b >> (8-c));
+  }
+}
+
+void lrot32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes/4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[ndwords-1];
+
+    for(int i = ndwords-1; i > 0; i--)
+    {
+      k[i] = k[i-1];
+    }
+
+    k[0] = t;
+  }
+
+  uint32_t t = k[ndwords-1];
+
+  if(c == 0) return;
+
+  for(int i = ndwords-1; i >= 0; i--)
+  {
+    uint32_t a = k[i];
+    uint32_t b = (i == 0) ? t : k[i-1];
+
+    k[i] = (a << c) | (b >> (32-c));
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void rrot1 ( void * blob, int len, int c )
+{
+  int nbits = len * 8;
+
+  for(int i = 0; i < c; i++)
+  {
+    uint32_t bit = getbit(blob,len,0);
+
+    rshift1(blob,len,1);
+
+    setbit(blob,len,nbits-1,bit);
+  }
+}
+
+void rrot8 ( void * blob, int len, int c )
+{
+  int nbytes  = len;
+
+  uint8_t * k = (uint8_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 8;
+  c &= (8-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint8_t t = k[0];
+
+    for(int i = 0; i < nbytes-1; i++)
+    {
+      k[i] = k[i+1];
+    }
+
+    k[nbytes-1] = t;
+  }
+
+  if(c == 0) return;
+
+  //----------
+
+  uint8_t t = k[0];
+
+  for(int i = 0; i < nbytes; i++)
+  {
+    uint8_t a = (i == nbytes-1) ? t : k[i+1];
+    uint8_t b = k[i];
+
+    k[i] = (a << (8-c)) | (b >> c);
+  }
+}
+
+void rrot32 ( void * blob, int len, int c )
+{
+  assert((len & 3) == 0);
+
+  int nbytes  = len;
+  int ndwords = nbytes/4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(c == 0) return;
+
+  //----------
+
+  int b = c / 32;
+  c &= (32-1);
+
+  for(int j = 0; j < b; j++)
+  {
+    uint32_t t = k[0];
+
+    for(int i = 0; i < ndwords-1; i++)
+    {
+      k[i] = k[i+1];
+    }
+
+    k[ndwords-1] = t;
+  }
+
+  if(c == 0) return;
+
+  //----------
+
+  uint32_t t = k[0];
+
+  for(int i = 0; i < ndwords; i++)
+  {
+    uint32_t a = (i == ndwords-1) ? t : k[i+1];
+    uint32_t b = k[i];
+
+    k[i] = (a << (32-c)) | (b >> c);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t window1 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  uint32_t t = 0;
+
+  for(int i = 0; i < count; i++)
+  {
+    setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));
+  }
+
+  return t;
+}
+
+uint32_t window8 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  uint32_t t = 0;
+  uint8_t * k = (uint8_t*)blob;
+
+  if(count == 0) return 0;
+
+  int c = start & (8-1);
+  int d = start / 8;
+
+  for(int i = 0; i < 4; i++)
+  {
+    int ia = (i + d + 1) % len;
+    int ib = (i + d + 0) % len;
+
+    uint32_t a = k[ia];
+    uint32_t b = k[ib];
+    
+    uint32_t m = (a << (8-c)) | (b >> c);
+
+    t |= (m << (8*i));
+
+  }
+
+  t &= ((1 << count)-1);
+
+  return t;
+}
+
+uint32_t window32 ( void * blob, int len, int start, int count )
+{
+  int nbits = len*8;
+  start %= nbits;
+
+  assert((len & 3) == 0);
+
+  int ndwords = len / 4;
+
+  uint32_t * k = (uint32_t*)blob;
+
+  if(count == 0) return 0;
+
+  int c = start & (32-1);
+  int d = start / 32;
+
+  if(c == 0) return (k[d] & ((1 << count) - 1));
+
+  int ia = (d + 1) % ndwords;
+  int ib = (d + 0) % ndwords;
+
+  uint32_t a = k[ia];
+  uint32_t b = k[ib];
+  
+  uint32_t t = (a << (32-c)) | (b >> c);
+
+  t &= ((1 << count)-1);
+
+  return t;
+}
+
+//-----------------------------------------------------------------------------
+
+bool test_shift ( void )
+{
+  Rand r(1123);
+
+  int nbits   = 64;
+  int nbytes  = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    uint64_t a = r.rand_u64();
+    uint64_t b;
+
+    for(int i = 0; i < nbits; i++)
+    {
+      b = a; lshift1  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift8  (&b,nbytes,i);  assert(b == (a << i));
+      b = a; lshift32 (&b,nbytes,i);  assert(b == (a << i));
+
+      b = a; rshift1  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift8  (&b,nbytes,i);  assert(b == (a >> i));
+      b = a; rshift32 (&b,nbytes,i);  assert(b == (a >> i));
+
+      b = a; lrot1    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot8    (&b,nbytes,i);  assert(b == ROTL64(a,i));
+      b = a; lrot32   (&b,nbytes,i);  assert(b == ROTL64(a,i));
+
+      b = a; rrot1    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot8    (&b,nbytes,i);  assert(b == ROTR64(a,i));
+      b = a; rrot32   (&b,nbytes,i);  assert(b == ROTR64(a,i));
+    }
+  }
+
+  printf("PASS\n");
+  return true;
+}
+
+//-----------------------------------------------------------------------------
+
+template < int nbits >
+bool test_window2 ( void )
+{
+  Rand r(83874);
+  
+  struct keytype
+  {
+    uint8_t bytes[nbits/8];
+  };
+
+  int nbytes = nbits / 8;
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    keytype k;
+
+    r.rand_p(&k,nbytes);
+
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = window1(&k,nbytes,start,count);
+        uint32_t b = window8(&k,nbytes,start,count);
+        uint32_t c = window(&k,nbytes,start,count);
+
+        assert(a == b);
+        assert(a == c);
+      }
+    }
+  }
+
+  printf("PASS %d\n",nbits);
+
+  return true;
+}
+
+bool test_window ( void )
+{
+  Rand r(48402);
+  
+  int reps = 10000;
+
+  for(int j = 0; j < reps; j++)
+  {
+    if(j % (reps/10) == 0) printf(".");
+
+    int nbits   = 64;
+    int nbytes  = nbits / 8;
+
+    uint64_t x = r.rand_u64();
+
+    for(int start = 0; start < nbits; start++)
+    {
+      for(int count = 0; count < 32; count++)
+      {
+        uint32_t a = (uint32_t)ROTR64(x,start);
+        a &= ((1 << count)-1);
+        
+        uint32_t b = window1 (&x,nbytes,start,count);
+        uint32_t c = window8 (&x,nbytes,start,count);
+        uint32_t d = window32(&x,nbytes,start,count);
+        uint32_t e = window  (x,start,count);
+
+        assert(a == b);
+        assert(a == c);
+        assert(a == d);
+        assert(a == e);
+      }
+    }
+  }
+
+  printf("PASS 64\n");
+
+  test_window2<8>();
+  test_window2<16>();
+  test_window2<24>();
+  test_window2<32>();
+  test_window2<40>();
+  test_window2<48>();
+  test_window2<56>();
+  test_window2<64>();
+
+  return true;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Bitvec.h b/Bitvec.h
index 8a3a1b0..4d61979 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -1,245 +1,245 @@
-#pragma once

-

-#include "Platform.h"

-

-#include <vector>

-

-//-----------------------------------------------------------------------------

-

-void     printbits   ( const void * blob, int len );

-void     printhex32  ( const void * blob, int len );

-void     printbytes  ( const void * blob, int len );

-void     printbytes2 ( const void * blob, int len );

-

-uint32_t popcount    ( uint32_t v );

-uint32_t parity      ( uint32_t v );

-

-uint32_t getbit      ( const void * blob, int len, uint32_t bit );

-uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );

-

-void     setbit      ( void * blob, int len, uint32_t bit );

-void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );

-

-void     clearbit    ( void * blob, int len, uint32_t bit );

-

-void     flipbit     ( void * blob, int len, uint32_t bit );

-

-int      countbits   ( uint32_t v );

-int      countbits   ( std::vector<uint32_t> & v );

-

-int      countbits   ( const void * blob, int len );

-

-void     invert      ( std::vector<uint32_t> & v );

-

-//----------

-

-template< typename T >

-inline uint32_t getbit ( T & blob, uint32_t bit )

-{

-  return getbit(&blob,sizeof(blob),bit);

-}

-

-template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }

-template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }

-

-//----------

-

-template< typename T >

-inline void setbit ( T & blob, uint32_t bit )

-{

-  return setbit(&blob,sizeof(blob),bit);

-}

-

-template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }

-template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }

-

-//----------

-

-template< typename T >

-inline void flipbit ( T & blob, uint32_t bit )

-{

-  flipbit(&blob,sizeof(blob),bit);

-}

-

-template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }

-template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }

-

-//-----------------------------------------------------------------------------

-// Left and right shift of blobs. The shift(N) versions work on chunks of N

-// bits at a time (faster)

-

-void lshift1  ( void * blob, int len, int c );

-void lshift8  ( void * blob, int len, int c );

-void lshift32 ( void * blob, int len, int c );

-

-void rshift1  ( void * blob, int len, int c );

-void rshift8  ( void * blob, int len, int c );

-void rshift32 ( void * blob, int len, int c );

-

-inline void lshift ( void * blob, int len, int c )

-{

-  if((len & 3) == 0)

-  {

-    lshift32(blob,len,c);

-  }

-  else

-  {

-    lshift8(blob,len,c);

-  }

-}

-

-inline void rshift ( void * blob, int len, int c )

-{

-  if((len & 3) == 0)

-  {

-    rshift32(blob,len,c);

-  }

-  else

-  {

-    rshift8(blob,len,c);

-  }

-}

-

-template < typename T >

-inline void lshift ( T & blob, int c )

-{

-  if((sizeof(T) & 3) == 0)

-  {

-    lshift32(&blob,sizeof(T),c);

-  }

-  else

-  {

-    lshift8(&blob,sizeof(T),c);

-  }

-}

-

-template < typename T >

-inline void rshift ( T & blob, int c )

-{

-  if((sizeof(T) & 3) == 0)

-  {

-    lshift32(&blob,sizeof(T),c);

-  }

-  else

-  {

-    lshift8(&blob,sizeof(T),c);

-  }

-}

-

-template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }

-template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }

-template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }

-template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }

-

-//-----------------------------------------------------------------------------

-// Left and right rotate of blobs. The rot(N) versions work on chunks of N

-// bits at a time (faster)

-

-void lrot1    ( void * blob, int len, int c );

-void lrot8    ( void * blob, int len, int c );

-void lrot32   ( void * blob, int len, int c );

-

-void rrot1    ( void * blob, int len, int c );

-void rrot8    ( void * blob, int len, int c );

-void rrot32   ( void * blob, int len, int c );

-

-inline void lrot ( void * blob, int len, int c )

-{

-  if((len & 3) == 0)

-  {

-    return lrot32(blob,len,c);

-  }

-  else

-  {

-    return lrot8(blob,len,c);

-  }

-}

-

-inline void rrot ( void * blob, int len, int c )

-{

-  if((len & 3) == 0)

-  {

-    return rrot32(blob,len,c);

-  }

-  else

-  {

-    return rrot8(blob,len,c);

-  }

-}

-

-template < typename T >

-inline void lrot ( T & blob, int c )

-{

-  if((sizeof(T) & 3) == 0)

-  {

-    return lrot32(&blob,sizeof(T),c);

-  }

-  else

-  {

-    return lrot8(&blob,sizeof(T),c);

-  }

-}

-

-template < typename T >

-inline void rrot ( T & blob, int c )

-{

-  if((sizeof(T) & 3) == 0)

-  {

-    return rrot32(&blob,sizeof(T),c);

-  }

-  else

-  {

-    return rrot8(&blob,sizeof(T),c);

-  }

-}

-

-template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }

-template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }

-template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }

-template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }

-

-//-----------------------------------------------------------------------------

-// Bit-windowing functions - select some N-bit subset of the input blob

-

-uint32_t window1  ( void * blob, int len, int start, int count );

-uint32_t window8  ( void * blob, int len, int start, int count );

-uint32_t window32 ( void * blob, int len, int start, int count );

-

-inline uint32_t window ( void * blob, int len, int start, int count )

-{

-  if(len & 3)

-  {

-    return window8(blob,len,start,count);

-  }

-  else

-  {

-    return window32(blob,len,start,count);

-  }

-}

-

-template < typename T >

-inline uint32_t window ( T & blob, int start, int count )

-{

-  if((sizeof(T) & 3) == 0)

-  {

-    return window32(&blob,sizeof(T),start,count);

-  }

-  else

-  {

-    return window8(&blob,sizeof(T),start,count);

-  }

-}

-

-template<> 

-inline uint32_t window ( uint32_t & blob, int start, int count )

-{

-  return ROTR32(blob,start) & ((1<<count)-1);

-}

-

-template<> 

-inline uint32_t window ( uint64_t & blob, int start, int count )

-{

-  return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);

-}

-

-//-----------------------------------------------------------------------------

+#pragma once
+
+#include "Platform.h"
+
+#include <vector>
+
+//-----------------------------------------------------------------------------
+
+void     printbits   ( const void * blob, int len );
+void     printhex32  ( const void * blob, int len );
+void     printbytes  ( const void * blob, int len );
+void     printbytes2 ( const void * blob, int len );
+
+uint32_t popcount    ( uint32_t v );
+uint32_t parity      ( uint32_t v );
+
+uint32_t getbit      ( const void * blob, int len, uint32_t bit );
+uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
+
+void     setbit      ( void * blob, int len, uint32_t bit );
+void     setbit      ( void * blob, int len, uint32_t bit, uint32_t val );
+
+void     clearbit    ( void * blob, int len, uint32_t bit );
+
+void     flipbit     ( void * blob, int len, uint32_t bit );
+
+int      countbits   ( uint32_t v );
+int      countbits   ( std::vector<uint32_t> & v );
+
+int      countbits   ( const void * blob, int len );
+
+void     invert      ( std::vector<uint32_t> & v );
+
+//----------
+
+template< typename T >  // Typed wrapper: passes the object's address and size to getbit(blob,len,bit).
+inline uint32_t getbit ( T & blob, uint32_t bit )
+{
+  return getbit(&blob,sizeof(blob),bit);
+}
+
+template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }  // 32-bit fast path; bit index is masked to 0..31
+template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }  // 64-bit fast path; bit index is masked to 0..63
+
+//----------
+
+template< typename T >  // Typed wrapper: passes the object's address and size to setbit(blob,len,bit).
+inline void setbit ( T & blob, uint32_t bit )
+{
+  return setbit(&blob,sizeof(blob),bit);  // returning a void expression from a void function is legal C++
+}
+
+template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }  // 32-bit fast path; bit index is masked to 0..31
+template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }  // 64-bit fast path; bit index is masked to 0..63
+
+//----------
+
+template< typename T >  // Typed wrapper: passes the object's address and size to flipbit(blob,len,bit).
+inline void flipbit ( T & blob, uint32_t bit )
+{
+  flipbit(&blob,sizeof(blob),bit);
+}
+
+template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }  // 32-bit fast path; toggles one bit, index masked to 0..31
+template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }  // 64-bit fast path; toggles one bit, index masked to 0..63
+
+//-----------------------------------------------------------------------------
+// Left and right shift of blobs. The shift(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lshift1  ( void * blob, int len, int c );
+void lshift8  ( void * blob, int len, int c );
+void lshift32 ( void * blob, int len, int c );
+
+void rshift1  ( void * blob, int len, int c );
+void rshift8  ( void * blob, int len, int c );
+void rshift32 ( void * blob, int len, int c );
+
+inline void lshift ( void * blob, int len, int c )  // Left shift of a len-byte blob: picks the chunk size from len.
+{
+  if((len & 3) == 0)  // len is a multiple of 4 bytes, so the 32-bit chunk version applies
+  {
+    lshift32(blob,len,c);
+  }
+  else
+  {
+    lshift8(blob,len,c);  // otherwise fall back to byte-sized chunks
+  }
+}
+
+inline void rshift ( void * blob, int len, int c )  // Right shift of a len-byte blob: picks the chunk size from len.
+{
+  if((len & 3) == 0)  // len is a multiple of 4 bytes, so the 32-bit chunk version applies
+  {
+    rshift32(blob,len,c);
+  }
+  else
+  {
+    rshift8(blob,len,c);  // otherwise fall back to byte-sized chunks
+  }
+}
+
+template < typename T >  // Typed left shift: treats the object as a sizeof(T)-byte blob.
+inline void lshift ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)  // object size is a multiple of 4 bytes
+  {
+    lshift32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    lshift8(&blob,sizeof(T),c);  // byte-chunk fallback for other sizes
+  }
+}
+
+template < typename T >  // Typed right shift: treats the object as a sizeof(T)-byte blob.
+inline void rshift ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)  // object size is a multiple of 4 bytes
+  {
+    rshift32(&blob,sizeof(T),c);  // must be the right-shift helper, mirroring the untyped rshift()
+  }
+  else
+  {
+    rshift8(&blob,sizeof(T),c);  // byte-chunk fallback for other sizes
+  }
+}
+
+template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }  // native 32-bit shift; c >= 32 is undefined in C++
+template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }  // native 64-bit shift; c >= 64 is undefined in C++
+template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }  // native 32-bit shift; c >= 32 is undefined in C++
+template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }  // native 64-bit shift; c >= 64 is undefined in C++
+
+//-----------------------------------------------------------------------------
+// Left and right rotate of blobs. The rot(N) versions work on chunks of N
+// bits at a time (faster)
+
+void lrot1    ( void * blob, int len, int c );
+void lrot8    ( void * blob, int len, int c );
+void lrot32   ( void * blob, int len, int c );
+
+void rrot1    ( void * blob, int len, int c );
+void rrot8    ( void * blob, int len, int c );
+void rrot32   ( void * blob, int len, int c );
+
+inline void lrot ( void * blob, int len, int c )  // Left rotate of a len-byte blob: picks the chunk size from len.
+{
+  if((len & 3) == 0)  // len is a multiple of 4 bytes, so the 32-bit chunk version applies
+  {
+    return lrot32(blob,len,c);
+  }
+  else
+  {
+    return lrot8(blob,len,c);  // otherwise fall back to byte-sized chunks
+  }
+}
+
+inline void rrot ( void * blob, int len, int c )  // Right rotate of a len-byte blob: picks the chunk size from len.
+{
+  if((len & 3) == 0)  // len is a multiple of 4 bytes, so the 32-bit chunk version applies
+  {
+    return rrot32(blob,len,c);
+  }
+  else
+  {
+    return rrot8(blob,len,c);  // otherwise fall back to byte-sized chunks
+  }
+}
+
+template < typename T >  // Typed left rotate: treats the object as a sizeof(T)-byte blob.
+inline void lrot ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)  // object size is a multiple of 4 bytes
+  {
+    return lrot32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    return lrot8(&blob,sizeof(T),c);  // byte-chunk fallback for other sizes
+  }
+}
+
+template < typename T >  // Typed right rotate: treats the object as a sizeof(T)-byte blob.
+inline void rrot ( T & blob, int c )
+{
+  if((sizeof(T) & 3) == 0)  // object size is a multiple of 4 bytes
+  {
+    return rrot32(&blob,sizeof(T),c);
+  }
+  else
+  {
+    return rrot8(&blob,sizeof(T),c);  // byte-chunk fallback for other sizes
+  }
+}
+
+template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }  // 32-bit fast path via ROTL32 (defined outside this header)
+template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }  // 64-bit fast path via ROTL64 (defined outside this header)
+template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }  // 32-bit fast path via ROTR32 (defined outside this header)
+template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }  // 64-bit fast path via ROTR64 (defined outside this header)
+
+//-----------------------------------------------------------------------------
+// Bit-windowing functions - select some N-bit subset of the input blob
+
+uint32_t window1  ( void * blob, int len, int start, int count );
+uint32_t window8  ( void * blob, int len, int start, int count );
+uint32_t window32 ( void * blob, int len, int start, int count );
+
+inline uint32_t window ( void * blob, int len, int start, int count )  // Bit-window of a len-byte blob: picks the chunk size from len.
+{
+  if(len & 3)  // len is not a multiple of 4 bytes, so use the byte-chunk version
+  {
+    return window8(blob,len,start,count);
+  }
+  else
+  {
+    return window32(blob,len,start,count);  // len is a multiple of 4 bytes
+  }
+}
+
+template < typename T >  // Typed bit-window: treats the object as a sizeof(T)-byte blob.
+inline uint32_t window ( T & blob, int start, int count )
+{
+  if((sizeof(T) & 3) == 0)  // object size is a multiple of 4 bytes
+  {
+    return window32(&blob,sizeof(T),start,count);
+  }
+  else
+  {
+    return window8(&blob,sizeof(T),start,count);  // byte-chunk fallback for other sizes
+  }
+}
+
+template<>  // 32-bit fast path: applies ROTR32 by start, then keeps the low count bits.
+inline uint32_t window ( uint32_t & blob, int start, int count )
+{
+  return ROTR32(blob,start) & (uint32_t)((uint64_t(1) << count) - 1);  // mask built in 64-bit unsigned: (1<<count)-1 on int is undefined for count of 31 or 32
+}
+
+template<>  // 64-bit fast path: applies ROTR64 by start, then keeps the low count bits of the low word.
+inline uint32_t window ( uint64_t & blob, int start, int count )
+{
+  return (uint32_t)ROTR64(blob,start) & (uint32_t)((uint64_t(1) << count) - 1);  // mask built in 64-bit unsigned: (1<<count)-1 on int is undefined for count of 31 or 32
+}
+
+//-----------------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b5df45..d04afdf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@
   AvalancheTest.cpp
   Bitslice.cpp
   Bitvec.cpp
+  CityTest.cpp
   City.cpp
   crc.cpp
   DifferentialTest.cpp
@@ -24,6 +25,8 @@
   Random.cpp
   sha1.cpp
   SpeedTest.cpp
+  Spooky.cpp
+  SpookyTest.cpp
   Stats.cpp
   SuperFastHash.cpp
   Types.cpp
diff --git a/City.cpp b/City.cpp
index 9043440..4d70dd2 100644
--- a/City.cpp
+++ b/City.cpp
@@ -1,321 +1,465 @@
-// Copyright (c) 2011 Google, Inc.

-//

-// Permission is hereby granted, free of charge, to any person obtaining a copy

-// of this software and associated documentation files (the "Software"), to deal

-// in the Software without restriction, including without limitation the rights

-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

-// copies of the Software, and to permit persons to whom the Software is

-// furnished to do so, subject to the following conditions:

-//

-// The above copyright notice and this permission notice shall be included in

-// all copies or substantial portions of the Software.

-//

-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

-// THE SOFTWARE.

-//

-// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala

-//

-// This file provides CityHash64() and related functions.

-//

-// It's probably possible to create even faster hash functions by

-// writing a program that systematically explores some of the space of

-// possible hash functions, by using SIMD instructions, or by

-// compromising on hash quality.

-

-#include "City.h"

-

-#include <algorithm>

-

-using namespace std;

-

-#define UNALIGNED_LOAD64(p) (*(const uint64*)(p))

-#define UNALIGNED_LOAD32(p) (*(const uint32*)(p))

-

-#if !defined(LIKELY)

-#if defined(__GNUC__)

-#define LIKELY(x) (__builtin_expect(!!(x), 1))

-#else

-#define LIKELY(x) (x)

-#endif

-#endif

-

-// Some primes between 2^63 and 2^64 for various uses.

-static const uint64 k0 = 0xc3a5c85c97cb3127ULL;

-static const uint64 k1 = 0xb492b66fbe98f273ULL;

-static const uint64 k2 = 0x9ae16a3b2f90404fULL;

-static const uint64 k3 = 0xc949d7c7509e6557ULL;

-

-// Bitwise right rotate.  Normally this will compile to a single

-// instruction, especially if the shift is a manifest constant.

-static uint64 Rotate(uint64 val, int shift) {

-  // Avoid shifting by 64: doing so yields an undefined result.

-  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));

-}

-

-// Equivalent to Rotate(), but requires the second arg to be non-zero.

-// On x86-64, and probably others, it's possible for this to compile

-// to a single instruction if both args are already in registers.

-static uint64 RotateByAtLeast1(uint64 val, int shift) {

-  return (val >> shift) | (val << (64 - shift));

-}

-

-static uint64 ShiftMix(uint64 val) {

-  return val ^ (val >> 47);

-}

-

-static uint64 HashLen16(uint64 u, uint64 v) {

-  return Hash128to64(uint128(u, v));

-}

-

-static uint64 HashLen0to16(const char *s, size_t len) {

-  if (len > 8) {

-    uint64 a = UNALIGNED_LOAD64(s);

-    uint64 b = UNALIGNED_LOAD64(s + len - 8);

-    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;

-  }

-  if (len >= 4) {

-    uint64 a = UNALIGNED_LOAD32(s);

-    return HashLen16(len + (a << 3), UNALIGNED_LOAD32(s + len - 4));

-  }

-  if (len > 0) {

-    uint8 a = s[0];

-    uint8 b = s[len >> 1];

-    uint8 c = s[len - 1];

-    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);

-    uint32 z = len + (static_cast<uint32>(c) << 2);

-    return ShiftMix(y * k2 ^ z * k3) * k2;

-  }

-  return k2;

-}

-

-// This probably works well for 16-byte strings as well, but it may be overkill

-// in that case.

-static uint64 HashLen17to32(const char *s, size_t len) {

-  uint64 a = UNALIGNED_LOAD64(s) * k1;

-  uint64 b = UNALIGNED_LOAD64(s + 8);

-  uint64 c = UNALIGNED_LOAD64(s + len - 8) * k2;

-  uint64 d = UNALIGNED_LOAD64(s + len - 16) * k0;

-  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,

-                   a + Rotate(b ^ k3, 20) - c + len);

-}

-

-// Return a 16-byte hash for 48 bytes.  Quick and dirty.

-// Callers do best to use "random-looking" values for a and b.

-static pair<uint64, uint64> WeakHashLen32WithSeeds(

-    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {

-  a += w;

-  b = Rotate(b + a + z, 21);

-  uint64 c = a;

-  a += x;

-  a += y;

-  b += Rotate(a, 44);

-  return make_pair(a + z, b + c);

-}

-

-// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.

-static pair<uint64, uint64> WeakHashLen32WithSeeds(

-    const char* s, uint64 a, uint64 b) {

-  return WeakHashLen32WithSeeds(UNALIGNED_LOAD64(s),

-                                UNALIGNED_LOAD64(s + 8),

-                                UNALIGNED_LOAD64(s + 16),

-                                UNALIGNED_LOAD64(s + 24),

-                                a,

-                                b);

-}

-

-// Return an 8-byte hash for 33 to 64 bytes.

-static uint64 HashLen33to64(const char *s, size_t len) {

-  uint64 z = UNALIGNED_LOAD64(s + 24);

-  uint64 a = UNALIGNED_LOAD64(s) + (len + UNALIGNED_LOAD64(s + len - 16)) * k0;

-  uint64 b = Rotate(a + z, 52);

-  uint64 c = Rotate(a, 37);

-  a += UNALIGNED_LOAD64(s + 8);

-  c += Rotate(a, 7);

-  a += UNALIGNED_LOAD64(s + 16);

-  uint64 vf = a + z;

-  uint64 vs = b + Rotate(a, 31) + c;

-  a = UNALIGNED_LOAD64(s + 16) + UNALIGNED_LOAD64(s + len - 32);

-  z = UNALIGNED_LOAD64(s + len - 8);

-  b = Rotate(a + z, 52);

-  c = Rotate(a, 37);

-  a += UNALIGNED_LOAD64(s + len - 24);

-  c += Rotate(a, 7);

-  a += UNALIGNED_LOAD64(s + len - 16);

-  uint64 wf = a + z;

-  uint64 ws = b + Rotate(a, 31) + c;

-  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);

-  return ShiftMix(r * k0 + vs) * k2;

-}

-

-uint64 CityHash64(const char *s, size_t len) {

-  if (len <= 32) {

-    if (len <= 16) {

-      return HashLen0to16(s, len);

-    } else {

-      return HashLen17to32(s, len);

-    }

-  } else if (len <= 64) {

-    return HashLen33to64(s, len);

-  }

-

-  // For strings over 64 bytes we hash the end first, and then as we

-  // loop we keep 56 bytes of state: v, w, x, y, and z.

-  uint64 x = UNALIGNED_LOAD64(s);

-  uint64 y = UNALIGNED_LOAD64(s + len - 16) ^ k1;

-  uint64 z = UNALIGNED_LOAD64(s + len - 56) ^ k0;

-  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y);

-  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0);

-  z += ShiftMix(v.second) * k1;

-  x = Rotate(z + x, 39) * k1;

-  y = Rotate(y, 33) * k1;

-

-  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.

-  len = (len - 1) & ~static_cast<size_t>(63);

-  do {

-    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;

-    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;

-    x ^= w.second;

-    y ^= v.first;

-    z = Rotate(z ^ w.first, 33);

-    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);

-    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);

-    std::swap(z, x);

-    s += 64;

-    len -= 64;

-  } while (len != 0);

-  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,

-                   HashLen16(v.second, w.second) + x);

-}

-

-uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {

-  return CityHash64WithSeeds(s, len, k2, seed);

-}

-

-uint64 CityHash64WithSeeds(const char *s, size_t len,

-                           uint64 seed0, uint64 seed1) {

-  return HashLen16(CityHash64(s, len) - seed0, seed1);

-}

-

-// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings

-// of any length representable in ssize_t.  Based on City and Murmur.

-static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {

-  uint64 a = Uint128Low64(seed);

-  uint64 b = Uint128High64(seed);

-  uint64 c = 0;

-  uint64 d = 0;

-  ssize_t l = len - 16;

-  if (l <= 0) {  // len <= 16

-    c = b * k1 + HashLen0to16(s, len);

-    d = Rotate(a + (len >= 8 ? UNALIGNED_LOAD64(s) : c), 32);

-  } else {  // len > 16

-    c = HashLen16(UNALIGNED_LOAD64(s + len - 8) + k1, a);

-    d = HashLen16(b + len, c + UNALIGNED_LOAD64(s + len - 16));

-    a += d;

-    do {

-      a ^= ShiftMix(UNALIGNED_LOAD64(s) * k1) * k1;

-      a *= k1;

-      b ^= a;

-      c ^= ShiftMix(UNALIGNED_LOAD64(s + 8) * k1) * k1;

-      c *= k1;

-      d ^= c;

-      s += 16;

-      l -= 16;

-    } while (l > 0);

-  }

-  a = HashLen16(a, c);

-  b = HashLen16(d, b);

-  return uint128(a ^ b, HashLen16(b, a));

-}

-

-uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {

-  if (len < 128) {

-    return CityMurmur(s, len, seed);

-  }

-

-  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:

-  // v, w, x, y, and z.

-  pair<uint64, uint64> v, w;

-  uint64 x = Uint128Low64(seed);

-  uint64 y = Uint128High64(seed);

-  uint64 z = len * k1;

-  v.first = Rotate(y ^ k1, 49) * k1 + UNALIGNED_LOAD64(s);

-  v.second = Rotate(v.first, 42) * k1 + UNALIGNED_LOAD64(s + 8);

-  w.first = Rotate(y + z, 35) * k1 + x;

-  w.second = Rotate(x + UNALIGNED_LOAD64(s + 88), 53) * k1;

-

-  // This is the same inner loop as CityHash64(), manually unrolled.

-  do {

-    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;

-    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;

-    x ^= w.second;

-    y ^= v.first;

-    z = Rotate(z ^ w.first, 33);

-    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);

-    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);

-    std::swap(z, x);

-    s += 64;

-    x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;

-    y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;

-    x ^= w.second;

-    y ^= v.first;

-    z = Rotate(z ^ w.first, 33);

-    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);

-    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);

-    std::swap(z, x);

-    s += 64;

-    len -= 128;

-  } while (LIKELY(len >= 128));

-  y += Rotate(w.first, 37) * k0 + z;

-  x += Rotate(v.first + z, 49) * k0;

-  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.

-  for (size_t tail_done = 0; tail_done < len; ) {

-    tail_done += 32;

-    y = Rotate(y - x, 42) * k0 + v.second;

-    w.first += UNALIGNED_LOAD64(s + len - tail_done + 16);

-    x = Rotate(x, 49) * k0 + w.first;

-    w.first += v.first;

-    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);

-  }

-  // At this point our 48 bytes of state should contain more than

-  // enough information for a strong 128-bit hash.  We use two

-  // different 48-byte-to-8-byte hashes to get a 16-byte final result.

-  x = HashLen16(x, v.first);

-  y = HashLen16(y, w.first);

-  return uint128(HashLen16(x + v.second, w.second) + y,

-                 HashLen16(x + w.second, y + v.second));

-}

-

-uint128 CityHash128(const char *s, size_t len) {

-  if (len >= 16) {

-    return CityHash128WithSeed(s + 16,

-                               len - 16,

-                               uint128(UNALIGNED_LOAD64(s) ^ k3,

-                                       UNALIGNED_LOAD64(s + 8)));

-  } else if (len >= 8) {

-    return CityHash128WithSeed(NULL,

-                               0,

-                               uint128(UNALIGNED_LOAD64(s) ^ (len * k0),

-                                       UNALIGNED_LOAD64(s + len - 8) ^ k1));

-  } else {

-    return CityHash128WithSeed(s, len, uint128(k0, k1));

-  }

-}

-

-void CityHash64_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed);

-}

-

-void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  uint128 s(0,0);

-

-  s.first = seed;

-

-  *(uint128*)out = CityHash128WithSeed((const char*)key,len,s);

-}

+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides CityHash64() and related functions.
+//
+// It's probably possible to create even faster hash functions by
+// writing a program that systematically explores some of the space of
+// possible hash functions, by using SIMD instructions, or by
+// compromising on hash quality.
+
+#include "City.h"
+
+#include <algorithm>
+#include <string.h>  // for memcpy and memset
+
+using namespace std;
+
+static uint64 UNALIGNED_LOAD64(const char *p) {  // Copies sizeof(uint64) bytes from p in native byte order.
+  uint64 result;
+  memcpy(&result, p, sizeof(result));  // memcpy instead of a pointer cast, so p need not be aligned
+  return result;
+}
+
+static uint32 UNALIGNED_LOAD32(const char *p) {  // Copies sizeof(uint32) bytes from p in native byte order.
+  uint32 result;
+  memcpy(&result, p, sizeof(result));  // memcpy instead of a pointer cast, so p need not be aligned
+  return result;
+}
+
+#ifndef __BIG_ENDIAN__
+
+#define uint32_in_expected_order(x) (x)
+#define uint64_in_expected_order(x) (x)
+
+#else
+
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#else
+#include <byteswap.h>
+#endif
+
+#define uint32_in_expected_order(x) (bswap_32(x))
+#define uint64_in_expected_order(x) (bswap_64(x))
+
+#endif  // __BIG_ENDIAN__
+
+#if !defined(LIKELY)
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define LIKELY(x) (__builtin_expect(!!(x), 1))
+#else
+#define LIKELY(x) (x)
+#endif
+#endif
+
+static uint64 Fetch64(const char *p) {  // Unaligned 64-bit load; byte-swapped only when __BIG_ENDIAN__ is defined.
+  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
+}
+
+static uint32 Fetch32(const char *p) {  // Unaligned 32-bit load; byte-swapped only when __BIG_ENDIAN__ is defined.
+  return uint32_in_expected_order(UNALIGNED_LOAD32(p));
+}
+
+// Some primes between 2^63 and 2^64 for various uses.
+static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
+static const uint64 k1 = 0xb492b66fbe98f273ULL;
+static const uint64 k2 = 0x9ae16a3b2f90404fULL;
+static const uint64 k3 = 0xc949d7c7509e6557ULL;
+
+// Bitwise right rotate.  Normally this will compile to a single
+// instruction, especially if the shift is a manifest constant.
+static uint64 Rotate(uint64 val, int shift) {  // Right-rotates val by shift bits; every call visible in this file passes a constant below 64.
+  // Avoid shifting by 64: doing so yields an undefined result.
+  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
+}
+
+// Equivalent to Rotate(), but requires the second arg to be non-zero.
+// On x86-64, and probably others, it's possible for this to compile
+// to a single instruction if both args are already in registers.
+static uint64 RotateByAtLeast1(uint64 val, int shift) {  // Right rotate with no zero check.
+  return (val >> shift) | (val << (64 - shift));  // shift == 0 would shift left by 64, which is undefined
+}
+
+static uint64 ShiftMix(uint64 val) {  // XORs val with itself shifted right by 47 bits.
+  return val ^ (val >> 47);
+}
+
+static uint64 HashLen16(uint64 u, uint64 v) {  // Reduces the pair (u, v) to one 64-bit value.
+  return Hash128to64(uint128(u, v));  // Hash128to64 and uint128 are declared outside this file section
+}
+
+static uint64 HashLen0to16(const char *s, size_t len) {  // Hashes short inputs; callers in this file pass len <= 16.
+  if (len > 8) {  // two 8-byte loads from the start and the end (they overlap when len < 16)
+    uint64 a = Fetch64(s);
+    uint64 b = Fetch64(s + len - 8);
+    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;  // len > 8 here, so the rotate amount is never 0
+  }
+  if (len >= 4) {  // two 4-byte loads from the start and the end (they overlap when len < 8)
+    uint64 a = Fetch32(s);
+    return HashLen16(len + (a << 3), Fetch32(s + len - 4));
+  }
+  if (len > 0) {  // 1..3 bytes: sample the first, middle and last byte
+    uint8 a = s[0];
+    uint8 b = s[len >> 1];
+    uint8 c = s[len - 1];
+    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
+    uint32 z = len + (static_cast<uint32>(c) << 2);
+    return ShiftMix(y * k2 ^ z * k3) * k2;
+  }
+  return k2;  // empty input: s is never dereferenced
+}
+
+// This probably works well for 16-byte strings as well, but it may be overkill
+// in that case.
+static uint64 HashLen17to32(const char *s, size_t len) {  // Reads the first 16 and the last 16 bytes of s (overlapping when len < 32).
+  uint64 a = Fetch64(s) * k1;
+  uint64 b = Fetch64(s + 8);
+  uint64 c = Fetch64(s + len - 8) * k2;
+  uint64 d = Fetch64(s + len - 16) * k0;  // requires len >= 16, otherwise this reads before s
+  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
+                   a + Rotate(b ^ k3, 20) - c + len);
+}
+
+// Return a 16-byte hash for 48 bytes.  Quick and dirty.
+// Callers do best to use "random-looking" values for a and b.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(  // Mixes four data words (w, x, y, z) with two seeds (a, b).
+    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
+  a += w;
+  b = Rotate(b + a + z, 21);
+  uint64 c = a;  // snapshot of a + w, added into the second result below
+  a += x;
+  a += y;
+  b += Rotate(a, 44);
+  return make_pair(a + z, b + c);  // first is the plain sum a+w+x+y+z; second carries the rotations
+}
+
+// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
+static pair<uint64, uint64> WeakHashLen32WithSeeds(  // Loads four consecutive 8-byte words from s and defers to the 6-argument overload.
+    const char* s, uint64 a, uint64 b) {
+  return WeakHashLen32WithSeeds(Fetch64(s),
+                                Fetch64(s + 8),
+                                Fetch64(s + 16),
+                                Fetch64(s + 24),  // last load covers s[24..31]
+                                a,
+                                b);
+}
+
+// Return an 8-byte hash for 33 to 64 bytes.
+static uint64 HashLen33to64(const char *s, size_t len) {  // Reads only s[0..31] and s[len-32..len-1], which overlap when len < 64.
+  uint64 z = Fetch64(s + 24);
+  uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
+  uint64 b = Rotate(a + z, 52);
+  uint64 c = Rotate(a, 37);
+  a += Fetch64(s + 8);
+  c += Rotate(a, 7);
+  a += Fetch64(s + 16);
+  uint64 vf = a + z;  // first pair (vf, vs), built mostly from the leading 32 bytes
+  uint64 vs = b + Rotate(a, 31) + c;
+  a = Fetch64(s + 16) + Fetch64(s + len - 32);
+  z = Fetch64(s + len - 8);
+  b = Rotate(a + z, 52);
+  c = Rotate(a, 37);
+  a += Fetch64(s + len - 24);
+  c += Rotate(a, 7);
+  a += Fetch64(s + len - 16);
+  uint64 wf = a + z;  // second pair (wf, ws), built mostly from the trailing 32 bytes
+  uint64 ws = b + Rotate(a, 31) + c;
+  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);  // cross-combine the two pairs
+  return ShiftMix(r * k0 + vs) * k2;
+}
+
+uint64 CityHash64(const char *s, size_t len) {  // 64-bit hash of the len bytes at s.
+  if (len <= 32) {  // inputs up to 64 bytes go to dedicated fixed-range helpers
+    if (len <= 16) {
+      return HashLen0to16(s, len);
+    } else {
+      return HashLen17to32(s, len);
+    }
+  } else if (len <= 64) {
+    return HashLen33to64(s, len);
+  }
+
+  // For strings over 64 bytes we hash the end first, and then as we
+  // loop we keep 56 bytes of state: v, w, x, y, and z.
+  uint64 x = Fetch64(s + len - 40);
+  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
+  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
+  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
+  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+  x = x * k1 + Fetch64(s);
+
+  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
+  len = (len - 1) & ~static_cast<size_t>(63);  // len > 64 here, so the result is at least 64
+  do {
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;
+    len -= 64;
+  } while (len != 0);  // len is a multiple of 64, so it reaches exactly 0
+  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
+                   HashLen16(v.second, w.second) + x);
+}
+
+uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {  // Single-seed variant: uses k2 as the first seed.
+  return CityHash64WithSeeds(s, len, k2, seed);
+}
+
+uint64 CityHash64WithSeeds(const char *s, size_t len,  // Two-seed variant: the seeds are applied after the unseeded hash is computed.
+                           uint64 seed0, uint64 seed1) {
+  return HashLen16(CityHash64(s, len) - seed0, seed1);
+}
+
+// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
+// of any length representable in signed long.  Based on City and Murmur.
+static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {  // 128-bit hash; CityHash128WithSeed uses it for len < 128.
+  uint64 a = Uint128Low64(seed);
+  uint64 b = Uint128High64(seed);
+  uint64 c = 0;
+  uint64 d = 0;
+  signed long l = len - 16;  // signed so len < 16 compares as negative; NOTE(review): long is 32 bits on LLP64 targets - confirm large len is not expected here
+  if (l <= 0) {  // len <= 16
+    a = ShiftMix(a * k1) * k1;
+    c = b * k1 + HashLen0to16(s, len);
+    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));  // s is only read when at least 8 bytes exist
+  } else {  // len > 16
+    c = HashLen16(Fetch64(s + len - 8) + k1, a);
+    d = HashLen16(b + len, c + Fetch64(s + len - 16));
+    a += d;
+    do {  // consume 16 bytes per pass, two 8-byte lanes (a/b and c/d)
+      a ^= ShiftMix(Fetch64(s) * k1) * k1;
+      a *= k1;
+      b ^= a;
+      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
+      c *= k1;
+      d ^= c;
+      s += 16;
+      l -= 16;
+    } while (l > 0);  // l started at len - 16, so the trailing 1..16 bytes are skipped here; the last 16 bytes were already read above
+  }
+  a = HashLen16(a, c);
+  b = HashLen16(d, b);
+  return uint128(a ^ b, HashLen16(b, a));
+}
+
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {  // Seeded 128-bit hash of the len bytes at s.
+  if (len < 128) {  // short inputs take the CityMurmur path
+    return CityMurmur(s, len, seed);
+  }
+
+  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
+  // v, w, x, y, and z.
+  pair<uint64, uint64> v, w;
+  uint64 x = Uint128Low64(seed);
+  uint64 y = Uint128High64(seed);
+  uint64 z = len * k1;
+  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
+  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
+  w.first = Rotate(y + z, 35) * k1 + x;
+  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;  // in bounds because len >= 128
+
+  // This is the same inner loop as CityHash64(), manually unrolled.
+  do {
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;
+    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
+    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
+    x ^= w.second;
+    y += v.first + Fetch64(s + 40);
+    z = Rotate(z + w.first, 33) * k1;
+    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
+    std::swap(z, x);
+    s += 64;
+    len -= 128;  // each pass consumes two 64-byte rounds
+  } while (LIKELY(len >= 128));
+  x += Rotate(v.first + z, 49) * k0;
+  z += Rotate(w.first, 37) * k0;
+  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
+  for (size_t tail_done = 0; tail_done < len; ) {
+    tail_done += 32;
+    y = Rotate(x + y, 42) * k0 + v.second;
+    w.first += Fetch64(s + len - tail_done + 16);
+    x = x * k0 + w.first;
+    z += w.second + Fetch64(s + len - tail_done);  // may reach back before s into bytes the loop above already consumed
+    w.second += v.first;
+    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
+  }
+  // At this point our 56 bytes of state should contain more than
+  // enough information for a strong 128-bit hash.  We use two
+  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
+  x = HashLen16(x, v.first);
+  y = HashLen16(y + z, w.first);
+  return uint128(HashLen16(x + v.second, w.second) + y,
+                 HashLen16(x + w.second, y + v.second));
+}
+
+uint128 CityHash128(const char *s, size_t len) {  // Unseeded 128-bit hash: derives a seed from the input or from constants.
+  if (len >= 16) {  // the first 16 bytes become the seed; the rest is hashed
+    return CityHash128WithSeed(s + 16,
+                               len - 16,
+                               uint128(Fetch64(s) ^ k3,
+                                       Fetch64(s + 8)));
+  } else if (len >= 8) {  // 8..15 bytes: the two loads below cover every input byte, so nothing is left to hash
+    return CityHash128WithSeed(NULL,
+                               0,
+                               uint128(Fetch64(s) ^ (len * k0),
+                                       Fetch64(s + len - 8) ^ k1));
+  } else {  // fewer than 8 bytes: fixed seed (k0, k1)
+    return CityHash128WithSeed(s, len, uint128(k0, k1));
+  }
+}
+
+#ifdef __SSE4_2__
+#include <nmmintrin.h>
+
+// Requires len >= 240.
+static void CityHashCrc256Long(const char *s, size_t len,  // Writes four 64-bit words to result[0..3].
+                               uint32 seed, uint64 *result) {
+  uint64 a = Fetch64(s + 56) + k0;
+  uint64 b = Fetch64(s + 96) + k0;
+  uint64 c = result[0] = HashLen16(b, len);  // result[0] and result[1] double as scratch until the final mixing
+  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
+  uint64 e = Fetch64(s + 184) + seed;
+  uint64 f = seed;
+  uint64 g = 0;
+  uint64 h = 0;
+  uint64 i = 0;
+  uint64 j = 0;
+  uint64 t = c + d;
+
+  // 240 bytes of input per iter.
+  size_t iters = len / 240;
+  len -= iters * 240;  // len now holds the leftover byte count (0..239)
+  do {
+#define CHUNK(multiplier, z)                                    \
+    {                                                           \
+      uint64 old_a = a;                                         \
+      a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
+      b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
+      c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
+      d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
+      e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
+      t = old_a;                                                \
+    }                                                           \
+    f = _mm_crc32_u64(f, a);                                    \
+    g = _mm_crc32_u64(g, b);                                    \
+    h = _mm_crc32_u64(h, c);                                    \
+    i = _mm_crc32_u64(i, d);                                    \
+    j = _mm_crc32_u64(j, e);                                    \
+    s += 40
+
+    CHUNK(1, 1); CHUNK(k0, 0);  // six 40-byte chunks per pass = 240 bytes
+    CHUNK(1, 1); CHUNK(k0, 0);
+    CHUNK(1, 1); CHUNK(k0, 0);
+  } while (--iters > 0);  // relies on len >= 240 so that iters starts at 1 or more
+
+  while (len >= 40) {  // whole 40-byte chunks of the leftover
+    CHUNK(k0, 0);
+    len -= 40;
+  }
+  if (len > 0) {  // final partial chunk: step back so the last 40 bytes of input are read
+    s = s + len - 40;
+    CHUNK(k0, 0);
+  }
+  j += i << 32;
+  a = HashLen16(a, j);
+  h += g << 32;
+  b += h;
+  c = HashLen16(c, f) + i;
+  d = HashLen16(d, e + result[0]);
+  j += e;
+  i += HashLen16(h, t);
+  e = HashLen16(a, d) + j;
+  f = HashLen16(b, c) + a;
+  g = HashLen16(j, i) + c;
+  result[0] = e + f + g + h;
+  a = ShiftMix((a + g) * k0) * k0 + b;
+  result[1] += a + result[0];
+  a = ShiftMix(a * k0) * k0 + c;
+  result[2] = a + result[1];
+  a = ShiftMix((a + e) * k0) * k0;
+  result[3] = a + result[2];
+}  // NOTE(review): CHUNK is not #undef'd in this section, so the macro stays defined after this function
+
+// Requires len < 240.
+static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {  // Zero-pads the input to 240 bytes and reuses the long path.
+  char buf[240];
+  memcpy(buf, s, len);  // relies on len < 240, otherwise this overruns buf
+  memset(buf + len, 0, 240 - len);
+  CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);  // the complemented length is passed as the seed
+}
+
+void CityHashCrc256(const char *s, size_t len, uint64 *result) {  // Fills result[0..3]; picks the long or short path at 240 bytes.
+  if (LIKELY(len >= 240)) {
+    CityHashCrc256Long(s, len, 0, result);  // long inputs use seed 0
+  } else {
+    CityHashCrc256Short(s, len, result);
+  }
+}
+
+uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {  // Seeded 128-bit hash; uses the CRC path only above 900 bytes.
+  if (len <= 900) {
+    return CityHash128WithSeed(s, len, seed);
+  } else {
+    uint64 result[4];
+    CityHashCrc256(s, len, result);
+    uint64 u = Uint128High64(seed) + result[0];  // fold the seed into the 256-bit result
+    uint64 v = Uint128Low64(seed) + result[1];
+    return uint128(HashLen16(u, v + result[2]),
+                   HashLen16(Rotate(v, 32), u * k0 + result[3]));
+  }
+}
+
+uint128 CityHashCrc128(const char *s, size_t len) {  // Unseeded 128-bit hash; uses the CRC path only above 900 bytes.
+  if (len <= 900) {
+    return CityHash128(s, len);
+  } else {
+    uint64 result[4];
+    CityHashCrc256(s, len, result);
+    return uint128(result[2], result[3]);  // only the last two of the four words are returned
+  }
+}
+
+#endif
diff --git a/City.h b/City.h
index 171f693..02f3457 100644
--- a/City.h
+++ b/City.h
@@ -1,97 +1,106 @@
-// Copyright (c) 2011 Google, Inc.

-//

-// Permission is hereby granted, free of charge, to any person obtaining a copy

-// of this software and associated documentation files (the "Software"), to deal

-// in the Software without restriction, including without limitation the rights

-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

-// copies of the Software, and to permit persons to whom the Software is

-// furnished to do so, subject to the following conditions:

-//

-// The above copyright notice and this permission notice shall be included in

-// all copies or substantial portions of the Software.

-//

-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

-// THE SOFTWARE.

-//

-// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala

-//

-// This file provides a few functions for hashing strings. On x86-64

-// hardware in 2011, CityHash64() is faster than other high-quality

-// hash functions, such as Murmur.  This is largely due to higher

-// instruction-level parallelism.  CityHash64() and CityHash128() also perform

-// well on hash-quality tests.

-//

-// CityHash128() is optimized for relatively long strings and returns

-// a 128-bit hash.  For strings more than about 2000 bytes it can be

-// faster than CityHash64().

-//

-// Functions in the CityHash family are not suitable for cryptography.

-//

-// WARNING: This code has not been tested on big-endian platforms!

-// It is known to work well on little-endian platforms that have a small penalty

-// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.

-//

-// By the way, for some hash functions, given strings a and b, the hash

-// of a+b is easily derived from the hashes of a and b.  This property

-// doesn't hold for any hash functions in this file.

-

-#ifndef CITY_HASH_H_

-#define CITY_HASH_H_

-

-#if defined(_MSC_VER) || defined(__CYGWIN__)

-#include "pstdint.h"

-typedef int ssize_t;

-#pragma warning(disable:4267)

-#else

-#include <stdint.h>

-#endif

-

-#include <stdlib.h>  // for size_t.

-#include <utility>

-

-typedef uint8_t uint8;

-typedef uint32_t uint32;

-typedef uint64_t uint64;

-typedef std::pair<uint64, uint64> uint128;

-

-inline uint64 Uint128Low64(const uint128& x) { return x.first; }

-inline uint64 Uint128High64(const uint128& x) { return x.second; }

-

-// Hash function for a byte array.

-uint64 CityHash64(const char *buf, size_t len);

-

-// Hash function for a byte array.  For convenience, a 64-bit seed is also

-// hashed into the result.

-uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

-

-// Hash function for a byte array.  For convenience, two seeds are also

-// hashed into the result.

-uint64 CityHash64WithSeeds(const char *buf, size_t len,

-                           uint64 seed0, uint64 seed1);

-

-// Hash function for a byte array.

-uint128 CityHash128(const char *s, size_t len);

-

-// Hash function for a byte array.  For convenience, a 128-bit seed is also

-// hashed into the result.

-uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

-

-// Hash 128 input bits down to 64 bits of output.

-// This is intended to be a reasonably good hash function.

-inline uint64 Hash128to64(const uint128& x) {

-  // Murmur-inspired hashing.

-  const uint64 kMul = 0x9ddfea08eb382d69ULL;

-  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;

-  a ^= (a >> 47);

-  uint64 b = (Uint128High64(x) ^ a) * kMul;

-  b ^= (b >> 47);

-  b *= kMul;

-  return b;

-}

-

-#endif  // CITY_HASH_H_

+// Copyright (c) 2011 Google, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// CityHash, by Geoff Pike and Jyrki Alakuijala
+//
+// This file provides a few functions for hashing strings. On x86-64
+// hardware in 2011, CityHash64() is faster than other high-quality
+// hash functions, such as Murmur.  This is largely due to higher
+// instruction-level parallelism.  CityHash64() and CityHash128() also perform
+// well on hash-quality tests.
+//
+// CityHash128() is optimized for relatively long strings and returns
+// a 128-bit hash.  For strings more than about 2000 bytes it can be
+// faster than CityHash64().
+//
+// Functions in the CityHash family are not suitable for cryptography.
+//
+// WARNING: This code has not been tested on big-endian platforms!
+// It is known to work well on little-endian platforms that have a small penalty
+// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
+//
+// By the way, for some hash functions, given strings a and b, the hash
+// of a+b is easily derived from the hashes of a and b.  This property
+// doesn't hold for any hash functions in this file.
+
+#ifndef CITY_HASH_H_
+#define CITY_HASH_H_
+
+#include <stdlib.h>  // for size_t.
+#include <stdint.h>
+#include <utility>
+
+typedef uint8_t uint8;
+typedef uint32_t uint32;
+typedef uint64_t uint64;
+typedef std::pair<uint64, uint64> uint128;
+
+inline uint64 Uint128Low64(const uint128& x) { return x.first; }    // the low 64 bits live in pair::first
+inline uint64 Uint128High64(const uint128& x) { return x.second; }  // the high 64 bits live in pair::second
+
+// Hash function for a byte array.
+uint64 CityHash64(const char *buf, size_t len);
+
+// Hash function for a byte array.  For convenience, a 64-bit seed is also
+// hashed into the result.
+uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
+
+// Hash function for a byte array.  For convenience, two seeds are also
+// hashed into the result.
+uint64 CityHash64WithSeeds(const char *buf, size_t len,
+                           uint64 seed0, uint64 seed1);
+
+// Hash function for a byte array.
+uint128 CityHash128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash 128 input bits down to 64 bits of output.
+// This is intended to be a reasonably good hash function; it takes no seed and keeps no state.
+inline uint64 Hash128to64(const uint128& x) {
+  // Murmur-inspired hashing: two multiply / xor-shift rounds followed by one last multiply.
+  const uint64 kMul = 0x9ddfea08eb382d69ULL;
+  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;  // round 1 starts from low ^ high
+  a ^= (a >> 47);
+  uint64 b = (Uint128High64(x) ^ a) * kMul;                // round 2 mixes the high half in again
+  b ^= (b >> 47);
+  b *= kMul;
+  return b;
+}
+
+// Conditionally include declarations for versions of City that require SSE4.2
+// instructions to be available.
+#ifdef __SSE4_2__
+
+// Hash function for a byte array.
+uint128 CityHashCrc128(const char *s, size_t len);
+
+// Hash function for a byte array.  For convenience, a 128-bit seed is also
+// hashed into the result.
+uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
+
+// Hash function for a byte array.  Sets result[0] ... result[3].
+void CityHashCrc256(const char *s, size_t len, uint64 *result);
+
+#endif  // __SSE4_2__
+
+#endif  // CITY_HASH_H_
diff --git a/CityTest.cpp b/CityTest.cpp
new file mode 100644
index 0000000..4190cc8
--- /dev/null
+++ b/CityTest.cpp
@@ -0,0 +1,15 @@
+#include "City.h"
+
+void CityHash64_test ( const void * key, int len, uint32_t seed, void * out ) // adapter: calls CityHash64WithSeed and stores the 64-bit result in out
+{
+  *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed); // out must have room for a uint64
+}
+
+void CityHash128_test ( const void * key, int len, uint32_t seed, void * out ) // adapter: calls CityHash128WithSeed and stores the uint128 result in out
+{
+  uint128 s(0,0);
+
+  s.first = seed; // the 32-bit seed goes into the low half; the high half stays 0
+
+  *(uint128*)out = CityHash128WithSeed((const char*)key,len,s); // out must have room for a uint128 (a pair of uint64)
+}
diff --git a/DifferentialTest.cpp b/DifferentialTest.cpp
index b356085..d9067c9 100644
--- a/DifferentialTest.cpp
+++ b/DifferentialTest.cpp
@@ -1,3 +1,3 @@
-#include "DifferentialTest.h"

-

-//----------------------------------------------------------------------------

+#include "DifferentialTest.h"
+
+//----------------------------------------------------------------------------
diff --git a/DifferentialTest.h b/DifferentialTest.h
index 3136cbb..824d72e 100644
--- a/DifferentialTest.h
+++ b/DifferentialTest.h
@@ -1,281 +1,281 @@
-//-----------------------------------------------------------------------------

-// Differential collision & distribution tests - generate a bunch of random keys,

-// see what happens to the hash value when we flip a few bits of the key.

-

-#pragma once

-

-#include "Types.h"

-#include "Stats.h"      // for chooseUpToK

-#include "KeysetTest.h" // for SparseKeygenRecurse

-#include "Random.h"

-

-#include <vector>

-#include <algorithm>

-#include <stdio.h>

-

-//-----------------------------------------------------------------------------

-// Sort through the differentials, ignoring collisions that only occured once 

-// (these could be false positives). If we find collisions of 3 or more, the

-// differential test fails.

-

-template < class keytype >

-bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )

-{

-  std::sort(diffs.begin(), diffs.end());

-

-  int count = 1;

-  int ignore = 0;

-

-  bool result = true;

-

-  if(diffs.size())

-  {

-    keytype kp = diffs[0];

-

-    for(int i = 1; i < (int)diffs.size(); i++)

-    {

-      if(diffs[i] == kp)

-      {

-        count++;

-        continue;

-      }

-      else

-      {

-        if(count > 1)

-        {

-          result = false;

-

-          double pct = 100 * (double(count) / double(reps));

-

-          if(dumpCollisions)

-          {

-            printbits((unsigned char*)&kp,sizeof(kp));

-            printf(" - %4.2f%%\n", pct );

-          }

-        }

-        else 

-        {

-          ignore++;

-        }

-

-        kp = diffs[i];

-        count = 1;

-      }

-    }

-

-    if(count > 1)

-    {

-      double pct = 100 * (double(count) / double(reps));

-

-      if(dumpCollisions)

-      {

-        printbits((unsigned char*)&kp,sizeof(kp));

-        printf(" - %4.2f%%\n", pct );

-      }

-    }

-    else 

-    {

-      ignore++;

-    }

-  }

-

-  printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);

-

-  if(result == false)

-  {

-    printf(" !!!!! ");

-  }

-

-  printf("\n");

-  printf("\n");

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Check all possible keybits-choose-N differentials for collisions, report

-// ones that occur significantly more often than expected.

-

-// Random collisions can happen with probability 1 in 2^32 - if we do more than

-// 2^32 tests, we'll probably see some spurious random collisions, so don't report

-// them.

-

-template < typename keytype, typename hashtype >

-void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )

-{

-  const int bits = sizeof(keytype)*8;

-

-  for(int i = start; i < bits; i++)

-  {

-    flipbit(&k2,sizeof(k2),i);

-    bitsleft--;

-

-    hash(&k2,sizeof(k2),0,&h2);

-

-    if(h1 == h2)

-    {

-      diffs.push_back(k1 ^ k2);

-    }

-

-    if(bitsleft)

-    {

-      DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);

-    }

-

-    flipbit(&k2,sizeof(k2),i);

-    bitsleft++;

-  }

-}

-

-//----------

-

-template < typename keytype, typename hashtype >

-bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )

-{

-  const int keybits = sizeof(keytype) * 8;

-  const int hashbits = sizeof(hashtype) * 8;

-

-  double diffcount = chooseUpToK(keybits,diffbits);

-  double testcount = (diffcount * double(reps));

-  double expected  = testcount / pow(2.0,double(hashbits));

-

-  Rand r(100);

-

-  std::vector<keytype> diffs;

-

-  keytype k1,k2;

-  hashtype h1,h2;

-

-  printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);

-  printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);

-

-  for(int i = 0; i < reps; i++)

-  {

-    if(i % (reps/10) == 0) printf(".");

-

-    r.rand_p(&k1,sizeof(keytype));

-    k2 = k1;

-

-    hash(&k1,sizeof(k1),0,(uint32_t*)&h1);

-

-    DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);

-  }

-  printf("\n");

-

-  bool result = true;

-

-  result &= ProcessDifferentials(diffs,reps,dumpCollisions);

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Differential distribution test - for each N-bit input differential, generate

-// a large set of differential key pairs, hash them, and test the output 

-// differentials using our distribution test code.

-

-// This is a very hard test to pass - even if the hash values are well-distributed,

-// the differences between hash values may not be. It's also not entirely relevant

-// for testing hash functions, but it's still interesting.

-

-// This test is a _lot_ of work, as it's essentially a full keyset test for

-// each of a potentially huge number of input differentials. To speed things

-// along, we do only a few distribution tests per keyset instead of the full

-// grid.

-

-// #TODO - put diagram drawing back on

-

-template < typename keytype, typename hashtype >

-void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )

-{

-  std::vector<keytype>  keys(trials);

-  std::vector<hashtype> A(trials),B(trials);

-

-  for(int i = 0; i < trials; i++)

-  {

-    rand_p(&keys[i],sizeof(keytype));

-

-    hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);

-  }

-

-  //----------

-

-  std::vector<keytype> diffs;

-

-  keytype temp(0);

-

-  SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);

-

-  //----------

-

-  worst = 0;

-  avg = 0;

-

-  hashtype h2;

-

-  for(size_t j = 0; j < diffs.size(); j++)

-  {

-    keytype & d = diffs[j];

-

-    for(int i = 0; i < trials; i++)

-    {

-      keytype k2 = keys[i] ^ d;

-

-      hash(&k2,sizeof(k2),0,&h2);

-

-      B[i] = A[i] ^ h2;

-    }

-

-    double dworst,davg;

-

-    TestDistributionFast(B,dworst,davg);

-

-    avg += davg;

-    worst = (dworst > worst) ? dworst : worst;

-  }

-

-  avg /= double(diffs.size());

-}

-

-//-----------------------------------------------------------------------------

-// Simpler differential-distribution test - for all 1-bit differentials,

-// generate random key pairs and run full distribution/collision tests on the

-// hash differentials

-

-template < typename keytype, typename hashtype >

-bool DiffDistTest2 ( pfHash hash  )

-{

-  Rand r(857374);

-

-  int keybits = sizeof(keytype) * 8;

-  const int keycount = 256*256*32;

-  keytype k;

-  

-  std::vector<hashtype> hashes(keycount);

-  hashtype h1,h2;

-

-  bool result = true;

-

-  for(int keybit = 0; keybit < keybits; keybit++)

-  {

-    printf("Testing bit %d\n",keybit);

-

-    for(int i = 0; i < keycount; i++)

-    {

-      r.rand_p(&k,sizeof(keytype));

-      

-      hash(&k,sizeof(keytype),0,&h1);

-      flipbit(&k,sizeof(keytype),keybit);

-      hash(&k,sizeof(keytype),0,&h2);

-

-      hashes[i] = h1 ^ h2;

-    }

-

-    result &= TestHashList<hashtype>(hashes,true,true,true);

-    printf("\n");

-  }

-

-  return result;

-}

-

-//----------------------------------------------------------------------------

+//-----------------------------------------------------------------------------
+// Differential collision & distribution tests - generate a bunch of random keys,
+// see what happens to the hash value when we flip a few bits of the key.
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"      // for chooseUpToK
+#include "KeysetTest.h" // for SparseKeygenRecurse
+#include "Random.h"
+
+#include <vector>
+#include <algorithm>
+#include <stdio.h>
+
+//-----------------------------------------------------------------------------
+// Sort the differentials and count runs of equal values. A differential seen
+// only once is ignored (it could be a false positive); one seen more than once
+// fails the test. 'reps' only scales the printed percentage. Returns true on pass.
+
+template < class keytype >
+bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
+{
+  std::sort(diffs.begin(), diffs.end()); // sorts the caller's vector in place so equal values become adjacent
+
+  int count = 1;  // length of the current run of equal differentials
+  int ignore = 0; // number of differentials that occurred exactly once
+
+  bool result = true;
+
+  if(diffs.size())
+  {
+    keytype kp = diffs[0]; // value of the run currently being counted
+
+    for(int i = 1; i < (int)diffs.size(); i++)
+    {
+      if(diffs[i] == kp)
+      {
+        count++;
+        continue;
+      }
+      else
+      {
+        if(count > 1)
+        {
+          result = false;
+
+          double pct = 100 * (double(count) / double(reps));
+
+          if(dumpCollisions)
+          {
+            printbits((unsigned char*)&kp,sizeof(kp));
+            printf(" - %4.2f%%\n", pct );
+          }
+        }
+        else 
+        {
+          ignore++;
+        }
+
+        kp = diffs[i];
+        count = 1;
+      }
+    }
+    // The loop only closes a run when the next value differs, so the last run is closed here.
+    if(count > 1)
+    {
+      result = false; // a repeated differential in the last run must fail the test as well
+      double pct = 100 * (double(count) / double(reps));
+      if(dumpCollisions)
+      {
+        printbits((unsigned char*)&kp,sizeof(kp));
+        printf(" - %4.2f%%\n", pct );
+      }
+    }
+    else 
+    {
+      ignore++;
+    }
+  }
+
+  printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
+
+  if(result == false)
+  {
+    printf(" !!!!! ");
+  }
+
+  printf("\n");
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Check all possible keybits-choose-N differentials for collisions, report
+// ones that occur significantly more often than expected.
+
+// Random collisions can happen with probability 1 in 2^32 - if we do more than
+// 2^32 tests, we'll probably see some spurious random collisions, so don't report
+// them.
+
+template < typename keytype, typename hashtype >
+void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
+{ // k1/h1: reference key and its hash; k2/h2: scratch key and hash; colliding differentials are appended to diffs
+  const int bits = sizeof(keytype)*8;
+  // Flip each bit from 'start' upward, test it, recurse while flips remain, then restore the bit.
+  for(int i = start; i < bits; i++)
+  {
+    flipbit(&k2,sizeof(k2),i);
+    bitsleft--;
+
+    hash(&k2,sizeof(k2),0,&h2); // always hashed with seed 0
+
+    if(h1 == h2)
+    {
+      diffs.push_back(k1 ^ k2); // record which bits differ between the colliding keys
+    }
+
+    if(bitsleft)
+    {
+      DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs); // next flip starts above bit i, so each bit set is visited once
+    }
+
+    flipbit(&k2,sizeof(k2),i); // undo the flip
+    bitsleft++;
+  }
+}
+
+//----------
+// Hashes 'reps' random keys, collects colliding differentials of up to 'diffbits' flipped bits, returns ProcessDifferentials' verdict.
+template < typename keytype, typename hashtype >
+bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
+{
+  const int keybits = sizeof(keytype) * 8;
+  const int hashbits = sizeof(hashtype) * 8;
+
+  double diffcount = chooseUpToK(keybits,diffbits);         // differentials tried per key
+  double testcount = (diffcount * double(reps));            // total hash comparisons
+  double expected  = testcount / pow(2.0,double(hashbits)); // collisions expected by chance
+
+  Rand r(100); // fixed seed, so the same keys are generated on every run
+
+  std::vector<keytype> diffs; // every colliding differential found, duplicates included
+
+  keytype k1,k2;
+  hashtype h1,h2;
+
+  printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
+  printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
+  const int dotEvery = (reps >= 10) ? (reps/10) : 1; // reps/10 is 0 for reps < 10 and must not be used as a modulus
+  for(int i = 0; i < reps; i++)
+  {
+    if(i % dotEvery == 0) printf("."); // progress dots
+
+    r.rand_p(&k1,sizeof(keytype));
+    k2 = k1; // k2 is the scratch copy whose bits DiffTestRecurse flips
+
+    hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
+
+    DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
+  }
+  printf("\n");
+
+  bool result = true;
+
+  result &= ProcessDifferentials(diffs,reps,dumpCollisions);
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Differential distribution test - for each N-bit input differential, generate
+// a large set of differential key pairs, hash them, and test the output 
+// differentials using our distribution test code.
+
+// This is a very hard test to pass - even if the hash values are well-distributed,
+// the differences between hash values may not be. It's also not entirely relevant
+// for testing hash functions, but it's still interesting.
+
+// This test is a _lot_ of work, as it's essentially a full keyset test for
+// each of a potentially huge number of input differentials. To speed things
+// along, we do only a few distribution tests per keyset instead of the full
+// grid.
+
+// #TODO - put diagram drawing back on
+
+template < typename keytype, typename hashtype >
+void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
+{ // outputs: worst = largest per-differential 'worst' score, avg = mean of the per-differential averages
+  std::vector<keytype>  keys(trials);
+  std::vector<hashtype> A(trials),B(trials); // A: hashes of the base keys, B: output differentials for the current input differential
+
+  for(int i = 0; i < trials; i++)
+  {
+    rand_p(&keys[i],sizeof(keytype)); // uses the free function rand_p, not a local Rand object
+
+    hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
+  }
+
+  //---------- build the list of input differentials
+
+  std::vector<keytype> diffs;
+
+  keytype temp(0);
+
+  SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
+
+  //---------- score the output differentials of every input differential
+
+  worst = 0;
+  avg = 0;
+
+  hashtype h2;
+
+  for(size_t j = 0; j < diffs.size(); j++)
+  {
+    keytype & d = diffs[j];
+
+    for(int i = 0; i < trials; i++)
+    {
+      keytype k2 = keys[i] ^ d; // base key with the differential applied
+
+      hash(&k2,sizeof(k2),0,&h2);
+
+      B[i] = A[i] ^ h2; // output differential
+    }
+
+    double dworst,davg;
+
+    TestDistributionFast(B,dworst,davg);
+
+    avg += davg;
+    worst = (dworst > worst) ? dworst : worst;
+  }
+  // NOTE(review): if diffs is empty this is 0.0/0.0 and avg becomes NaN - confirm callers always pass diffbits > 0.
+  avg /= double(diffs.size());
+}
+
+//-----------------------------------------------------------------------------
+// Simpler differential-distribution test - for all 1-bit differentials,
+// generate random key pairs and run full distribution/collision tests on the
+// hash differentials
+
+template < typename keytype, typename hashtype >
+bool DiffDistTest2 ( pfHash hash  )
+{ // returns true only if TestHashList returned true for every key bit
+  Rand r(857374); // fixed seed, so the same keys are generated on every run
+
+  int keybits = sizeof(keytype) * 8;
+  const int keycount = 256*256*32; // 2097152 key pairs per tested bit
+  keytype k;
+  
+  std::vector<hashtype> hashes(keycount); // output differentials for the bit under test; overwritten for each bit
+  hashtype h1,h2;
+
+  bool result = true;
+
+  for(int keybit = 0; keybit < keybits; keybit++)
+  {
+    printf("Testing bit %d\n",keybit);
+
+    for(int i = 0; i < keycount; i++)
+    {
+      r.rand_p(&k,sizeof(keytype));
+      
+      hash(&k,sizeof(keytype),0,&h1);     // hash of the random key
+      flipbit(&k,sizeof(keytype),keybit); // same key with only 'keybit' flipped
+      hash(&k,sizeof(keytype),0,&h2);
+
+      hashes[i] = h1 ^ h2;
+    }
+
+    result &= TestHashList<hashtype>(hashes,true,true,true);
+    printf("\n");
+  }
+
+  return result;
+}
+
+//----------------------------------------------------------------------------
diff --git a/Hashes.cpp b/Hashes.cpp
index 1930bc5..36a6c96 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -1,155 +1,155 @@
-#include "Hashes.h"

-

-#include "Random.h"

-

-

-#include <stdlib.h>

-//#include <stdint.h>

-#include <assert.h>

-//#include <emmintrin.h>

-//#include <xmmintrin.h>

-

-//----------------------------------------------------------------------------

-// fake / bad hashes

-

-void BadHash ( const void * key, int len, uint32_t seed, void * out )

-{

-  uint32_t h = seed;

-

-  const uint8_t * data = (const uint8_t*)key;

-

-  for(int i = 0; i < len; i++)

-  {

-    h ^= h >> 3;

-    h ^= h << 5;

-    h ^= data[i];

-  }

-

-  *(uint32_t*)out = h;

-}

-

-void sumhash ( const void * key, int len, uint32_t seed, void * out )

-{

-  uint32_t h = seed;

-

-  const uint8_t * data = (const uint8_t*)key;

-

-  for(int i = 0; i < len; i++)

-  {

-    h += data[i];

-  }

-

-  *(uint32_t*)out = h;

-}

-

-void sumhash32 ( const void * key, int len, uint32_t seed, void * out )

-{

-  uint32_t h = seed;

-

-  const uint32_t * data = (const uint32_t*)key;

-

-  for(int i = 0; i < len/4; i++)

-  {

-    h += data[i];

-  }

-

-  *(uint32_t*)out = h;

-}

-

-void DoNothingHash ( const void *, int, uint32_t, void * )

-{

-}

-

-//-----------------------------------------------------------------------------

-// One-byte-at-a-time hash based on Murmur's mix

-

-uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )

-{

-  const uint8_t * data = (const uint8_t*)key;

-

-  uint32_t h = seed;

-

-  for(int i = 0; i < len; i++)

-  {

-    h ^= data[i];

-    h *= 0x5bd1e995;

-    h ^= h >> 15;

-  }

-

-  return h;

-}

-

-void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out )

-{

-	*(uint32_t*)out = MurmurOAAT(key,len,seed);

-}

-

-//----------------------------------------------------------------------------

-

-void FNV ( const void * key, int len, uint32_t seed, void * out )

-{

-  unsigned int h = seed;

-

-  const uint8_t * data = (const uint8_t*)key;

-

-  h ^= BIG_CONSTANT(2166136261);

-

-  for(int i = 0; i < len; i++)

-  {

-    h ^= data[i];

-    h *= 16777619;

-  }

-

-  *(uint32_t*)out = h;

-}

-

-//-----------------------------------------------------------------------------

-

-uint32_t x17 ( const void * key, int len, uint32_t h ) 

-{

-  const uint8_t * data = (const uint8_t*)key;

-    

-  for(int i = 0; i < len; ++i) 

-  {

-        h = 17 * h + (data[i] - ' ');

-    }

-

-    return h ^ (h >> 16);

-}

-

-//-----------------------------------------------------------------------------

-

-uint32_t Bernstein ( const void * key, int len, uint32_t h ) 

-{

-  const uint8_t * data = (const uint8_t*)key;

-    

-  for(int i = 0; i < len; ++i) 

-  {

-        h = 33 * h + data[i];

-    }

-

-  return h;

-}

-

-//-----------------------------------------------------------------------------

-// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html

-

-uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {

-  #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }

-  #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }

-

-  const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;

-  uint32_t h = len + seed, k = n + len;

-  uint64_t p;

-

-  while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }

-  if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }

-  if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) }

-  c8fold( h ^ k, n, k, k )

-  return k;

-}

-

-void Crap8_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed);

-}

+#include "Hashes.h"
+
+#include "Random.h"
+
+
+#include <stdlib.h>
+//#include <stdint.h>
+#include <assert.h>
+//#include <emmintrin.h>
+//#include <xmmintrin.h>
+
+//----------------------------------------------------------------------------
+// fake / bad hashes
+
+void BadHash ( const void * key, int len, uint32_t seed, void * out ) // one of the "fake / bad" hashes: shift/xor over the bytes, 32-bit result written to out
+{
+  uint32_t h = seed;
+
+  const uint8_t * data = (const uint8_t*)key;
+
+  for(int i = 0; i < len; i++)
+  {
+    h ^= h >> 3;
+    h ^= h << 5;
+    h ^= data[i]; // the state is only ever shifted and xored - no multiply or add
+  }
+
+  *(uint32_t*)out = h; // out must have room for a uint32_t
+}
+
+void sumhash ( const void * key, int len, uint32_t seed, void * out ) // adds every key byte to the seed; 32-bit result written to out
+{
+  uint32_t h = seed;
+
+  const uint8_t * data = (const uint8_t*)key;
+
+  for(int i = 0; i < len; i++)
+  {
+    h += data[i]; // unsigned addition, wraps modulo 2^32
+  }
+
+  *(uint32_t*)out = h; // out must have room for a uint32_t
+}
+
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out ) // adds the key to the seed one 32-bit word at a time
+{
+  uint32_t h = seed;
+
+  const uint32_t * data = (const uint32_t*)key; // NOTE(review): reads key through a uint32_t pointer - presumably callers' keys tolerate that; confirm
+
+  for(int i = 0; i < len/4; i++) // only whole words are summed; a trailing len%4 bytes never affect the result
+  {
+    h += data[i];
+  }
+
+  *(uint32_t*)out = h; // out must have room for a uint32_t
+}
+
+void DoNothingHash ( const void *, int, uint32_t, void * ) // same parameter list as the other hashes; ignores every argument
+{
+} // the output buffer is never written
+
+//-----------------------------------------------------------------------------
+// One-byte-at-a-time hash based on Murmur's mix
+
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed ) // returns the 32-bit hash of len bytes at key, starting from seed
+{
+  const uint8_t * data = (const uint8_t*)key;
+
+  uint32_t h = seed;
+
+  for(int i = 0; i < len; i++)
+  {
+    h ^= data[i];    // fold in one byte
+    h *= 0x5bd1e995; // multiply step of the mix
+    h ^= h >> 15;    // fold high bits back down
+  }
+
+  return h;
+}
+
+void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out ) // adapter: stores MurmurOAAT's return value in out
+{
+	*(uint32_t*)out = MurmurOAAT(key,len,seed); // out must have room for a uint32_t
+}
+
+//----------------------------------------------------------------------------
+
+void FNV ( const void * key, int len, uint32_t seed, void * out ) // byte-wise xor-then-multiply hash; 32-bit result written to out
+{
+  unsigned int h = seed;
+
+  const uint8_t * data = (const uint8_t*)key;
+
+  h ^= BIG_CONSTANT(2166136261); // 0x811C9DC5, the published 32-bit FNV offset basis, xored into the seed
+
+  for(int i = 0; i < len; i++)
+  {
+    h ^= data[i];
+    h *= 16777619; // 0x01000193, the published 32-bit FNV prime
+  }
+
+  *(uint32_t*)out = h; // out must have room for a uint32_t
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t x17 ( const void * key, int len, uint32_t h ) // h is both the seed and the running state; returns the 32-bit hash
+{
+  const uint8_t * data = (const uint8_t*)key;
+    
+  for(int i = 0; i < len; ++i) 
+  {
+        h = 17 * h + (data[i] - ' '); // each byte is offset by the space character before it is added
+    }
+
+    return h ^ (h >> 16); // final step xors the high half into the low half
+}
+
+//-----------------------------------------------------------------------------
+
+void Bernstein ( const void * key, int len, uint32_t seed, void * out ) // multiply-by-33-and-add hash; seed doubles as the running state
+{
+  const uint8_t * data = (const uint8_t*)key;
+    
+  for(int i = 0; i < len; ++i) 
+  {
+        seed = 33 * seed + data[i]; // unsigned arithmetic, wraps modulo 2^32
+    }
+
+  *(uint32_t*)out = seed; // out must have room for a uint32_t
+}
+
+//-----------------------------------------------------------------------------
+// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
+
+uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {
+  #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
+  #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
+  // c8fold: 32x32 -> 64-bit product, low half xored into y, high half into z. c8mix: scale h by m, then fold one word into k and h.
+  const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
+  uint32_t h = len + seed, k = n + len;
+  uint64_t p;
+  // NOTE(review): the 1-3 byte tail below loads a whole uint32_t and masks it, so it reads past the end of key; the low-bits mask presumably assumes little-endian - confirm.
+  while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
+  if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
+  if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) }
+  c8fold( h ^ k, n, k, k )
+  return k;
+}
+
+void Crap8_test ( const void * key, int len, uint32_t seed, void * out ) // adapter: stores Crap8's return value in out
+{
+  *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed); // the int len is converted to Crap8's uint32_t parameter
+}
diff --git a/Hashes.h b/Hashes.h
index 2120cd8..6c04ae1 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -1,73 +1,78 @@
-#pragma once

-

-#include "Types.h"

-

-#include "MurmurHash1.h"

-#include "MurmurHash2.h"

-#include "MurmurHash3.h"

-

-//----------

-// These are _not_ hash functions (even though people tend to use crc32 as one...)

-

-void sumhash               ( const void * key, int len, uint32_t seed, void * out );

-void sumhash32             ( const void * key, int len, uint32_t seed, void * out );

-

-void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );

-void crc32                 ( const void * key, int len, uint32_t seed, void * out );

-

-void randhash_32           ( const void * key, int len, uint32_t seed, void * out );

-void randhash_64           ( const void * key, int len, uint32_t seed, void * out );

-void randhash_128          ( const void * key, int len, uint32_t seed, void * out );

-

-//----------

-// Cryptographic hashes

-

-void md5_32                ( const void * key, int len, uint32_t seed, void * out );

-void sha1_32a              ( const void * key, int len, uint32_t seed, void * out );

-

-//----------

-// General purpose hashes

-

-void FNV                   ( const void * key, int len, uint32_t seed, void * out );

-void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );

-void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );

-void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );

-void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );

-void CityHash128_test      ( const void * key, int len, uint32_t seed, void * out );

-void CityHash64_test       ( const void * key, int len, uint32_t seed, void * out );

-

-uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );

-

-//----------

-// MurmurHash2

-

-void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );

-void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );

-

-//-----------------------------------------------------------------------------

-// Test harnesses for Murmur1/2

-

-inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint32_t*)out = MurmurHash1(key,len,seed);

-}

-

-inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint32_t*)out = MurmurHash2(key,len,seed);

-}

-

-inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint32_t*)out = MurmurHash2A(key,len,seed);

-}

-

-inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint64_t*)out = MurmurHash64A(key,len,seed);

-}

-

-inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint64_t*)out = MurmurHash64B(key,len,seed);

-}
\ No newline at end of file
+#pragma once
+
+#include "Types.h"
+
+#include "MurmurHash1.h"
+#include "MurmurHash2.h"
+#include "MurmurHash3.h"
+
+//----------
+// These are _not_ hash functions (even though people tend to use crc32 as one...)
+
+void sumhash               ( const void * key, int len, uint32_t seed, void * out );
+void sumhash32             ( const void * key, int len, uint32_t seed, void * out );
+
+void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
+void crc32                 ( const void * key, int len, uint32_t seed, void * out );
+
+void randhash_32           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_64           ( const void * key, int len, uint32_t seed, void * out );
+void randhash_128          ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// Cryptographic hashes
+
+void md5_32                ( const void * key, int len, uint32_t seed, void * out );
+void sha1_32a              ( const void * key, int len, uint32_t seed, void * out );
+
+//----------
+// General purpose hashes
+
+void FNV                   ( const void * key, int len, uint32_t seed, void * out );
+void Bernstein             ( const void * key, int len, uint32_t seed, void * out );
+void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
+void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT_test       ( const void * key, int len, uint32_t seed, void * out );
+void Crap8_test            ( const void * key, int len, uint32_t seed, void * out );
+void CityHash128_test      ( const void * key, int len, uint32_t seed, void * out );
+void CityHash64_test       ( const void * key, int len, uint32_t seed, void * out );
+// SpookyHash wrappers (presumably 32-, 64- and 128-bit outputs, going by the names - confirm in Hashes.cpp)
+void SpookyHash32_test     ( const void * key, int len, uint32_t seed, void * out );
+void SpookyHash64_test     ( const void * key, int len, uint32_t seed, void * out );
+void SpookyHash128_test    ( const void * key, int len, uint32_t seed, void * out );
+// Unlike the wrappers above, MurmurOAAT returns its 32-bit hash by value instead of writing through 'out'.
+uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
+
+//----------
+// MurmurHash2
+
+void MurmurHash2_test      ( const void * key, int len, uint32_t seed, void * out );
+void MurmurHash2A_test     ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+// Test harnesses for Murmur1/2
+// Adapts MurmurHash1 to the common harness signature; its result is stored through out as a uint32_t.
+inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = MurmurHash1(key,len,seed);
+}
+// Adapts MurmurHash2 to the common harness signature; its result is stored through out as a uint32_t.
+inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = MurmurHash2(key,len,seed);
+}
+// Adapts MurmurHash2A to the common harness signature; its result is stored through out as a uint32_t.
+inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = MurmurHash2A(key,len,seed);
+}
+// Adapts MurmurHash64A to the harness signature; out must have room for the uint64_t that is stored through it.
+inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint64_t*)out = MurmurHash64A(key,len,seed);
+}
+// Adapts MurmurHash64B to the harness signature; out must have room for the uint64_t that is stored through it.
+inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint64_t*)out = MurmurHash64B(key,len,seed);
+}
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 5561030..b3b8a4c 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -1,327 +1,327 @@
-#include "KeysetTest.h"

-

-#include "Platform.h"

-#include "Random.h"

-

-#include <map>

-#include <set>

-

-//-----------------------------------------------------------------------------

-// This should hopefully be a thorough and uambiguous test of whether a hash

-// is correctly implemented on a given platform

-

-bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )

-{

-  const int hashbytes = hashbits / 8;

-

-  uint8_t * key    = new uint8_t[256];

-  uint8_t * hashes = new uint8_t[hashbytes * 256];

-  uint8_t * final  = new uint8_t[hashbytes];

-

-  memset(key,0,256);

-  memset(hashes,0,hashbytes*256);

-  memset(final,0,hashbytes);

-

-  // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as

-  // the seed

-

-  for(int i = 0; i < 256; i++)

-  {

-    key[i] = (uint8_t)i;

-

-    hash(key,i,256-i,&hashes[i*hashbytes]);

-  }

-

-  // Then hash the result array

-

-  hash(hashes,hashbytes*256,0,final);

-

-  // The first four bytes of that hash, interpreted as a little-endian integer, is our

-  // verification value

-

-  uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);

-

-  delete [] key;

-  delete [] hashes;

-  delete [] final;

-

-  //----------

-

-  if(expected != verification)

-  {

-    if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);

-    return false;

-  }

-  else

-  {

-    if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);

-    return true;

-  }

-}

-

-//----------------------------------------------------------------------------

-// Basic sanity checks -

-

-// A hash function should not be reading outside the bounds of the key.

-

-// Flipping a bit of a key should, with overwhelmingly high probability,

-// result in a different hash.

-

-// Hashing the same key twice should always produce the same result.

-

-// The memory alignment of the key should not affect the hash result.

-

-bool SanityTest ( pfHash hash, const int hashbits )

-{

-  printf("Running sanity check 1");

-  

-  Rand r(883741);

-

-  bool result = true;

-

-  const int hashbytes = hashbits/8;

-  const int reps = 10;

-  const int keymax = 128;

-  const int pad = 16;

-  const int buflen = keymax + pad*3;

-  

-  uint8_t * buffer1 = new uint8_t[buflen];

-  uint8_t * buffer2 = new uint8_t[buflen];

-

-  uint8_t * hash1 = new uint8_t[hashbytes];

-  uint8_t * hash2 = new uint8_t[hashbytes];

-

-  //----------

-  

-  for(int irep = 0; irep < reps; irep++)

-  {

-    if(irep % (reps/10) == 0) printf(".");

-

-    for(int len = 4; len <= keymax; len++)

-    {

-      for(int offset = pad; offset < pad*2; offset++)

-      {

-        uint8_t * key1 = &buffer1[pad];

-        uint8_t * key2 = &buffer2[pad+offset];

-

-        r.rand_p(buffer1,buflen);

-        r.rand_p(buffer2,buflen);

-

-        memcpy(key2,key1,len);

-

-        hash(key1,len,0,hash1);

-

-        for(int bit = 0; bit < (len * 8); bit++)

-        {

-          // Flip a bit, hash the key -> we should get a different result.

-

-          flipbit(key2,len,bit);

-          hash(key2,len,0,hash2);

-

-          if(memcmp(hash1,hash2,hashbytes) == 0)

-          {

-            result = false;

-          }

-

-          // Flip it back, hash again -> we should get the original result.

-

-          flipbit(key2,len,bit);

-          hash(key2,len,0,hash2);

-

-          if(memcmp(hash1,hash2,hashbytes) != 0)

-          {

-            result = false;

-          }

-        }

-      }

-    }

-  }

-

-  if(result == false)

-  {

-    printf("*********FAIL*********\n");

-  }

-  else

-  {

-    printf("PASS\n");

-  }

-

-  delete [] hash1;

-  delete [] hash2;

-

-  return result;

-}

-

-//----------------------------------------------------------------------------

-// Appending zero bytes to a key should always cause it to produce a different

-// hash value

-

-void AppendedZeroesTest ( pfHash hash, const int hashbits )

-{

-  printf("Running sanity check 2");

-  

-  Rand r(173994);

-

-  const int hashbytes = hashbits/8;

-

-  for(int rep = 0; rep < 100; rep++)

-  {

-    if(rep % 10 == 0) printf(".");

-

-    unsigned char key[256];

-

-    memset(key,0,sizeof(key));

-

-    r.rand_p(key,32);

-

-    uint32_t h1[16];

-    uint32_t h2[16];

-

-    memset(h1,0,hashbytes);

-    memset(h2,0,hashbytes);

-

-    for(int i = 0; i < 32; i++)

-    {

-      hash(key,32+i,0,h1);

-

-      if(memcmp(h1,h2,hashbytes) == 0)

-      {

-        printf("\n*********FAIL*********\n");

-        return;

-      }

-

-      memcpy(h2,h1,hashbytes);

-    }

-  }

-

-  printf("PASS\n");

-}

-

-//-----------------------------------------------------------------------------

-// Generate all keys of up to N bytes containing two non-zero bytes

-

-void TwoBytesKeygen ( int maxlen, KeyCallback & c )

-{

-  //----------

-  // Compute # of keys

-

-  int keycount = 0;

-

-  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);

-

-  keycount *= 255*255;

-

-  for(int i = 2; i <= maxlen; i++) keycount += i*255;

-

-  printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);

-

-  c.reserve(keycount);

-

-  //----------

-  // Add all keys with one non-zero byte

-

-  uint8_t key[256];

-

-  memset(key,0,256);

-

-  for(int keylen = 2; keylen <= maxlen; keylen++)

-  for(int byteA = 0; byteA < keylen; byteA++)

-  {

-    for(int valA = 1; valA <= 255; valA++)

-    {

-      key[byteA] = (uint8_t)valA;

-

-      c(key,keylen);

-    }

-

-    key[byteA] = 0;

-  }

-

-  //----------

-  // Add all keys with two non-zero bytes

-

-  for(int keylen = 2; keylen <= maxlen; keylen++)

-  for(int byteA = 0; byteA < keylen-1; byteA++)

-  for(int byteB = byteA+1; byteB < keylen; byteB++)

-  {

-    for(int valA = 1; valA <= 255; valA++)

-    {

-      key[byteA] = (uint8_t)valA;

-

-      for(int valB = 1; valB <= 255; valB++)

-      {

-        key[byteB] = (uint8_t)valB;

-        c(key,keylen);

-      }

-

-      key[byteB] = 0;

-    }

-

-    key[byteA] = 0;

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-template< typename hashtype >

-void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )

-{

-  typedef CollisionMap<hashtype,ByteVec> cmap_t;

-

-  for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)

-  {

-    const hashtype & hash = (*it).first;

-

-    printf("Hash - ");

-    printbytes(&hash,sizeof(hashtype));

-    printf("\n");

-

-    std::vector<ByteVec> & keys = (*it).second;

-

-    for(int i = 0; i < (int)keys.size(); i++)

-    {

-      ByteVec & key = keys[i];

-

-      printf("Key  - ");

-      printbytes(&key[0],(int)key.size());

-      printf("\n");

-    }

-    printf("\n");

-  }

-

-}

-

-// test code

-

-void ReportCollisions ( pfHash hash )

-{

-  printf("Hashing keyset\n");

-

-  std::vector<uint128_t> hashes;

-

-  HashCallback<uint128_t> c(hash,hashes);

-

-  TwoBytesKeygen(20,c);

-

-  printf("%d hashes\n",(int)hashes.size());

-

-  printf("Finding collisions\n");

-

-  HashSet<uint128_t> collisions;

-

-  FindCollisions(hashes,collisions,1000);

-

-  printf("%d collisions\n",(int)collisions.size());

-

-  printf("Mapping collisions\n");

-

-  CollisionMap<uint128_t,ByteVec> cmap;

-

-  CollisionCallback<uint128_t> c2(hash,collisions,cmap);

-

-  TwoBytesKeygen(20,c2);

-

-  printf("Dumping collisions\n");

-

-  DumpCollisionMap(cmap);

-}

+#include "KeysetTest.h"
+
+#include "Platform.h"
+#include "Random.h"
+
+#include <map>
+#include <set>
+
+//-----------------------------------------------------------------------------
+// This should hopefully be a thorough and unambiguous test of whether a hash
+// is correctly implemented on a given platform
+
+bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
+{
+  const int hashbytes = hashbits / 8;
+  // hashbits is the output width of 'hash' in bits; four bytes of 'final' are read below, so it must be >= 32.
+  uint8_t * key    = new uint8_t[256];
+  uint8_t * hashes = new uint8_t[hashbytes * 256];
+  uint8_t * final  = new uint8_t[hashbytes];
+
+  memset(key,0,256);
+  memset(hashes,0,hashbytes*256);
+  memset(final,0,hashbytes);
+
+  // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... with lengths N = 0..255, using 256-N as
+  // the seed
+
+  for(int i = 0; i < 256; i++)
+  {
+    key[i] = (uint8_t)i;
+
+    hash(key,i,256-i,&hashes[i*hashbytes]);
+  }
+
+  // Then hash the result array
+
+  hash(hashes,hashbytes*256,0,final);
+
+  // The first four bytes of that hash, interpreted as a little-endian integer, is our
+  // verification value
+  // Each byte is widened to uint32_t before shifting so final[3] << 24 never shifts into the sign bit of a promoted int.
+  uint32_t verification = ((uint32_t)final[0] << 0) | ((uint32_t)final[1] << 8) | ((uint32_t)final[2] << 16) | ((uint32_t)final[3] << 24);
+
+  delete [] key;
+  delete [] hashes;
+  delete [] final;
+
+  //----------
+  // Report (only when verbose) and return true exactly when the computed value equals 'expected'.
+  if(expected != verification)
+  {
+    if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
+    return false;
+  }
+  else
+  {
+    if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
+    return true;
+  }
+}
+
+//----------------------------------------------------------------------------
+// Basic sanity checks -
+
+// A hash function should not be reading outside the bounds of the key.
+
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
+
+// Hashing the same key twice should always produce the same result.
+
+// The memory alignment of the key should not affect the hash result.
+
+bool SanityTest ( pfHash hash, const int hashbits )
+{
+  printf("Running sanity check 1");
+  // Returns true when every bit-flip / re-hash check passes; progress dots and the verdict go to stdout.
+  Rand r(883741);
+
+  bool result = true;
+
+  const int hashbytes = hashbits/8;
+  const int reps = 10;
+  const int keymax = 128;
+  const int pad = 16;
+  const int buflen = keymax + pad*3;
+  // pad*3 of slack lets key2 start anywhere in [pad*2, pad*3) and still hold keymax bytes inside buffer2.
+  uint8_t * buffer1 = new uint8_t[buflen];
+  uint8_t * buffer2 = new uint8_t[buflen];
+
+  uint8_t * hash1 = new uint8_t[hashbytes];
+  uint8_t * hash2 = new uint8_t[hashbytes];
+
+  //----------
+  // For each key length and each key2 start offset: refill both buffers, copy key1 into key2, then test every bit.
+  for(int irep = 0; irep < reps; irep++)
+  {
+    if(irep % (reps/10) == 0) printf(".");
+
+    for(int len = 4; len <= keymax; len++)
+    {
+      for(int offset = pad; offset < pad*2; offset++)
+      {
+        uint8_t * key1 = &buffer1[pad];
+        uint8_t * key2 = &buffer2[pad+offset];
+
+        r.rand_p(buffer1,buflen);
+        r.rand_p(buffer2,buflen);
+
+        memcpy(key2,key1,len);
+
+        hash(key1,len,0,hash1);
+
+        for(int bit = 0; bit < (len * 8); bit++)
+        {
+          // Flip a bit, hash the key -> we should get a different result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          if(memcmp(hash1,hash2,hashbytes) == 0)
+          {
+            result = false;
+          }
+
+          // Flip it back, hash again -> we should get the original result.
+
+          flipbit(key2,len,bit);
+          hash(key2,len,0,hash2);
+
+          if(memcmp(hash1,hash2,hashbytes) != 0)
+          {
+            result = false;
+          }
+        }
+      }
+    }
+  }
+
+  if(result == false)
+  {
+    printf("*********FAIL*********\n");
+  }
+  else
+  {
+    printf("PASS\n");
+  }
+  delete [] buffer1;  // the key buffers are owned by this function and are released along with the hash buffers
+  delete [] buffer2;
+  delete [] hash1;
+  delete [] hash2;
+  return result;
+}
+
+//----------------------------------------------------------------------------
+// Appending zero bytes to a key should always cause it to produce a different
+// hash value
+
+void AppendedZeroesTest ( pfHash hash, const int hashbits )
+{
+  printf("Running sanity check 2");
+  // The verdict is only printed to stdout; nothing is returned to the caller.
+  Rand r(173994);
+
+  const int hashbytes = hashbits/8;
+
+  for(int rep = 0; rep < 100; rep++)
+  {
+    if(rep % 10 == 0) printf(".");
+
+    unsigned char key[256];
+    // Zero the whole key, then let rand_p write to the first 32 bytes (presumably random data); bytes 32..255 stay zero.
+    memset(key,0,sizeof(key));
+
+    r.rand_p(key,32);
+
+    uint32_t h1[16];
+    uint32_t h2[16];
+    // h1/h2 hold 16 uint32_t each, so hashbytes must not exceed sizeof(h1).
+    memset(h1,0,hashbytes);
+    memset(h2,0,hashbytes);
+    // Hash the key at lengths 32..63; each result must differ from the previous one (h2 starts out all-zero).
+    for(int i = 0; i < 32; i++)
+    {
+      hash(key,32+i,0,h1);
+
+      if(memcmp(h1,h2,hashbytes) == 0)
+      {
+        printf("\n*********FAIL*********\n");
+        return;
+      }
+
+      memcpy(h2,h1,hashbytes);
+    }
+  }
+
+  printf("PASS\n");
+}
+
+//-----------------------------------------------------------------------------
+// Generate all keys of up to N bytes containing one or two non-zero bytes
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c )
+{
+  //----------
+  // Compute # of keys
+  // Two-byte keys: sum of chooseK(i,2) (presumably the count of position pairs) times 255*255 values; one-byte keys: i*255 per length i.
+  int keycount = 0;
+
+  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
+
+  keycount *= 255*255;
+
+  for(int i = 2; i <= maxlen; i++) keycount += i*255;
+
+  printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
+  // The total is handed to the callback's reserve() before any key is emitted.
+  c.reserve(keycount);
+
+  //----------
+  // Add all keys with one non-zero byte
+
+  uint8_t key[256];
+  // The key buffer is 256 bytes, so maxlen must not exceed 256.
+  memset(key,0,256);
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen; byteA++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      c(key,keylen);
+    }
+    // Restore the byte to zero before moving on to the next position.
+    key[byteA] = 0;
+  }
+
+  //----------
+  // Add all keys with two non-zero bytes
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen-1; byteA++)
+  for(int byteB = byteA+1; byteB < keylen; byteB++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      for(int valB = 1; valB <= 255; valB++)
+      {
+        key[byteB] = (uint8_t)valB;
+        c(key,keylen);
+      }
+
+      key[byteB] = 0;
+    }
+    // Both positions are zero again here, so the next (byteA, byteB) pair starts from an all-zero key.
+    key[byteA] = 0;
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Prints every entry of cmap to stdout: the hash value first, then each key stored under that hash.
+template< typename hashtype >
+void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
+{
+  typedef CollisionMap<hashtype,ByteVec> cmap_t;
+  // Each map entry pairs a hash value (first) with the vector of keys stored under it (second).
+  for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
+  {
+    const hashtype & hash = (*it).first;
+
+    printf("Hash - ");
+    printbytes(&hash,sizeof(hashtype));
+    printf("\n");
+
+    std::vector<ByteVec> & keys = (*it).second;
+
+    for(int i = 0; i < (int)keys.size(); i++)
+    {
+      ByteVec & key = keys[i];
+
+      printf("Key  - ");
+      printbytes(&key[0],(int)key.size());
+      printf("\n");
+    }
+    printf("\n");
+  }
+
+}
+
+// test code
+// Hashes the 'TwoBytes' keyset (maxlen 20), collecting results as uint128_t, then prints the collision map built from it.
+void ReportCollisions ( pfHash hash )
+{
+  printf("Hashing keyset\n");
+  // Pass 1: run the keyset through 'hash' via HashCallback, which is given 'hashes' as its output vector.
+  std::vector<uint128_t> hashes;
+
+  HashCallback<uint128_t> c(hash,hashes);
+
+  TwoBytesKeygen(20,c);
+
+  printf("%d hashes\n",(int)hashes.size());
+
+  printf("Finding collisions\n");
+  // NOTE(review): the meaning of the 1000 argument is defined by FindCollisions and is not visible here - confirm.
+  HashSet<uint128_t> collisions;
+
+  FindCollisions(hashes,collisions,1000);
+
+  printf("%d collisions\n",(int)collisions.size());
+
+  printf("Mapping collisions\n");
+  // Pass 2: regenerate the same keyset so that c2 can (presumably) record the keys behind each colliding hash in cmap.
+  CollisionMap<uint128_t,ByteVec> cmap;
+
+  CollisionCallback<uint128_t> c2(hash,collisions,cmap);
+
+  TwoBytesKeygen(20,c2);
+
+  printf("Dumping collisions\n");
+
+  DumpCollisionMap(cmap);
+}
diff --git a/KeysetTest.h b/KeysetTest.h
index 55d5d5f..dce54d2 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -1,439 +1,439 @@
-//-----------------------------------------------------------------------------

-// Keyset tests generate various sorts of difficult-to-hash keysets and compare

-// the distribution and collision frequency of the hash results against an

-// ideal random distribution

-

-// The sanity checks are also in this cpp/h

-

-#pragma once

-

-#include "Types.h"

-#include "Stats.h"

-#include "Random.h"   // for rand_p

-

-#include <algorithm>  // for std::swap

-#include <assert.h>

-

-//-----------------------------------------------------------------------------

-// Sanity tests

-

-bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );

-bool SanityTest         ( pfHash hash, const int hashbits );

-void AppendedZeroesTest ( pfHash hash, const int hashbits );

-

-//-----------------------------------------------------------------------------

-// Keyset 'Combination' - all possible combinations of input blocks

-

-template< typename hashtype >

-void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 

-                  uint32_t * blocks, int blockcount, 

-                pfHash hash, std::vector<hashtype> & hashes )

-{

-  if(len == maxlen) return;

-

-  for(int i = 0; i < blockcount; i++)

-  {

-    key[len] = blocks[i];

-  

-    //if(len == maxlen-1)

-    {

-      hashtype h;

-      hash(key,(len+1) * sizeof(uint32_t),0,&h);

-      hashes.push_back(h);

-    }

-

-    //else

-    {

-      CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);

-    }

-  }

-}

-

-template< typename hashtype >

-bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )

-{

-  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);

-

-  //----------

-

-  std::vector<hashtype> hashes;

-

-  uint32_t * key = new uint32_t[maxlen];

-

-  CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);

-

-  delete [] key;

-

-  printf("%d keys\n",(int)hashes.size());

-

-  //----------

-

-  bool result = true;

-

-  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);

-  

-  printf("\n");

-

-  return result;

-}

-

-//----------------------------------------------------------------------------

-// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys

-// consisting of all possible permutations of those blocks

-

-template< typename hashtype >

-void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )

-{

-  if(k == blockcount-1)

-  {

-    hashtype h;

-

-    hash(blocks,blockcount * sizeof(uint32_t),0,&h);

-

-    hashes.push_back(h);

-

-    return;

-  }

-

-  for(int i = k; i < blockcount; i++)

-  {

-    std::swap(blocks[k],blocks[i]);

-

-    PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);

-

-    std::swap(blocks[k],blocks[i]);

-  }

-}

-

-template< typename hashtype >

-bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )

-{

-  printf("Keyset 'Permutation' - %d blocks - ",blockcount);

-

-  //----------

-

-  std::vector<hashtype> hashes;

-

-  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);

-

-  printf("%d keys\n",(int)hashes.size());

-

-  //----------

-

-  bool result = true;

-

-  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);

-  

-  printf("\n");

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set

-

-template < typename keytype, typename hashtype >

-void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )

-{

-  const int nbytes = sizeof(keytype);

-  const int nbits = nbytes * 8;

-

-  hashtype h;

-

-  for(int i = start; i < nbits; i++)

-  {

-    flipbit(&k,nbytes,i);

-

-    if(inclusive || (bitsleft == 1))

-    {

-      hash(&k,sizeof(keytype),0,&h);

-      hashes.push_back(h);

-    }

-

-    if(bitsleft > 1)

-    {

-      SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);

-    }

-

-    flipbit(&k,nbytes,i);

-  }

-}

-

-//----------

-

-template < int keybits, typename hashtype >

-bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )

-{

-  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);

-

-  typedef Blob<keybits> keytype;

-

-  std::vector<hashtype> hashes;

-

-  keytype k;

-  memset(&k,0,sizeof(k));

-

-  if(inclusive)

-  {

-    hashtype h;

-

-    hash(&k,sizeof(keytype),0,&h);

-

-    hashes.push_back(h);

-  }

-

-  SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);

-

-  printf("%d keys\n",(int)hashes.size());

-

-  bool result = true;

-  

-  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);

-

-  printf("\n");

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate

-// all possible keys with bits set in that window

-

-template < typename keytype, typename hashtype >

-bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )

-{

-  const int keybits = sizeof(keytype) * 8;

-  const int keycount = 1 << windowbits;

-

-  std::vector<hashtype> hashes;

-  hashes.resize(keycount);

-

-  bool result = true;

-

-  int testcount = keybits;

-

-  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);

-

-  for(int j = 0; j <= testcount; j++)

-  {

-    int minbit = j;

-

-    keytype key;

-

-    for(int i = 0; i < keycount; i++)

-    {

-      key = i;

-      //key = key << minbit;

-

-      lrot(&key,sizeof(keytype),minbit);

-

-      hash(&key,sizeof(keytype),0,&hashes[i]);

-    }

-

-    printf("Window at %3d - ",j);

-

-    result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);

-

-    //printf("\n");

-  }

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M

-// bytes.

-

-// (This keyset type is designed to make MurmurHash2 fail)

-

-template < typename hashtype >

-bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )

-{

-  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);

-

-  Rand r(483723);

-

-  std::vector<hashtype> hashes;

-  hashes.resize(keycount);

-

-  int keyLen = cycleLen * cycleReps;

-

-  uint8_t * cycle = new uint8_t[cycleLen + 16];

-  uint8_t * key = new uint8_t[keyLen];

-

-  //----------

-

-  for(int i = 0; i < keycount; i++)

-  {

-    r.rand_p(cycle,cycleLen);

-

-    *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);

-

-    for(int j = 0; j < keyLen; j++)

-    {

-      key[j] = cycle[j % cycleLen];

-    }

-

-    hash(key,keyLen,0,&hashes[i]);

-  }

-

-  //----------

-  

-  bool result = true;

-

-  result &= TestHashList(hashes,true,true,drawDiagram);

-  printf("\n");

-

-  delete [] cycle;

-  delete [] key;

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes

-

-void TwoBytesKeygen ( int maxlen, KeyCallback & c );

-

-template < typename hashtype >

-bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )

-{

-  std::vector<hashtype> hashes;

-

-  HashCallback<hashtype> c(hash,hashes);

-

-  TwoBytesKeygen(maxlen,c);

-

-  bool result = true;

-

-  result &= TestHashList(hashes,true,true,drawDiagram);

-  printf("\n");

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",

-// where "core" consists of all possible combinations of the given character

-// set of length N.

-

-template < typename hashtype >

-bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )

-{

-  const int prefixlen = (int)strlen(prefix);

-  const int suffixlen = (int)strlen(suffix);

-  const int corecount = (int)strlen(coreset);

-

-  const int keybytes = prefixlen + corelen + suffixlen;

-  const int keycount = (int)pow(double(corecount),double(corelen));

-

-  printf("Keyset 'Text' - keys of form \"%s[",prefix);

-  for(int i = 0; i < corelen; i++) printf("X");		

-  printf("]%s\" - %d keys\n",suffix,keycount);

-

-  uint8_t * key = new uint8_t[keybytes+1];

-

-  key[keybytes] = 0;

-

-  memcpy(key,prefix,prefixlen);

-  memcpy(key+prefixlen+corelen,suffix,suffixlen);

-

-  //----------

-

-  std::vector<hashtype> hashes;

-  hashes.resize(keycount);

-

-  for(int i = 0; i < keycount; i++)

-  {

-    int t = i;

-

-    for(int j = 0; j < corelen; j++)

-    {

-      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;

-    }

-

-    hash(key,keybytes,0,&hashes[i]);

-  }

-

-  //----------

-

-  bool result = true;

-

-  result &= TestHashList(hashes,true,true,drawDiagram);

-

-  printf("\n");

-

-  delete [] key;

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length

-

-// We reuse one block of empty bytes, otherwise the RAM cost is enormous.

-

-template < typename hashtype >

-bool ZeroKeyTest ( pfHash hash, bool drawDiagram )

-{

-  int keycount = 64*1024;

-

-  printf("Keyset 'Zeroes' - %d keys\n",keycount);

-

-  unsigned char * nullblock = new unsigned char[keycount];

-  memset(nullblock,0,keycount);

-

-  //----------

-

-  std::vector<hashtype> hashes;

-

-  hashes.resize(keycount);

-

-  for(int i = 0; i < keycount; i++)

-  {

-    hash(nullblock,i,0,&hashes[i]);

-  }

-

-  bool result = true;

-

-  result &= TestHashList(hashes,true,true,drawDiagram);

-

-  printf("\n");

-

-  delete [] nullblock;

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Keyset 'Seed' - hash "the quick brown fox..." using different seeds

-

-template < typename hashtype >

-bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )

-{

-  printf("Keyset 'Seed' - %d keys\n",keycount);

-

-  const char * text = "The quick brown fox jumps over the lazy dog";

-  const int len = (int)strlen(text);

-

-  //----------

-

-  std::vector<hashtype> hashes;

-

-  hashes.resize(keycount);

-

-  for(int i = 0; i < keycount; i++)

-  {

-    hash(text,len,i,&hashes[i]);

-  }

-

-  bool result = true;

-

-  result &= TestHashList(hashes,true,true,drawDiagram);

-

-  printf("\n");

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

+//-----------------------------------------------------------------------------
+// Keyset tests generate various sorts of difficult-to-hash keysets and compare
+// the distribution and collision frequency of the hash results against an
+// ideal random distribution
+
+// The sanity checks are also in this cpp/h
+
+#pragma once
+
+#include "Types.h"
+#include "Stats.h"
+#include "Random.h"   // for rand_p
+
+#include <algorithm>  // for std::swap
+#include <assert.h>
+
+//-----------------------------------------------------------------------------
+// Sanity tests
+
+bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
+bool SanityTest         ( pfHash hash, const int hashbits );
+void AppendedZeroesTest ( pfHash hash, const int hashbits );
+
+//-----------------------------------------------------------------------------
+// Keyset 'Combination' - all possible combinations of input blocks
+
+template< typename hashtype >
+void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
+                  uint32_t * blocks, int blockcount, 
+                pfHash hash, std::vector<hashtype> & hashes )
+{
+  // key[0..len) is the prefix chosen so far. Every prefix is hashed, not
+  // only the full-length keys, and the recursion stops once maxlen blocks
+  // have been placed.
+  if(len == maxlen) return;
+
+  for(int pick = 0; pick < blockcount; ++pick)
+  {
+    // Extend the prefix with one candidate block.
+    key[len] = blocks[pick];
+
+    // Hash the (len+1)-block key and record the result.
+    hashtype digest;
+    hash(key,(len+1) * sizeof(uint32_t),0,&digest);
+    hashes.push_back(digest);
+
+    // Then build every longer key that starts with this prefix.
+    CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
+  }
+}
+
+template< typename hashtype >
+bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+
+  // Generate the keyset. The scratch buffer holds the key being assembled
+  // and is released as soon as generation has finished.
+
+  std::vector<hashtype> hashes;
+
+  uint32_t * scratch = new uint32_t[maxlen];
+
+  CombinationKeygenRecurse<hashtype>(scratch,0,maxlen,blocks,blockcount,hash,hashes);
+
+  delete [] scratch;
+
+  const int keycount = (int)hashes.size();
+  printf("%d keys\n",keycount);
+
+  // Evaluate the hashes; the caller's test flags are forwarded unchanged.
+
+  bool passed = true;
+  passed &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+
+  printf("\n");
+  return passed;
+}
+
+//----------------------------------------------------------------------------
+// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
+// consisting of all possible permutations of those blocks
+
+template< typename hashtype >
+void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
+{
+  // With k at the last slot the current ordering of blocks is complete:
+  // hash all blockcount words as one key and record the result.
+  if(k == blockcount-1)
+  {
+    hashtype digest;
+    hash(blocks,blockcount * sizeof(uint32_t),0,&digest);
+    hashes.push_back(digest);
+    return;
+  }
+
+  // Otherwise place each remaining block in slot k in turn and recurse on
+  // the slots after it. The second swap undoes the first, so blocks is
+  // back in its incoming order when this call returns.
+  for(int pick = k; pick < blockcount; ++pick)
+  {
+    std::swap(blocks[k],blocks[pick]);
+    PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
+    std::swap(blocks[k],blocks[pick]);
+  }
+}
+
+template< typename hashtype >
+bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+  printf("Keyset 'Permutation' - %d blocks - ",blockcount);
+
+  // Hash every ordering of the caller's blocks. The generator permutes
+  // blocks in place, starting from slot 0.
+
+  std::vector<hashtype> hashes;
+
+  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
+
+  const int keycount = (int)hashes.size();
+  printf("%d keys\n",keycount);
+
+  // Hand the hashes to TestHashList, forwarding the caller's test flags.
+
+  bool passed = true;
+  passed &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+
+  printf("\n");
+  return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
+
+template < typename keytype, typename hashtype >
+void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
+{
+  const int keybytes = sizeof(keytype);
+  const int keybits = keybytes * 8;
+
+  // A key is recorded at every depth when inclusive is set, otherwise only
+  // once the last of the requested bits has been placed.
+  const bool record = inclusive || (bitsleft == 1);
+
+  for(int bit = start; bit < keybits; ++bit)
+  {
+    // Undone by the matching flipbit call below (presumably a toggle).
+    flipbit(&k,keybytes,bit);
+
+    if(record)
+    {
+      hashtype digest;
+      hash(&k,sizeof(keytype),0,&digest);
+      hashes.push_back(digest);
+    }
+
+    if(bitsleft > 1) SparseKeygenRecurse(hash,bit+1,bitsleft-1,inclusive,k,hashes);
+    flipbit(&k,keybytes,bit);
+  }
+}
+
+//----------
+
+template < int keybits, typename hashtype >
+bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
+{
+  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
+
+  typedef Blob<keybits> keytype;
+
+  // Start from a key whose bytes are all zero.
+  keytype key;
+  memset(&key,0,sizeof(key));
+
+  std::vector<hashtype> hashes;
+
+  // In inclusive mode the key with no bits set is part of the keyset too.
+  if(inclusive)
+  {
+    hashtype digest;
+    hash(&key,sizeof(keytype),0,&digest);
+    hashes.push_back(digest);
+  }
+
+  // The generator receives the key by reference and works on it in place.
+  SparseKeygenRecurse(hash,0,setbits,inclusive,key,hashes);
+
+  const int keycount = (int)hashes.size();
+  printf("%d keys\n",keycount);
+
+  bool passed = true;
+  passed &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+
+  printf("\n");
+  return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
+// all possible keys with bits set in that window
+
+template < typename keytype, typename hashtype >
+bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
+{
+  const int keybits = sizeof(keytype) * 8;
+  const int keycount = 1 << windowbits;
+
+  // One slot per window value. Every window position below overwrites all
+  // keycount entries, so the same vector is reused for each test.
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  bool result = true;
+
+  const int testcount = keybits;
+
+  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
+
+  // Positions 0..testcount-1 only: rotating by the full key width would
+  // repeat position 0 and run one test more than the banner announces.
+  for(int j = 0; j < testcount; j++)
+  {
+    const int minbit = j;
+
+    keytype key;
+
+    for(int i = 0; i < keycount; i++)
+    {
+      // key = i, moved to position minbit by lrot (presumably a left rotate).
+      key = i;
+      lrot(&key,sizeof(keytype),minbit);
+
+      hash(&key,sizeof(keytype),0,&hashes[i]);
+    }
+
+    printf("Window at %3d - ",j);
+    result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
+  }
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
+// bytes.
+
+// (This keyset type is designed to make MurmurHash2 fail)
+
+template < typename hashtype >
+bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
+{
+  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
+
+  const int keyLen = cycleLen * cycleReps;
+
+  Rand rng(483723);
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  // cycle carries 16 spare bytes beyond cycleLen, which keeps the 4-byte
+  // store below inside the buffer even for very short cycles.
+  uint8_t * cycle = new uint8_t[cycleLen + 16];
+  uint8_t * key = new uint8_t[keyLen];
+
+  for(int keyidx = 0; keyidx < keycount; ++keyidx)
+  {
+    // Refill the cycle through rand_p, then overwrite its first four bytes
+    // with a value derived from the key index.
+    rng.rand_p(cycle,cycleLen);
+    *(uint32_t*)cycle = f3mix(keyidx ^ 0x746a94f1);
+
+    // Tile the cycle across the whole key: cycleReps copies of cycleLen bytes.
+    uint8_t * out = key;
+    for(int rep = 0; rep < cycleReps; ++rep)
+    {
+      for(int b = 0; b < cycleLen; ++b) *out++ = cycle[b];
+    }
+
+    hash(key,keyLen,0,&hashes[keyidx]);
+  }
+
+  bool passed = true;
+  passed &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
+
+  delete [] cycle;
+  delete [] key;
+
+  return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
+
+void TwoBytesKeygen ( int maxlen, KeyCallback & c );
+
+template < typename hashtype >
+bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
+{
+  // The callback is built from the hash function and the result vector and
+  // is then handed to the key generator (presumably called once per key).
+  std::vector<hashtype> hashes;
+  HashCallback<hashtype> collector(hash,hashes);
+
+  TwoBytesKeygen(maxlen,collector);
+
+  bool passed = true;
+  passed &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+  return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
+// where "core" consists of all possible combinations of the given character
+// set of length N.
+
+template < typename hashtype >
+bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
+{
+  const int prefixlen = (int)strlen(prefix);
+  const int suffixlen = (int)strlen(suffix);
+  const int corecount = (int)strlen(coreset);
+
+  const int keybytes = prefixlen + corelen + suffixlen;
+  // keycount = corecount^corelen in integer math; truncating a pow() result
+  // can come out one key short when pow() returns just under the exact power.
+  int keycount = 1;
+  for(int i = 0; i < corelen; i++) keycount *= corecount;
+
+  printf("Keyset 'Text' - keys of form \"%s[",prefix);
+  for(int i = 0; i < corelen; i++) printf("X");
+  printf("]%s\" - %d keys\n",suffix,keycount);
+
+  // Buffer layout: prefix + core + suffix + a zero byte; only the core changes.
+  uint8_t * key = new uint8_t[keybytes+1];
+  key[keybytes] = 0;
+
+  memcpy(key,prefix,prefixlen);
+  memcpy(key+prefixlen+corelen,suffix,suffixlen);
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  for(int i = 0; i < keycount; i++)
+  {
+    // Write i in base corecount, least significant digit first, using coreset.
+    int t = i;
+
+    for(int j = 0; j < corelen; j++)
+    {
+      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
+    }
+
+    hash(key,keybytes,0,&hashes[i]);
+  }
+
+  //----------
+
+  bool result = true;
+  result &= TestHashList(hashes,true,true,drawDiagram);
+  printf("\n");
+
+  delete [] key;
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
+
+// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
+
+template < typename hashtype >
+bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
+{
+  const int keycount = 64*1024;
+
+  printf("Keyset 'Zeroes' - %d keys\n",keycount);
+
+  // A single zero-filled buffer backs every key: key number len is simply
+  // the first len bytes of it, for len = 0 .. keycount-1.
+  unsigned char * zeroes = new unsigned char[keycount];
+  memset(zeroes,0,keycount);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  for(int len = 0; len < keycount; ++len)
+  {
+    hash(zeroes,len,0,&hashes[len]);
+  }
+
+  // The buffer is not used again once every key has been hashed.
+  delete [] zeroes;
+
+  bool passed = true;
+  passed &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+  return passed;
+}
+
+//-----------------------------------------------------------------------------
+// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
+
+template < typename hashtype >
+bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
+{
+  printf("Keyset 'Seed' - %d keys\n",keycount);
+
+  // The message never changes; only the seed differs between hashes.
+  const char * message = "The quick brown fox jumps over the lazy dog";
+  const int messagelen = (int)strlen(message);
+
+  //----------
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  // Seeds run from 0 to keycount-1; hashes[seed] receives the result.
+  for(int seed = 0; seed < keycount; ++seed)
+  {
+    hash(message,messagelen,seed,&hashes[seed]);
+  }
+
+  bool passed = true;
+  passed &= TestHashList(hashes,true,true,drawDiagram);
+
+  printf("\n");
+
+  return passed;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp
index b21e9f7..8225566 100644
--- a/MurmurHash1.cpp
+++ b/MurmurHash1.cpp
@@ -1,174 +1,174 @@
-//-----------------------------------------------------------------------------

-// MurmurHash was written by Austin Appleby, and is placed in the public

-// domain. The author hereby disclaims copyright to this source code.

-

-// Note - This code makes a few assumptions about how your machine behaves -

-

-// 1. We can read a 4-byte value from any address without crashing

-// 2. sizeof(int) == 4

-

-// And it has a few limitations -

-

-// 1. It will not work incrementally.

-// 2. It will not produce the same results on little-endian and big-endian

-//    machines.

-

-#include "MurmurHash1.h"

-

-//-----------------------------------------------------------------------------

-

-uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )

-{

-  const unsigned int m = 0xc6a4a793;

-

-  const int r = 16;

-

-  unsigned int h = seed ^ (len * m);

-

-  //----------

-  

-  const unsigned char * data = (const unsigned char *)key;

-

-  while(len >= 4)

-  {

-    unsigned int k = *(unsigned int *)data;

-

-    h += k;

-    h *= m;

-    h ^= h >> 16;

-

-    data += 4;

-    len -= 4;

-  }

-  

-  //----------

-  

-  switch(len)

-  {

-  case 3:

-    h += data[2] << 16;

-  case 2:

-    h += data[1] << 8;

-  case 1:

-    h += data[0];

-    h *= m;

-    h ^= h >> r;

-  };

- 

-  //----------

-

-  h *= m;

-  h ^= h >> 10;

-  h *= m;

-  h ^= h >> 17;

-

-  return h;

-} 

-

-//-----------------------------------------------------------------------------

-// MurmurHash1Aligned, by Austin Appleby

-

-// Same algorithm as MurmurHash1, but only does aligned reads - should be safer

-// on certain platforms. 

-

-// Performance should be equal to or better than the simple version.

-

-unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )

-{

-  const unsigned int m = 0xc6a4a793;

-  const int r = 16;

-

-  const unsigned char * data = (const unsigned char *)key;

-

-  unsigned int h = seed ^ (len * m);

-

-  int align = (uint64_t)data & 3;

-

-  if(align && (len >= 4))

-  {

-    // Pre-load the temp registers

-

-    unsigned int t = 0, d = 0;

-

-    switch(align)

-    {

-      case 1: t |= data[2] << 16;

-      case 2: t |= data[1] << 8;

-      case 3: t |= data[0];

-    }

-

-    t <<= (8 * align);

-

-    data += 4-align;

-    len -= 4-align;

-

-    int sl = 8 * (4-align);

-    int sr = 8 * align;

-

-    // Mix

-

-    while(len >= 4)

-    {

-      d = *(unsigned int *)data;

-      t = (t >> sr) | (d << sl);

-      h += t;

-      h *= m;

-      h ^= h >> r;

-      t = d;

-

-      data += 4;

-      len -= 4;

-    }

-

-    // Handle leftover data in temp registers

-

-    int pack = len < align ? len : align;

-

-    d = 0;

-

-    switch(pack)

-    {

-    case 3: d |= data[2] << 16;

-    case 2: d |= data[1] << 8;

-    case 1: d |= data[0];

-    case 0: h += (t >> sr) | (d << sl);

-        h *= m;

-        h ^= h >> r;

-    }

-

-    data += pack;

-    len -= pack;

-  }

-  else

-  {

-    while(len >= 4)

-    {

-      h += *(unsigned int *)data;

-      h *= m;

-      h ^= h >> r;

-

-      data += 4;

-      len -= 4;

-    }

-  }

-

-  //----------

-  // Handle tail bytes

-

-  switch(len)

-  {

-  case 3: h += data[2] << 16;

-  case 2: h += data[1] << 8;

-  case 1: h += data[0];

-      h *= m;

-      h ^= h >> r;

-  };

-

-  h *= m;

-  h ^= h >> 10;

-  h *= m;

-  h ^= h >> 17;

-

-  return h;

-}

-

+//-----------------------------------------------------------------------------
+// MurmurHash was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "MurmurHash1.h"
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
+{
+  const unsigned int m = 0xc6a4a793;
+  const int r = 16;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  // The length is folded into the starting state together with the seed.
+  unsigned int h = seed ^ (len * m);
+
+  //----------
+  // Body: consume the key four bytes at a time. The word is read straight
+  // from memory, so the result depends on the machine's byte order and
+  // the read may be unaligned (see the notes at the top of this file).
+
+  for( ; len >= 4; data += 4, len -= 4)
+  {
+    h += *(const unsigned int *)data;
+    h *= m;
+    h ^= h >> r;
+  }
+
+  //----------
+  // Tail: after the loop len is below 4. The remaining one to three bytes
+  // are added in (highest byte first) and get one more mixing step; with
+  // nothing left over this step is skipped entirely.
+
+  if(len > 0)
+  {
+    if(len == 3) h += data[2] << 16;
+    if(len >= 2) h += data[1] << 8;
+    h += data[0];
+
+    h *= m;
+    h ^= h >> r;
+  }
+
+  //----------
+  // Finalization: two more multiply / xor-shift rounds.
+
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
+}
+
+//-----------------------------------------------------------------------------
+// MurmurHash1Aligned, by Austin Appleby
+
+// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance should be equal to or better than the simple version.
+
+uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed )
+{
+  // Return and seed types are spelled uint32_t, exactly as in the prototype
+  // in MurmurHash1.h, so declaration and definition always name one function.
+  const unsigned int m = 0xc6a4a793;
+  const int r = 16;
+
+  const unsigned char * data = (const unsigned char *)key;
+  unsigned int h = seed ^ (len * m);
+  int align = (uint64_t)data & 3;
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers with the 4-align bytes in front of the
+    // next 4-byte boundary, read one byte at a time.
+    unsigned int t = 0, d = 0;
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix: the bytes carried in t supply the low part of each mixed value
+    // and the next aligned word d supplies the high part.
+    while(len >= 4)
+    {
+      d = *(unsigned int *)data;
+      t = (t >> sr) | (d << sl);
+      h += t;
+      h *= m;
+      h ^= h >> r;
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers: at most align further bytes
+    // are still needed to complete the group carried in t.
+    int pack = len < align ? len : align;
+
+    d = 0;
+
+    switch(pack)
+    {
+    case 3: d |= data[2] << 16;
+    case 2: d |= data[1] << 8;
+    case 1: d |= data[0];
+    case 0: h += (t >> sr) | (d << sl);
+        h *= m;
+        h ^= h >> r;
+    }
+
+    data += pack;
+    len -= pack;
+  }
+  else
+  {
+    while(len >= 4)
+    {
+      h += *(unsigned int *)data;
+      h *= m;
+      h ^= h >> r;
+
+      data += 4;
+      len -= 4;
+    }
+  }
+
+  //----------
+  // Handle tail bytes
+
+  switch(len)
+  {
+  case 3: h += data[2] << 16;
+  case 2: h += data[1] << 8;
+  case 1: h += data[0];
+      h *= m;
+      h ^= h >> r;
+  };
+  // Final mixing rounds.
+  h *= m;
+  h ^= h >> 10;
+  h *= m;
+  h ^= h >> 17;
+
+  return h;
+}
+
diff --git a/MurmurHash1.h b/MurmurHash1.h
index 40ddbc4..93b08c3 100644
--- a/MurmurHash1.h
+++ b/MurmurHash1.h
@@ -1,34 +1,34 @@
-//-----------------------------------------------------------------------------

-// MurmurHash1 was written by Austin Appleby, and is placed in the public

-// domain. The author hereby disclaims copyright to this source code.

-

-#ifndef _MURMURHASH1_H_

-#define _MURMURHASH1_H_

-

-//-----------------------------------------------------------------------------

-// Platform-specific functions and macros

-

-// Microsoft Visual Studio

-

-#if defined(_MSC_VER)

-

-typedef unsigned char uint8_t;

-typedef unsigned long uint32_t;

-typedef unsigned __int64 uint64_t;

-

-// Other compilers

-

-#else	// defined(_MSC_VER)

-

-#include <stdint.h>

-

-#endif // !defined(_MSC_VER)

-

-//-----------------------------------------------------------------------------

-

-uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );

-uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );

-

-//-----------------------------------------------------------------------------

-

-#endif // _MURMURHASH1_H_

+//-----------------------------------------------------------------------------
+// MurmurHash1 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH1_H_
+#define _MURMURHASH1_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Both functions hash the len bytes at key, starting from seed, to 32 bits.
+uint32_t MurmurHash1        ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH1_H_
diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp
index dbb2053..cd1e53a 100644
--- a/MurmurHash2.cpp
+++ b/MurmurHash2.cpp
@@ -1,523 +1,523 @@
-//-----------------------------------------------------------------------------

-// MurmurHash2 was written by Austin Appleby, and is placed in the public

-// domain. The author hereby disclaims copyright to this source code.

-

-// Note - This code makes a few assumptions about how your machine behaves -

-

-// 1. We can read a 4-byte value from any address without crashing

-// 2. sizeof(int) == 4

-

-// And it has a few limitations -

-

-// 1. It will not work incrementally.

-// 2. It will not produce the same results on little-endian and big-endian

-//    machines.

-

-#include "MurmurHash2.h"

-

-//-----------------------------------------------------------------------------

-// Platform-specific functions and macros

-

-// Microsoft Visual Studio

-

-#if defined(_MSC_VER)

-

-#define BIG_CONSTANT(x) (x)

-

-// Other compilers

-

-#else	// defined(_MSC_VER)

-

-#define BIG_CONSTANT(x) (x##LLU)

-

-#endif // !defined(_MSC_VER)

-

-//-----------------------------------------------------------------------------

-

-uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )

-{

-  // 'm' and 'r' are mixing constants generated offline.

-  // They're not really 'magic', they just happen to work well.

-

-  const uint32_t m = 0x5bd1e995;

-  const int r = 24;

-

-  // Initialize the hash to a 'random' value

-

-  uint32_t h = seed ^ len;

-

-  // Mix 4 bytes at a time into the hash

-

-  const unsigned char * data = (const unsigned char *)key;

-

-  while(len >= 4)

-  {

-    uint32_t k = *(uint32_t*)data;

-

-    k *= m;

-    k ^= k >> r;

-    k *= m;

-

-    h *= m;

-    h ^= k;

-

-    data += 4;

-    len -= 4;

-  }

-

-  // Handle the last few bytes of the input array

-

-  switch(len)

-  {

-  case 3: h ^= data[2] << 16;

-  case 2: h ^= data[1] << 8;

-  case 1: h ^= data[0];

-      h *= m;

-  };

-

-  // Do a few final mixes of the hash to ensure the last few

-  // bytes are well-incorporated.

-

-  h ^= h >> 13;

-  h *= m;

-  h ^= h >> 15;

-

-  return h;

-} 

-

-//-----------------------------------------------------------------------------

-// MurmurHash2, 64-bit versions, by Austin Appleby

-

-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 

-// and endian-ness issues if used across multiple platforms.

-

-// 64-bit hash for 64-bit platforms

-

-uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )

-{

-  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);

-  const int r = 47;

-

-  uint64_t h = seed ^ (len * m);

-

-  const uint64_t * data = (const uint64_t *)key;

-  const uint64_t * end = data + (len/8);

-

-  while(data != end)

-  {

-    uint64_t k = *data++;

-

-    k *= m; 

-    k ^= k >> r; 

-    k *= m; 

-    

-    h ^= k;

-    h *= m; 

-  }

-

-  const unsigned char * data2 = (const unsigned char*)data;

-

-  switch(len & 7)

-  {

-  case 7: h ^= uint64_t(data2[6]) << 48;

-  case 6: h ^= uint64_t(data2[5]) << 40;

-  case 5: h ^= uint64_t(data2[4]) << 32;

-  case 4: h ^= uint64_t(data2[3]) << 24;

-  case 3: h ^= uint64_t(data2[2]) << 16;

-  case 2: h ^= uint64_t(data2[1]) << 8;

-  case 1: h ^= uint64_t(data2[0]);

-          h *= m;

-  };

- 

-  h ^= h >> r;

-  h *= m;

-  h ^= h >> r;

-

-  return h;

-} 

-

-

-// 64-bit hash for 32-bit platforms

-

-uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )

-{

-  const uint32_t m = 0x5bd1e995;

-  const int r = 24;

-

-  uint32_t h1 = uint32_t(seed) ^ len;

-  uint32_t h2 = uint32_t(seed >> 32);

-

-  const uint32_t * data = (const uint32_t *)key;

-

-  while(len >= 8)

-  {

-    uint32_t k1 = *data++;

-    k1 *= m; k1 ^= k1 >> r; k1 *= m;

-    h1 *= m; h1 ^= k1;

-    len -= 4;

-

-    uint32_t k2 = *data++;

-    k2 *= m; k2 ^= k2 >> r; k2 *= m;

-    h2 *= m; h2 ^= k2;

-    len -= 4;

-  }

-

-  if(len >= 4)

-  {

-    uint32_t k1 = *data++;

-    k1 *= m; k1 ^= k1 >> r; k1 *= m;

-    h1 *= m; h1 ^= k1;

-    len -= 4;

-  }

-

-  switch(len)

-  {

-  case 3: h2 ^= ((unsigned char*)data)[2] << 16;

-  case 2: h2 ^= ((unsigned char*)data)[1] << 8;

-  case 1: h2 ^= ((unsigned char*)data)[0];

-      h2 *= m;

-  };

-

-  h1 ^= h2 >> 18; h1 *= m;

-  h2 ^= h1 >> 22; h2 *= m;

-  h1 ^= h2 >> 17; h1 *= m;

-  h2 ^= h1 >> 19; h2 *= m;

-

-  uint64_t h = h1;

-

-  h = (h << 32) | h2;

-

-  return h;

-} 

-

-//-----------------------------------------------------------------------------

-// MurmurHash2A, by Austin Appleby

-

-// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 

-// construction. Bulk speed should be identical to Murmur2, small-key speed 

-// will be 10%-20% slower due to the added overhead at the end of the hash.

-

-// This variant fixes a minor issue where null keys were more likely to

-// collide with each other than expected, and also makes the function

-// more amenable to incremental implementations.

-

-#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

-

-uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )

-{

-  const uint32_t m = 0x5bd1e995;

-  const int r = 24;

-  uint32_t l = len;

-

-  const unsigned char * data = (const unsigned char *)key;

-

-  uint32_t h = seed;

-

-  while(len >= 4)

-  {

-    uint32_t k = *(uint32_t*)data;

-

-    mmix(h,k);

-

-    data += 4;

-    len -= 4;

-  }

-

-  uint32_t t = 0;

-

-  switch(len)

-  {

-  case 3: t ^= data[2] << 16;

-  case 2: t ^= data[1] << 8;

-  case 1: t ^= data[0];

-  };

-

-  mmix(h,t);

-  mmix(h,l);

-

-  h ^= h >> 13;

-  h *= m;

-  h ^= h >> 15;

-

-  return h;

-}

-

-//-----------------------------------------------------------------------------

-// CMurmurHash2A, by Austin Appleby

-

-// This is a sample implementation of MurmurHash2A designed to work 

-// incrementally.

-

-// Usage - 

-

-// CMurmurHash2A hasher

-// hasher.Begin(seed);

-// hasher.Add(data1,size1);

-// hasher.Add(data2,size2);

-// ...

-// hasher.Add(dataN,sizeN);

-// uint32_t hash = hasher.End()

-

-class CMurmurHash2A

-{

-public:

-

-  void Begin ( uint32_t seed = 0 )

-  {

-    m_hash  = seed;

-    m_tail  = 0;

-    m_count = 0;

-    m_size  = 0;

-  }

-

-  void Add ( const unsigned char * data, int len )

-  {

-    m_size += len;

-

-    MixTail(data,len);

-

-    while(len >= 4)

-    {

-      uint32_t k = *(uint32_t*)data;

-

-      mmix(m_hash,k);

-

-      data += 4;

-      len -= 4;

-    }

-

-    MixTail(data,len);

-  }

-

-  uint32_t End ( void )

-  {

-    mmix(m_hash,m_tail);

-    mmix(m_hash,m_size);

-

-    m_hash ^= m_hash >> 13;

-    m_hash *= m;

-    m_hash ^= m_hash >> 15;

-

-    return m_hash;

-  }

-

-private:

-

-  static const uint32_t m = 0x5bd1e995;

-  static const int r = 24;

-

-  void MixTail ( const unsigned char * & data, int & len )

-  {

-    while( len && ((len<4) || m_count) )

-    {

-      m_tail |= (*data++) << (m_count * 8);

-

-      m_count++;

-      len--;

-

-      if(m_count == 4)

-      {

-        mmix(m_hash,m_tail);

-        m_tail = 0;

-        m_count = 0;

-      }

-    }

-  }

-

-  uint32_t m_hash;

-  uint32_t m_tail;

-  uint32_t m_count;

-  uint32_t m_size;

-};

-

-//-----------------------------------------------------------------------------

-// MurmurHashNeutral2, by Austin Appleby

-

-// Same as MurmurHash2, but endian- and alignment-neutral.

-// Half the speed though, alas.

-

-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )

-{

-  const uint32_t m = 0x5bd1e995;

-  const int r = 24;

-

-  uint32_t h = seed ^ len;

-

-  const unsigned char * data = (const unsigned char *)key;

-

-  while(len >= 4)

-  {

-    uint32_t k;

-

-    k  = data[0];

-    k |= data[1] << 8;

-    k |= data[2] << 16;

-    k |= data[3] << 24;

-

-    k *= m; 

-    k ^= k >> r; 

-    k *= m;

-

-    h *= m;

-    h ^= k;

-

-    data += 4;

-    len -= 4;

-  }

-  

-  switch(len)

-  {

-  case 3: h ^= data[2] << 16;

-  case 2: h ^= data[1] << 8;

-  case 1: h ^= data[0];

-          h *= m;

-  };

-

-  h ^= h >> 13;

-  h *= m;

-  h ^= h >> 15;

-

-  return h;

-} 

-

-//-----------------------------------------------------------------------------

-// MurmurHashAligned2, by Austin Appleby

-

-// Same algorithm as MurmurHash2, but only does aligned reads - should be safer

-// on certain platforms. 

-

-// Performance will be lower than MurmurHash2

-

-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

-

-

-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )

-{

-  const uint32_t m = 0x5bd1e995;

-  const int r = 24;

-

-  const unsigned char * data = (const unsigned char *)key;

-

-  uint32_t h = seed ^ len;

-

-  int align = (uint64_t)data & 3;

-

-  if(align && (len >= 4))

-  {

-    // Pre-load the temp registers

-

-    uint32_t t = 0, d = 0;

-

-    switch(align)

-    {

-      case 1: t |= data[2] << 16;

-      case 2: t |= data[1] << 8;

-      case 3: t |= data[0];

-    }

-

-    t <<= (8 * align);

-

-    data += 4-align;

-    len -= 4-align;

-

-    int sl = 8 * (4-align);

-    int sr = 8 * align;

-

-    // Mix

-

-    while(len >= 4)

-    {

-      d = *(uint32_t *)data;

-      t = (t >> sr) | (d << sl);

-

-      uint32_t k = t;

-

-      MIX(h,k,m);

-

-      t = d;

-

-      data += 4;

-      len -= 4;

-    }

-

-    // Handle leftover data in temp registers

-

-    d = 0;

-

-    if(len >= align)

-    {

-      switch(align)

-      {

-      case 3: d |= data[2] << 16;

-      case 2: d |= data[1] << 8;

-      case 1: d |= data[0];

-      }

-

-      uint32_t k = (t >> sr) | (d << sl);

-      MIX(h,k,m);

-

-      data += align;

-      len -= align;

-

-      //----------

-      // Handle tail bytes

-

-      switch(len)

-      {

-      case 3: h ^= data[2] << 16;

-      case 2: h ^= data[1] << 8;

-      case 1: h ^= data[0];

-          h *= m;

-      };

-    }

-    else

-    {

-      switch(len)

-      {

-      case 3: d |= data[2] << 16;

-      case 2: d |= data[1] << 8;

-      case 1: d |= data[0];

-      case 0: h ^= (t >> sr) | (d << sl);

-          h *= m;

-      }

-    }

-

-    h ^= h >> 13;

-    h *= m;

-    h ^= h >> 15;

-

-    return h;

-  }

-  else

-  {

-    while(len >= 4)

-    {

-      uint32_t k = *(uint32_t *)data;

-

-      MIX(h,k,m);

-

-      data += 4;

-      len -= 4;

-    }

-

-    //----------

-    // Handle tail bytes

-

-    switch(len)

-    {

-    case 3: h ^= data[2] << 16;

-    case 2: h ^= data[1] << 8;

-    case 1: h ^= data[0];

-        h *= m;

-    };

-

-    h ^= h >> 13;

-    h *= m;

-    h ^= h >> 15;

-

-    return h;

-  }

-}

-

-//-----------------------------------------------------------------------------

-

+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - This code makes a few assumptions about how your machine behaves -
+
+// 1. We can read a 4-byte value from any address without crashing
+// 2. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+//    machines.
+
+#include "MurmurHash2.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+
+  uint32_t h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+      h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+
+uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
+{
+  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+  const int r = 47;
+
+  uint64_t h = seed ^ (len * m);
+
+  const uint64_t * data = (const uint64_t *)key;
+  const uint64_t * end = data + (len/8);
+
+  while(data != end)
+  {
+    uint64_t k = *data++;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m; 
+    
+    h ^= k;
+    h *= m; 
+  }
+
+  const unsigned char * data2 = (const unsigned char*)data;
+
+  switch(len & 7)
+  {
+  case 7: h ^= uint64_t(data2[6]) << 48;
+  case 6: h ^= uint64_t(data2[5]) << 40;
+  case 5: h ^= uint64_t(data2[4]) << 32;
+  case 4: h ^= uint64_t(data2[3]) << 24;
+  case 3: h ^= uint64_t(data2[2]) << 16;
+  case 2: h ^= uint64_t(data2[1]) << 8;
+  case 1: h ^= uint64_t(data2[0]);
+          h *= m;
+  };
+ 
+  h ^= h >> r;
+  h *= m;
+  h ^= h >> r;
+
+  return h;
+} 
+
+
+// 64-bit hash for 32-bit platforms
+
+uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h1 = uint32_t(seed) ^ len;
+  uint32_t h2 = uint32_t(seed >> 32);
+
+  const uint32_t * data = (const uint32_t *)key;
+
+  while(len >= 8)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+
+    uint32_t k2 = *data++;
+    k2 *= m; k2 ^= k2 >> r; k2 *= m;
+    h2 *= m; h2 ^= k2;
+    len -= 4;
+  }
+
+  if(len >= 4)
+  {
+    uint32_t k1 = *data++;
+    k1 *= m; k1 ^= k1 >> r; k1 *= m;
+    h1 *= m; h1 ^= k1;
+    len -= 4;
+  }
+
+  switch(len)
+  {
+  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+  case 1: h2 ^= ((unsigned char*)data)[0];
+      h2 *= m;
+  };
+
+  h1 ^= h2 >> 18; h1 *= m;
+  h2 ^= h1 >> 22; h2 *= m;
+  h1 ^= h2 >> 17; h1 *= m;
+  h2 ^= h1 >> 19; h2 *= m;
+
+  uint64_t h = h1;
+
+  h = (h << 32) | h2;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHash2A, by Austin Appleby
+
+// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
+// construction. Bulk speed should be identical to Murmur2, small-key speed 
+// will be 10%-20% slower due to the added overhead at the end of the hash.
+
+// This variant fixes a minor issue where null keys were more likely to
+// collide with each other than expected, and also makes the function
+// more amenable to incremental implementations.
+
+#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+  uint32_t l = len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  uint32_t h = seed;
+
+  while(len >= 4)
+  {
+    uint32_t k = *(uint32_t*)data;
+
+    mmix(h,k);
+
+    data += 4;
+    len -= 4;
+  }
+
+  uint32_t t = 0;
+
+  switch(len)
+  {
+  case 3: t ^= data[2] << 16;
+  case 2: t ^= data[1] << 8;
+  case 1: t ^= data[0];
+  };
+
+  mmix(h,t);
+  mmix(h,l);
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+//-----------------------------------------------------------------------------
+// CMurmurHash2A, by Austin Appleby
+
+// This is a sample implementation of MurmurHash2A designed to work 
+// incrementally.
+
+// Usage - 
+
+// CMurmurHash2A hasher
+// hasher.Begin(seed);
+// hasher.Add(data1,size1);
+// hasher.Add(data2,size2);
+// ...
+// hasher.Add(dataN,sizeN);
+// uint32_t hash = hasher.End()
+
+class CMurmurHash2A
+{
+public:
+
+  void Begin ( uint32_t seed = 0 )
+  {
+    m_hash  = seed;
+    m_tail  = 0;
+    m_count = 0;
+    m_size  = 0;
+  }
+
+  void Add ( const unsigned char * data, int len )
+  {
+    m_size += len;
+
+    MixTail(data,len);
+
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t*)data;
+
+      mmix(m_hash,k);
+
+      data += 4;
+      len -= 4;
+    }
+
+    MixTail(data,len);
+  }
+
+  uint32_t End ( void )
+  {
+    mmix(m_hash,m_tail);
+    mmix(m_hash,m_size);
+
+    m_hash ^= m_hash >> 13;
+    m_hash *= m;
+    m_hash ^= m_hash >> 15;
+
+    return m_hash;
+  }
+
+private:
+
+  static const uint32_t m = 0x5bd1e995;
+  static const int r = 24;
+
+  void MixTail ( const unsigned char * & data, int & len )
+  {
+    while( len && ((len<4) || m_count) )
+    {
+      m_tail |= (*data++) << (m_count * 8);
+
+      m_count++;
+      len--;
+
+      if(m_count == 4)
+      {
+        mmix(m_hash,m_tail);
+        m_tail = 0;
+        m_count = 0;
+      }
+    }
+  }
+
+  uint32_t m_hash;
+  uint32_t m_tail;
+  uint32_t m_count;
+  uint32_t m_size;
+};
+
+//-----------------------------------------------------------------------------
+// MurmurHashNeutral2, by Austin Appleby
+
+// Same as MurmurHash2, but endian- and alignment-neutral.
+// Half the speed though, alas.
+
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  uint32_t h = seed ^ len;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    uint32_t k;
+
+    k  = data[0];
+    k |= data[1] << 8;
+    k |= data[2] << 16;
+    k |= data[3] << 24;
+
+    k *= m; 
+    k ^= k >> r; 
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+  
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+  case 2: h ^= data[1] << 8;
+  case 1: h ^= data[0];
+          h *= m;
+  };
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} 
+
+//-----------------------------------------------------------------------------
+// MurmurHashAligned2, by Austin Appleby
+
+// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+// on certain platforms. 
+
+// Performance will be lower than MurmurHash2
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
+{
+  const uint32_t m = 0x5bd1e995;
+  const int r = 24;
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  uint32_t h = seed ^ len;
+
+  int align = (uint64_t)data & 3;
+
+  if(align && (len >= 4))
+  {
+    // Pre-load the temp registers
+
+    uint32_t t = 0, d = 0;
+
+    switch(align)
+    {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    data += 4-align;
+    len -= 4-align;
+
+    int sl = 8 * (4-align);
+    int sr = 8 * align;
+
+    // Mix
+
+    while(len >= 4)
+    {
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      uint32_t k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    // Handle leftover data in temp registers
+
+    d = 0;
+
+    if(len >= align)
+    {
+      switch(align)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      }
+
+      uint32_t k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      //----------
+      // Handle tail bytes
+
+      switch(len)
+      {
+      case 3: h ^= data[2] << 16;
+      case 2: h ^= data[1] << 8;
+      case 1: h ^= data[0];
+          h *= m;
+      };
+    }
+    else
+    {
+      switch(len)
+      {
+      case 3: d |= data[2] << 16;
+      case 2: d |= data[1] << 8;
+      case 1: d |= data[0];
+      case 0: h ^= (t >> sr) | (d << sl);
+          h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+  else
+  {
+    while(len >= 4)
+    {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
+
+      data += 4;
+      len -= 4;
+    }
+
+    //----------
+    // Handle tail bytes
+
+    switch(len)
+    {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+        h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash2.h b/MurmurHash2.h
index 38dbbeb..32993c2 100644
--- a/MurmurHash2.h
+++ b/MurmurHash2.h
@@ -1,39 +1,39 @@
-//-----------------------------------------------------------------------------

-// MurmurHash2 was written by Austin Appleby, and is placed in the public

-// domain. The author hereby disclaims copyright to this source code.

-

-#ifndef _MURMURHASH2_H_

-#define _MURMURHASH2_H_

-

-//-----------------------------------------------------------------------------

-// Platform-specific functions and macros

-

-// Microsoft Visual Studio

-

-#if defined(_MSC_VER)

-

-typedef unsigned char uint8_t;

-typedef unsigned long uint32_t;

-typedef unsigned __int64 uint64_t;

-

-// Other compilers

-

-#else	// defined(_MSC_VER)

-

-#include <stdint.h>

-

-#endif // !defined(_MSC_VER)

-

-//-----------------------------------------------------------------------------

-

-uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );

-uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );

-uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );

-uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );

-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );

-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );

-

-//-----------------------------------------------------------------------------

-

-#endif // _MURMURHASH2_H_

-

+//-----------------------------------------------------------------------------
+// MurmurHash2 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH2_H_
+#define _MURMURHASH2_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed );
+uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed );
+uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
+uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
+uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH2_H_
+
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 0bf7386..09ffb26 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,335 +1,335 @@
-//-----------------------------------------------------------------------------

-// MurmurHash3 was written by Austin Appleby, and is placed in the public

-// domain. The author hereby disclaims copyright to this source code.

-

-// Note - The x86 and x64 versions do _not_ produce the same results, as the

-// algorithms are optimized for their respective platforms. You can still

-// compile and run any of them on any platform, but your performance with the

-// non-native version will be less than optimal.

-

-#include "MurmurHash3.h"

-

-//-----------------------------------------------------------------------------

-// Platform-specific functions and macros

-

-// Microsoft Visual Studio

-

-#if defined(_MSC_VER)

-

-#define FORCE_INLINE	__forceinline

-

-#include <stdlib.h>

-

-#define ROTL32(x,y)	_rotl(x,y)

-#define ROTL64(x,y)	_rotl64(x,y)

-

-#define BIG_CONSTANT(x) (x)

-

-// Other compilers

-

-#else	// defined(_MSC_VER)

-

-#define	FORCE_INLINE __attribute__((always_inline))

-

-inline uint32_t rotl32 ( uint32_t x, int8_t r )

-{

-  return (x << r) | (x >> (32 - r));

-}

-

-inline uint64_t rotl64 ( uint64_t x, int8_t r )

-{

-  return (x << r) | (x >> (64 - r));

-}

-

-#define	ROTL32(x,y)	rotl32(x,y)

-#define ROTL64(x,y)	rotl64(x,y)

-

-#define BIG_CONSTANT(x) (x##LLU)

-

-#endif // !defined(_MSC_VER)

-

-//-----------------------------------------------------------------------------

-// Block read - if your platform needs to do endian-swapping or can only

-// handle aligned reads, do the conversion here

-

-FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )

-{

-  return p[i];

-}

-

-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )

-{

-  return p[i];

-}

-

-//-----------------------------------------------------------------------------

-// Finalization mix - force all bits of a hash block to avalanche

-

-FORCE_INLINE uint32_t fmix ( uint32_t h )

-{

-  h ^= h >> 16;

-  h *= 0x85ebca6b;

-  h ^= h >> 13;

-  h *= 0xc2b2ae35;

-  h ^= h >> 16;

-

-  return h;

-}

-

-//----------

-

-FORCE_INLINE uint64_t fmix ( uint64_t k )

-{

-  k ^= k >> 33;

-  k *= BIG_CONSTANT(0xff51afd7ed558ccd);

-  k ^= k >> 33;

-  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);

-  k ^= k >> 33;

-

-  return k;

-}

-

-//-----------------------------------------------------------------------------

-

-void MurmurHash3_x86_32 ( const void * key, int len,

-                          uint32_t seed, void * out )

-{

-  const uint8_t * data = (const uint8_t*)key;

-  const int nblocks = len / 4;

-

-  uint32_t h1 = seed;

-

-  uint32_t c1 = 0xcc9e2d51;

-  uint32_t c2 = 0x1b873593;

-

-  //----------

-  // body

-

-  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

-

-  for(int i = -nblocks; i; i++)

-  {

-    uint32_t k1 = getblock(blocks,i);

-

-    k1 *= c1;

-    k1 = ROTL32(k1,15);

-    k1 *= c2;

-    

-    h1 ^= k1;

-    h1 = ROTL32(h1,13); 

-    h1 = h1*5+0xe6546b64;

-  }

-

-  //----------

-  // tail

-

-  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);

-

-  uint32_t k1 = 0;

-

-  switch(len & 3)

-  {

-  case 3: k1 ^= tail[2] << 16;

-  case 2: k1 ^= tail[1] << 8;

-  case 1: k1 ^= tail[0];

-          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

-  };

-

-  //----------

-  // finalization

-

-  h1 ^= len;

-

-  h1 = fmix(h1);

-

-  *(uint32_t*)out = h1;

-} 

-

-//-----------------------------------------------------------------------------

-

-void MurmurHash3_x86_128 ( const void * key, const int len,

-                           uint32_t seed, void * out )

-{

-  const uint8_t * data = (const uint8_t*)key;

-  const int nblocks = len / 16;

-

-  uint32_t h1 = seed;

-  uint32_t h2 = seed;

-  uint32_t h3 = seed;

-  uint32_t h4 = seed;

-

-  uint32_t c1 = 0x239b961b; 

-  uint32_t c2 = 0xab0e9789;

-  uint32_t c3 = 0x38b34ae5; 

-  uint32_t c4 = 0xa1e38b93;

-

-  //----------

-  // body

-

-  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);

-

-  for(int i = -nblocks; i; i++)

-  {

-    uint32_t k1 = getblock(blocks,i*4+0);

-    uint32_t k2 = getblock(blocks,i*4+1);

-    uint32_t k3 = getblock(blocks,i*4+2);

-    uint32_t k4 = getblock(blocks,i*4+3);

-

-    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

-

-    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;

-

-    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

-

-    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;

-

-    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

-

-    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;

-

-    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

-

-    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;

-  }

-

-  //----------

-  // tail

-

-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

-

-  uint32_t k1 = 0;

-  uint32_t k2 = 0;

-  uint32_t k3 = 0;

-  uint32_t k4 = 0;

-

-  switch(len & 15)

-  {

-  case 15: k4 ^= tail[14] << 16;

-  case 14: k4 ^= tail[13] << 8;

-  case 13: k4 ^= tail[12] << 0;

-           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

-

-  case 12: k3 ^= tail[11] << 24;

-  case 11: k3 ^= tail[10] << 16;

-  case 10: k3 ^= tail[ 9] << 8;

-  case  9: k3 ^= tail[ 8] << 0;

-           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

-

-  case  8: k2 ^= tail[ 7] << 24;

-  case  7: k2 ^= tail[ 6] << 16;

-  case  6: k2 ^= tail[ 5] << 8;

-  case  5: k2 ^= tail[ 4] << 0;

-           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

-

-  case  4: k1 ^= tail[ 3] << 24;

-  case  3: k1 ^= tail[ 2] << 16;

-  case  2: k1 ^= tail[ 1] << 8;

-  case  1: k1 ^= tail[ 0] << 0;

-           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

-  };

-

-  //----------

-  // finalization

-

-  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;

-

-  h1 += h2; h1 += h3; h1 += h4;

-  h2 += h1; h3 += h1; h4 += h1;

-

-  h1 = fmix(h1);

-  h2 = fmix(h2);

-  h3 = fmix(h3);

-  h4 = fmix(h4);

-

-  h1 += h2; h1 += h3; h1 += h4;

-  h2 += h1; h3 += h1; h4 += h1;

-

-  ((uint32_t*)out)[0] = h1;

-  ((uint32_t*)out)[1] = h2;

-  ((uint32_t*)out)[2] = h3;

-  ((uint32_t*)out)[3] = h4;

-}

-

-//-----------------------------------------------------------------------------

-

-void MurmurHash3_x64_128 ( const void * key, const int len,

-                           const uint32_t seed, void * out )

-{

-  const uint8_t * data = (const uint8_t*)key;

-  const int nblocks = len / 16;

-

-  uint64_t h1 = seed;

-  uint64_t h2 = seed;

-

-  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);

-  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

-

-  //----------

-  // body

-

-  const uint64_t * blocks = (const uint64_t *)(data);

-

-  for(int i = 0; i < nblocks; i++)

-  {

-    uint64_t k1 = getblock(blocks,i*2+0);

-    uint64_t k2 = getblock(blocks,i*2+1);

-

-    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

-

-    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

-

-    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

-

-    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;

-  }

-

-  //----------

-  // tail

-

-  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

-

-  uint64_t k1 = 0;

-  uint64_t k2 = 0;

-

-  switch(len & 15)

-  {

-  case 15: k2 ^= uint64_t(tail[14]) << 48;

-  case 14: k2 ^= uint64_t(tail[13]) << 40;

-  case 13: k2 ^= uint64_t(tail[12]) << 32;

-  case 12: k2 ^= uint64_t(tail[11]) << 24;

-  case 11: k2 ^= uint64_t(tail[10]) << 16;

-  case 10: k2 ^= uint64_t(tail[ 9]) << 8;

-  case  9: k2 ^= uint64_t(tail[ 8]) << 0;

-           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

-

-  case  8: k1 ^= uint64_t(tail[ 7]) << 56;

-  case  7: k1 ^= uint64_t(tail[ 6]) << 48;

-  case  6: k1 ^= uint64_t(tail[ 5]) << 40;

-  case  5: k1 ^= uint64_t(tail[ 4]) << 32;

-  case  4: k1 ^= uint64_t(tail[ 3]) << 24;

-  case  3: k1 ^= uint64_t(tail[ 2]) << 16;

-  case  2: k1 ^= uint64_t(tail[ 1]) << 8;

-  case  1: k1 ^= uint64_t(tail[ 0]) << 0;

-           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

-  };

-

-  //----------

-  // finalization

-

-  h1 ^= len; h2 ^= len;

-

-  h1 += h2;

-  h2 += h1;

-

-  h1 = fmix(h1);

-  h2 = fmix(h2);

-

-  h1 += h2;

-  h2 += h1;

-

-  ((uint64_t*)out)[0] = h1;

-  ((uint64_t*)out)[1] = h2;

-}

-

-//-----------------------------------------------------------------------------

-

+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+  return p[i];
+}
+
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  uint32_t c1 = 0xcc9e2d51;
+  uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix(h1);
+
+  *(uint32_t*)out = h1;
+} 
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  uint32_t c1 = 0x239b961b; 
+  uint32_t c2 = 0xab0e9789;
+  uint32_t c3 = 0x38b34ae5; 
+  uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+  h3 = fmix(h3);
+  h4 = fmix(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= uint64_t(tail[14]) << 48;
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/MurmurHash3.h b/MurmurHash3.h
index 58e9820..54e9d3f 100644
--- a/MurmurHash3.h
+++ b/MurmurHash3.h
@@ -1,37 +1,37 @@
-//-----------------------------------------------------------------------------

-// MurmurHash3 was written by Austin Appleby, and is placed in the public

-// domain. The author hereby disclaims copyright to this source code.

-

-#ifndef _MURMURHASH3_H_

-#define _MURMURHASH3_H_

-

-//-----------------------------------------------------------------------------

-// Platform-specific functions and macros

-

-// Microsoft Visual Studio

-

-#if defined(_MSC_VER)

-

-typedef unsigned char uint8_t;

-typedef unsigned long uint32_t;

-typedef unsigned __int64 uint64_t;

-

-// Other compilers

-

-#else	// defined(_MSC_VER)

-

-#include <stdint.h>

-

-#endif // !defined(_MSC_VER)

-

-//-----------------------------------------------------------------------------

-

-void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );

-

-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );

-

-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );

-

-//-----------------------------------------------------------------------------

-

-#endif // _MURMURHASH3_H_

+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/Platform.cpp b/Platform.cpp
index d90dab8..d7f5fb8 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -1,42 +1,42 @@
-#include "Platform.h"

-

-#include <stdio.h>

-

-void testRDTSC ( void )

-{

-  int64_t temp = rdtsc();

-

-  printf("%d",(int)temp);

-}

-

-#if defined(_MSC_VER)

-

-#include <windows.h>

-

-void SetAffinity ( int cpu )

-{

-  SetProcessAffinityMask(GetCurrentProcess(),cpu);

-  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);

-}

-

-#else

-

-#include <sched.h>

-

-void SetAffinity ( int /*cpu*/ )

-{

-#ifndef __CYGWIN__

-  cpu_set_t mask;

-    

-  CPU_ZERO(&mask);

-    

-  CPU_SET(2,&mask);

-    

-  if( sched_setaffinity(0,sizeof(mask),&mask) == -1)

-  {

-    printf("WARNING: Could not set CPU affinity\n");

-  }

-#endif

-}

-

-#endif

+#include "Platform.h"
+
+#include <stdio.h>
+
+void testRDTSC ( void ) // Reads the time stamp counter once via rdtsc() and prints it.
+{
+  int64_t temp = rdtsc();
+  // Only what survives the cast to int is printed, with no trailing newline.
+  printf("%d",(int)temp);
+}
+
+#if defined(_MSC_VER)
+
+#include <windows.h>
+
+void SetAffinity ( int cpu ) // MSVC build: 'cpu' is handed to SetProcessAffinityMask as its second argument.
+{
+  SetProcessAffinityMask(GetCurrentProcess(),cpu); // return value is not checked
+  SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); // also raises the calling thread's priority
+}
+
+#else
+
+#include <sched.h>
+
+void SetAffinity ( int /*cpu*/ ) // Non-MSVC build: the argument is ignored.
+{
+#ifndef __CYGWIN__ // under Cygwin the whole body is compiled out and this is a no-op
+  cpu_set_t mask;
+  // Start from an empty CPU set...
+  CPU_ZERO(&mask);
+  // ...and add only CPU index 2, which is hard-coded here.
+  CPU_SET(2,&mask);
+  // First argument is pid 0 (presumably the caller - see sched_setaffinity(2)); failure only warns.
+  if( sched_setaffinity(0,sizeof(mask),&mask) == -1)
+  {
+    printf("WARNING: Could not set CPU affinity\n");
+  }
+#endif
+}
+
+#endif
diff --git a/Platform.h b/Platform.h
index 8bb0d58..fcb68e8 100644
--- a/Platform.h
+++ b/Platform.h
@@ -1,85 +1,94 @@
-//-----------------------------------------------------------------------------

-// Platform-specific functions and macros

-

-#pragma once

-

-void SetAffinity ( int cpu );

-

-//-----------------------------------------------------------------------------

-// Microsoft Visual Studio

-

-#if defined(_MSC_VER)

-

-#define FORCE_INLINE	__forceinline

-#define	NEVER_INLINE  __declspec(noinline)

-

-#include <stdlib.h>

-#include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'

-#include <intrin.h> // for __rdtsc

-#include "pstdint.h"

-

-#define ROTL32(x,y)	_rotl(x,y)

-#define ROTL64(x,y)	_rotl64(x,y)

-#define ROTR32(x,y)	_rotr(x,y)

-#define ROTR64(x,y)	_rotr64(x,y)

-

-#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest

-#pragma warning(disable : 4100)

-#pragma warning(disable : 4702)

-

-#define BIG_CONSTANT(x) (x)

-

-// RDTSC == Read Time Stamp Counter

-

-#define rdtsc() __rdtsc()

-

-//-----------------------------------------------------------------------------

-// Other compilers

-

-#else	//	defined(_MSC_VER)

-

-#include <stdint.h>

-

-#define	FORCE_INLINE __attribute__((always_inline))

-#define	NEVER_INLINE __attribute__((noinline))

-

-inline uint32_t rotl32 ( uint32_t x, int8_t r )

-{

-  return (x << r) | (x >> (32 - r));

-}

-

-inline uint64_t rotl64 ( uint64_t x, int8_t r )

-{

-  return (x << r) | (x >> (64 - r));

-}

-

-inline uint32_t rotr32 ( uint32_t x, int8_t r )

-{

-  return (x >> r) | (x << (32 - r));

-}

-

-inline uint64_t rotr64 ( uint64_t x, int8_t r )

-{

-  return (x >> r) | (x << (64 - r));

-}

-

-#define	ROTL32(x,y)	rotl32(x,y)

-#define ROTL64(x,y)	rotl64(x,y)

-#define	ROTR32(x,y)	rotr32(x,y)

-#define ROTR64(x,y)	rotr64(x,y)

-

-#define BIG_CONSTANT(x) (x##LLU)

-

-__inline__ unsigned long long int rdtsc()

-{

-    unsigned long long int x;

-    __asm__ volatile ("rdtsc" : "=A" (x));

-    return x;

-}

-

-#include <strings.h>

-#define _stricmp strcasecmp

-

-#endif	//	!defined(_MSC_VER)

-

-//-----------------------------------------------------------------------------

+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+#pragma once
+
+void SetAffinity ( int cpu );
+
+//-----------------------------------------------------------------------------
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+#define	NEVER_INLINE  __declspec(noinline)
+
+#include <stdlib.h>
+#include <math.h>   // Has to be included before intrin.h or VC complains about 'ceil'
+#include <intrin.h> // for __rdtsc
+#include "pstdint.h"
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+#define ROTR32(x,y)	_rotr(x,y)
+#define ROTR64(x,y)	_rotr64(x,y)
+
+#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4702)
+
+#define BIG_CONSTANT(x) (x)
+
+// RDTSC == Read Time Stamp Counter
+
+#define rdtsc() __rdtsc()
+
+//-----------------------------------------------------------------------------
+// Other compilers
+
+#else	//	defined(_MSC_VER)
+
+#include <stdint.h>
+
+#define	FORCE_INLINE __attribute__((always_inline))
+#define	NEVER_INLINE __attribute__((noinline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r ) // rotate x left by r bits, r in [0,31]
+{
+  return (x << r) | (x >> ((32 - r) & 31)); // mask keeps r == 0 from shifting by 32 (undefined)
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r ) // rotate x left by r bits, r in [0,63]
+{
+  return (x << r) | (x >> ((64 - r) & 63)); // mask keeps r == 0 from shifting by 64 (undefined)
+}
+
+inline uint32_t rotr32 ( uint32_t x, int8_t r ) // rotate x right by r bits, r in [0,31]
+{
+  return (x >> r) | (x << ((32 - r) & 31)); // mask keeps r == 0 from shifting by 32 (undefined)
+}
+
+inline uint64_t rotr64 ( uint64_t x, int8_t r ) // rotate x right by r bits, r in [0,63]
+{
+  return (x >> r) | (x << ((64 - r) & 63)); // mask keeps r == 0 from shifting by 64 (undefined)
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+#define	ROTR32(x,y)	rotr32(x,y)
+#define ROTR64(x,y)	rotr64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+__inline__ unsigned long long int rdtsc() // Returns the 64-bit x86 time stamp counter.
+{
+#ifdef __x86_64__
+    unsigned int a, d; // "=a"/"=d" bind these to EAX (low half) and EDX (high half)
+    __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
+    return (unsigned long long)a | ((unsigned long long)d << 32); // 64-bit even where 'long' is 32-bit
+#else
+#ifndef __i386__
+#error Must be x86 either 32-bit or 64-bit.
+#endif
+    unsigned long long int x;
+    __asm__ volatile ("rdtsc" : "=A" (x)); // "=A" is the EDX:EAX register pair on 32-bit x86
+    return x;
+#endif
+}
+
+#include <strings.h>
+#define _stricmp strcasecmp
+
+#endif	//	!defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
diff --git a/Random.cpp b/Random.cpp
index e98f5ef..87de595 100644
--- a/Random.cpp
+++ b/Random.cpp
@@ -1,8 +1,8 @@
-#include "Random.h"

-

-Rand g_rand1(1);

-Rand g_rand2(2);

-Rand g_rand3(3);

-Rand g_rand4(4);

-

-//-----------------------------------------------------------------------------

+#include "Random.h"
+
+Rand g_rand1(1);
+Rand g_rand2(2);
+Rand g_rand3(3);
+Rand g_rand4(4);
+
+//-----------------------------------------------------------------------------
diff --git a/Random.h b/Random.h
index e5a78fb..7e0df3f 100644
--- a/Random.h
+++ b/Random.h
@@ -1,117 +1,117 @@
-#pragma once

-

-#include "Types.h"

-

-//-----------------------------------------------------------------------------

-// Xorshift RNG based on code by George Marsaglia

-// http://en.wikipedia.org/wiki/Xorshift

-

-struct Rand

-{

-  uint32_t x;

-  uint32_t y;

-  uint32_t z;

-  uint32_t w;

-

-  Rand()

-  {

-    reseed(uint32_t(0));

-  }

-

-  Rand( uint32_t seed )

-  {

-    reseed(seed);

-  }

-

-  void reseed ( uint32_t seed )

-  {

-    x = 0x498b3bc5 ^ seed;

-    y = 0;

-    z = 0;

-    w = 0;

-

-    for(int i = 0; i < 10; i++) mix();

-  }

-

-  void reseed ( uint64_t seed )

-  {

-    x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);

-    y = 0x5a05089a ^ (uint32_t)(seed >> 32);

-    z = 0;

-    w = 0;

-

-    for(int i = 0; i < 10; i++) mix();

-  }

-

-  //-----------------------------------------------------------------------------

-

-  void mix ( void )

-  {

-    uint32_t t = x ^ (x << 11);

-    x = y; y = z; z = w;

-    w = w ^ (w >> 19) ^ t ^ (t >> 8); 

-  }

-

-  uint32_t rand_u32 ( void )

-  {

-    mix();

-

-    return x;

-  }

-

-  uint64_t rand_u64 ( void ) 

-  {

-    mix();

-

-    uint64_t a = x;

-    uint64_t b = y;

-

-    return (a << 32) | b;

-  }

-

-  void rand_p ( void * blob, int bytes )

-  {

-    uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);

-

-    while(bytes >= 4)

-    {

-      blocks[0] = rand_u32();

-      blocks++;

-      bytes -= 4;

-    }

-

-    uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);

-

-    for(int i = 0; i < bytes; i++)

-    {

-      tail[i] = (uint8_t)rand_u32();

-    }

-  }

-};

-

-//-----------------------------------------------------------------------------

-

-extern Rand g_rand1;

-

-inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }

-inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }

-

-inline void rand_p ( void * blob, int bytes )

-{

-  uint32_t * blocks = (uint32_t*)blob;

-

-  while(bytes >= 4)

-  {

-    *blocks++ = rand_u32();

-    bytes -= 4;

-  }

-

-  uint8_t * tail = (uint8_t*)blocks;

-

-  for(int i = 0; i < bytes; i++)

-  {

-    tail[i] = (uint8_t)rand_u32();

-  }

-}

-

-//-----------------------------------------------------------------------------

+#pragma once
+
+#include "Types.h"
+
+//-----------------------------------------------------------------------------
+// Xorshift RNG based on code by George Marsaglia
+// http://en.wikipedia.org/wiki/Xorshift
+
+struct Rand // xorshift generator state: four 32-bit words advanced by mix()
+{
+  uint32_t x;
+  uint32_t y;
+  uint32_t z;
+  uint32_t w;
+
+  Rand() // default construction reseeds with the 32-bit seed 0
+  {
+    reseed(uint32_t(0));
+  }
+
+  Rand( uint32_t seed )
+  {
+    reseed(seed);
+  }
+
+  void reseed ( uint32_t seed ) // resets all four words from a 32-bit seed
+  {
+    x = 0x498b3bc5 ^ seed;
+    y = 0;
+    z = 0;
+    w = 0;
+    // Ten warm-up calls to mix() before the generator is used.
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  void reseed ( uint64_t seed ) // 64-bit overload: low half is folded into x, high half into y
+  {
+    x = 0x498b3bc5 ^ (uint32_t)(seed >>  0);
+    y = 0x5a05089a ^ (uint32_t)(seed >> 32);
+    z = 0;
+    w = 0;
+    // Same ten warm-up calls as the 32-bit overload.
+    for(int i = 0; i < 10; i++) mix();
+  }
+
+  //-----------------------------------------------------------------------------
+
+  void mix ( void ) // one generator step: x<-y, y<-z, z<-w, then a new w from old x and old w
+  {
+    uint32_t t = x ^ (x << 11);
+    x = y; y = z; z = w;
+    w = w ^ (w >> 19) ^ t ^ (t >> 8); 
+  }
+
+  uint32_t rand_u32 ( void ) // advances the state once and returns x
+  {
+    mix();
+
+    return x;
+  }
+
+  uint64_t rand_u64 ( void ) // advances the state once; x becomes the high word, y the low word
+  {
+    mix();
+
+    uint64_t a = x;
+    uint64_t b = y;
+
+    return (a << 32) | b;
+  }
+
+  void rand_p ( void * blob, int bytes ) // fills blob[0,bytes) with generator output
+  {
+    uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);
+    // Whole 32-bit words first, one rand_u32() call per word.
+    while(bytes >= 4)
+    {
+      blocks[0] = rand_u32();
+      blocks++;
+      bytes -= 4;
+    }
+    // The 0-3 leftover bytes each take the low 8 bits of a fresh rand_u32() value.
+    uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);
+
+    for(int i = 0; i < bytes; i++)
+    {
+      tail[i] = (uint8_t)rand_u32();
+    }
+  }
+};
+
+//-----------------------------------------------------------------------------
+
+extern Rand g_rand1;
+
+inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); } // draws from the global g_rand1 generator
+inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); } // draws from the global g_rand1 generator
+
+inline void rand_p ( void * blob, int bytes ) // Fills blob[0,bytes) using the free function rand_u32().
+{
+  uint32_t * blocks = (uint32_t*)blob;
+  // Whole 32-bit words first, one rand_u32() call per word.
+  while(bytes >= 4)
+  {
+    *blocks++ = rand_u32();
+    bytes -= 4;
+  }
+  // The 0-3 leftover bytes each take the low 8 bits of a fresh rand_u32() value.
+  uint8_t * tail = (uint8_t*)blocks;
+
+  for(int i = 0; i < bytes; i++)
+  {
+    tail[i] = (uint8_t)rand_u32();
+  }
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 2265389..d91f6e4 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -1,242 +1,242 @@
-#include "SpeedTest.h"

-

-#include "Random.h"

-

-#include <stdio.h>   // for printf

-#include <memory.h>  // for memset

-#include <math.h>    // for sqrt

-#include <algorithm> // for sort

-

-//-----------------------------------------------------------------------------

-// We view our timing values as a series of random variables V that has been

-// contaminated with occasional outliers due to cache misses, thread

-// preemption, etcetera. To filter out the outliers, we search for the largest

-// subset of V such that all its values are within three standard deviations

-// of the mean.

-

-double CalcMean ( std::vector<double> & v )

-{

-  double mean = 0;

-  

-  for(int i = 0; i < (int)v.size(); i++)

-  {

-    mean += v[i];

-  }

-  

-  mean /= double(v.size());

-  

-  return mean;

-}

-

-double CalcMean ( std::vector<double> & v, int a, int b )

-{

-  double mean = 0;

-  

-  for(int i = a; i <= b; i++)

-  {

-    mean += v[i];

-  }

-  

-  mean /= (b-a+1);

-  

-  return mean;

-}

-

-double CalcStdv ( std::vector<double> & v, int a, int b )

-{

-  double mean = CalcMean(v,a,b);

-

-  double stdv = 0;

-  

-  for(int i = a; i <= b; i++)

-  {

-    double x = v[i] - mean;

-    

-    stdv += x*x;

-  }

-  

-  stdv = sqrt(stdv / (b-a+1));

-  

-  return stdv;

-}

-

-// Return true if the largest value in v[0,len) is more than three

-// standard deviations from the mean

-

-bool ContainsOutlier ( std::vector<double> & v, size_t len )

-{

-  double mean = 0;

-  

-  for(size_t i = 0; i < len; i++)

-  {

-    mean += v[i];

-  }

-  

-  mean /= double(len);

-  

-  double stdv = 0;

-  

-  for(size_t i = 0; i < len; i++)

-  {

-    double x = v[i] - mean;

-    stdv += x*x;

-  }

-  

-  stdv = sqrt(stdv / double(len));

-

-  double cutoff = mean + stdv*3;

-  

-  return v[len-1] > cutoff;  

-}

-

-// Do a binary search to find the largest subset of v that does not contain

-// outliers.

-

-void FilterOutliers ( std::vector<double> & v )

-{

-  std::sort(v.begin(),v.end());

-  

-  size_t len = 0;

-  

-  for(size_t x = 0x40000000; x; x = x >> 1 )

-  {

-    if((len | x) >= v.size()) continue;

-    

-    if(!ContainsOutlier(v,len | x))

-    {

-      len |= x;

-    }

-  }

-  

-  v.resize(len);

-}

-

-// Iteratively tighten the set to find a subset that does not contain

-// outliers. I'm not positive this works correctly in all cases.

-

-void FilterOutliers2 ( std::vector<double> & v )

-{

-  std::sort(v.begin(),v.end());

-  

-  int a = 0;

-  int b = (int)(v.size() - 1);

-  

-  for(int i = 0; i < 10; i++)

-  {

-    //printf("%d %d\n",a,b);

-  

-    double mean = CalcMean(v,a,b);

-    double stdv = CalcStdv(v,a,b);

-    

-    double cutA = mean - stdv*3;  

-    double cutB = mean + stdv*3;

-    

-    while((a < b) && (v[a] < cutA)) a++;

-    while((b > a) && (v[b] > cutB)) b--;

-  }

-  

-  std::vector<double> v2;

-  

-  v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);

-  

-  v.swap(v2);

-}

-

-//-----------------------------------------------------------------------------

-// We really want the rdtsc() calls to bracket the function call as tightly

-// as possible, but that's hard to do portably. We'll try and get as close as

-// possible by marking the function as NEVER_INLINE (to keep the optimizer from

-// moving it) and marking the timing variables as "volatile register".

-

-NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )

-{

-  volatile register int64_t begin,end;

-  

-  uint32_t temp[16];

-  

-  begin = rdtsc();

-  

-  hash(key,len,seed,temp);

-  

-  end = rdtsc();

-  

-  return end-begin;

-}

-

-//-----------------------------------------------------------------------------

-

-double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )

-{

-  Rand r(seed);

-  

-  uint8_t * buf = new uint8_t[blocksize + 512];

-

-  uint64_t t1 = reinterpret_cast<uint64_t>(buf);

-  

-  t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);

-  t1 += align;

-  

-  uint8_t * block = reinterpret_cast<uint8_t*>(t1);

-

-  r.rand_p(block,blocksize);

-

-  //----------

-

-  std::vector<double> times;

-  times.reserve(trials);

-

-  for(int itrial = 0; itrial < trials; itrial++)

-  {

-    r.rand_p(block,blocksize);

-    

-    double t = (double)timehash(hash,block,blocksize,itrial);

-    

-    if(t > 0) times.push_back(t);

-  }

-

-  //----------

-  

-  std::sort(times.begin(),times.end());

-  

-  FilterOutliers(times);

-  

-  delete [] buf;

-  

-  return CalcMean(times);

-}

-

-//-----------------------------------------------------------------------------

-// 256k blocks seem to give the best results.

-

-void BulkSpeedTest ( pfHash hash, uint32_t seed )

-{

-  const int trials = 2999;

-  const int blocksize = 256 * 1024;

-

-  printf("Bulk speed test - %d-byte keys\n",blocksize);

-

-  for(int align = 0; align < 8; align++)

-  {

-    double cycles = SpeedTest(hash,seed,trials,blocksize,align);

-    

-    double bestbpc = double(blocksize)/cycles;

-    

-    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);

-    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )

-{

-  const int trials = 999999;

-

-  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);

-  

-  double cycles = SpeedTest(hash,seed,trials,keysize,0);

-  

-  printf("%8.2f cycles/hash\n",cycles);  

-}

-

-//-----------------------------------------------------------------------------

+#include "SpeedTest.h"
+
+#include "Random.h"
+
+#include <stdio.h>   // for printf
+#include <memory.h>  // for memset
+#include <math.h>    // for sqrt
+#include <algorithm> // for sort
+
+//-----------------------------------------------------------------------------
+// We view our timing values as a series of random variables V that has been
+// contaminated with occasional outliers due to cache misses, thread
+// preemption, etcetera. To filter out the outliers, we search for the largest
+// subset of V such that all its values are within three standard deviations
+// of the mean.
+
+double CalcMean ( std::vector<double> & v ) // Arithmetic mean of every element of v.
+{
+  double mean = 0;
+  
+  for(int i = 0; i < (int)v.size(); i++)
+  {
+    mean += v[i];
+  }
+  // No guard for an empty vector: the division below is then 0.0/0.0.
+  mean /= double(v.size());
+  
+  return mean;
+}
+
+double CalcMean ( std::vector<double> & v, int a, int b ) // Mean of v[a..b], both ends inclusive.
+{
+  double mean = 0;
+  
+  for(int i = a; i <= b; i++)
+  {
+    mean += v[i];
+  }
+  // b-a+1 is the element count of the inclusive range.
+  mean /= (b-a+1);
+  
+  return mean;
+}
+
+double CalcStdv ( std::vector<double> & v, int a, int b ) // Standard deviation of v[a..b], both ends inclusive.
+{
+  double mean = CalcMean(v,a,b);
+
+  double stdv = 0;
+  // Accumulate squared deviations from the mean of the range.
+  for(int i = a; i <= b; i++)
+  {
+    double x = v[i] - mean;
+    
+    stdv += x*x;
+  }
+  // Divides by the element count N (not N-1) before taking the square root.
+  stdv = sqrt(stdv / (b-a+1));
+  
+  return stdv;
+}
+
+// Return true if v[len-1] is more than three standard deviations above the
+// mean of v[0,len). v[len-1] is the largest of those values only when v is
+// sorted ascending; len must be at least 1 because v[len-1] is read.
+bool ContainsOutlier ( std::vector<double> & v, size_t len )
+{
+  double mean = 0;
+  
+  for(size_t i = 0; i < len; i++)
+  {
+    mean += v[i];
+  }
+  
+  mean /= double(len);
+  
+  double stdv = 0;
+  // Sum of squared deviations; divided by len (not len-1) below.
+  for(size_t i = 0; i < len; i++)
+  {
+    double x = v[i] - mean;
+    stdv += x*x;
+  }
+  
+  stdv = sqrt(stdv / double(len));
+
+  double cutoff = mean + stdv*3;
+  // Only the upper side is tested; unusually small values are never flagged.
+  return v[len-1] > cutoff;  
+}
+
+// Do a binary search to find the largest subset of v that does not contain
+// outliers. v is sorted ascending, a prefix length is built bit by bit from
+// the highest bit down, and v is truncated to that many smallest values.
+void FilterOutliers ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+  
+  size_t len = 0;
+  // x walks single bits from 0x40000000 down to 1; candidate lengths >= v.size() are skipped.
+  for(size_t x = 0x40000000; x; x = x >> 1 )
+  {
+    if((len | x) >= v.size()) continue;
+    
+    if(!ContainsOutlier(v,len | x))
+    {
+      len |= x;
+    }
+  }
+  // For a non-empty v, len ends up below v.size(), so the largest value is always dropped.
+  v.resize(len);
+}
+
+// Iteratively tighten the set to find a subset that does not contain
+// outliers. I'm not positive this works correctly in all cases.
+// Sorts v, trims values from both ends of the sorted order, and replaces v with what is left.
+void FilterOutliers2 ( std::vector<double> & v )
+{
+  std::sort(v.begin(),v.end());
+  
+  int a = 0;
+  int b = (int)(v.size() - 1);
+  // Ten fixed passes: recompute the 3-sigma band of v[a..b], then shrink [a,b] towards it.
+  for(int i = 0; i < 10; i++)
+  {
+    //printf("%d %d\n",a,b);
+  
+    double mean = CalcMean(v,a,b);
+    double stdv = CalcStdv(v,a,b);
+    
+    double cutA = mean - stdv*3;  
+    double cutB = mean + stdv*3;
+    
+    while((a < b) && (v[a] < cutA)) a++;
+    while((b > a) && (v[b] > cutB)) b--;
+  }
+  // Keep only the surviving inclusive range v[a..b].
+  std::vector<double> v2;
+  
+  v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
+  
+  v.swap(v2);
+}
+
+//-----------------------------------------------------------------------------
+// We really want the rdtsc() calls to bracket the function call as tightly
+// as possible, but that's hard to do portably. We'll try and get as close as
+// possible by marking the function as NEVER_INLINE (to keep the optimizer from
+// moving it) and marking the timing variables as "volatile".
+// Returns the difference between the two rdtsc() readings taken around one hash call.
+NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
+{
+  volatile int64_t begin,end; // 'register' dropped: the specifier was removed in C++17
+  
+  uint32_t temp[16]; // scratch output buffer for the hash; its contents are discarded
+  
+  begin = rdtsc();
+  
+  hash(key,len,seed,temp);
+  
+  end = rdtsc();
+  
+  return end-begin;
+}
+
+//-----------------------------------------------------------------------------
+// Returns the mean timehash() result over 'trials' runs on random keys, after FilterOutliers.
+double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
+{
+  Rand r(seed); // 'seed' drives key generation only; the hash is seeded with the trial index
+  // Over-allocate by 512 bytes so the key can start at a chosen address inside buf.
+  uint8_t * buf = new uint8_t[blocksize + 512];
+
+  uint64_t t1 = reinterpret_cast<uint64_t>(buf);
+  // Round the address up to a multiple of 256, then offset it by 'align' bytes.
+  t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
+  t1 += align;
+  
+  uint8_t * block = reinterpret_cast<uint8_t*>(t1);
+
+  r.rand_p(block,blocksize);
+
+  //----------
+
+  std::vector<double> times;
+  times.reserve(trials);
+
+  for(int itrial = 0; itrial < trials; itrial++)
+  {
+    r.rand_p(block,blocksize);
+    // Fresh key bytes every trial; the trial index is passed to the hash as its seed.
+    double t = (double)timehash(hash,block,blocksize,itrial);
+    // Non-positive timings are discarded.
+    if(t > 0) times.push_back(t);
+  }
+
+  //----------
+  
+  std::sort(times.begin(),times.end());
+  
+  FilterOutliers(times);
+  
+  delete [] buf;
+  // Mean of whatever timings survived filtering (CalcMean has no empty-vector guard).
+  return CalcMean(times);
+}
+
+//-----------------------------------------------------------------------------
+// 256k blocks seem to give the best results.
+// Times the hash on 256 KiB keys at alignment offsets 0..7 and prints one line per offset.
+void BulkSpeedTest ( pfHash hash, uint32_t seed )
+{
+  const int trials = 2999;
+  const int blocksize = 256 * 1024;
+
+  printf("Bulk speed test - %d-byte keys\n",blocksize);
+
+  for(int align = 0; align < 8; align++)
+  {
+    double cycles = SpeedTest(hash,seed,trials,blocksize,align);
+    // Bytes hashed per cycle for this alignment.
+    double bestbpc = double(blocksize)/cycles;
+    // Converted to MiB/s assuming 3e9 cycles per second and 1048576 bytes per MiB.
+    double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
+    printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles )
+{
+  const int trials = 999999; // number of timed hash calls handed to SpeedTest
+  // hashsize is not used here; 'verbose' only controls the label, the result line always prints.
+  if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+  
+  double cycles = SpeedTest(hash,seed,trials,keysize,0);
+  outCycles = cycles; // also report the measurement through the out-parameter
+  printf("%8.2f cycles/hash\n",cycles);  
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.h b/SpeedTest.h
index b881a78..7bd2167 100644
--- a/SpeedTest.h
+++ b/SpeedTest.h
@@ -1,8 +1,8 @@
-#pragma once

-

-#include "Types.h"

-

-void BulkSpeedTest ( pfHash hash, uint32_t seed );

-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );

-

-//-----------------------------------------------------------------------------

+#pragma once
+
+#include "Types.h"
+
+void BulkSpeedTest ( pfHash hash, uint32_t seed );
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );
+
+//-----------------------------------------------------------------------------
diff --git a/Spooky.cpp b/Spooky.cpp
new file mode 100644
index 0000000..47f5d75
--- /dev/null
+++ b/Spooky.cpp
@@ -0,0 +1,347 @@
+// Spooky Hash
+// A 128-bit noncryptographic hash, for checksums and table lookup
+// By Bob Jenkins.  Public domain.
+//   Oct 31 2010: published framework, disclaimer ShortHash isn't right
+//   Nov 7 2010: disabled ShortHash
+//   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
+
+#include <memory.h>
+#include "Spooky.h"
+
+#define ALLOW_UNALIGNED_READS 1
+
+//
+// short hash ... it could be used on any message, 
+// but it's used by Spooky just for short messages.
+// *hash1/*hash2 supply the two seed words on entry and receive the result.
+void SpookyHash::Short(
+    const void *message,
+    size_t length,
+    uint64 *hash1,
+    uint64 *hash2)
+{
+    uint64 buf[2*sc_numVars]; // callers in this file pass up to sc_bufSize-1 bytes, i.e. two blocks
+    union 
+    { 
+        const uint8 *p8; 
+        uint32 *p32;
+        uint64 *p64; 
+        size_t i; 
+    } u;
+
+    u.p8 = (const uint8 *)message;
+    // When unaligned reads are disabled, hash an aligned copy of a misaligned message.
+    if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
+    {
+        memcpy(buf, message, length);
+        u.p64 = buf;
+    }
+
+    size_t remainder = length%32;
+    uint64 a=*hash1;
+    uint64 b=*hash2;
+    uint64 c=sc_const;
+    uint64 d=sc_const;
+
+    if (length > 15)
+    {
+        const uint64 *end = u.p64 + (length/32)*4;
+        
+        // handle all complete sets of 32 bytes
+        for (; u.p64 < end; u.p64 += 4)
+        {
+            c += u.p64[0];
+            d += u.p64[1];
+            ShortMix(a,b,c,d);
+            a += u.p64[2];
+            b += u.p64[3];
+        }
+        
+        //Handle the case of 16+ remaining bytes.
+        if (remainder >= 16)
+        {
+            c += u.p64[0];
+            d += u.p64[1];
+            ShortMix(a,b,c,d);
+            u.p64 += 2;
+            remainder -= 16;
+        }
+    }
+    
+    // Handle the last 0..15 bytes, and its length (d is overwritten here, not added to)
+    d = ((uint64)length) << 56;
+    switch (remainder)
+    {
+    case 15:
+        d += ((uint64)u.p8[14]) << 48; // fall through
+    case 14:
+        d += ((uint64)u.p8[13]) << 40; // fall through
+    case 13:
+        d += ((uint64)u.p8[12]) << 32; // fall through
+    case 12:
+        d += u.p32[2];
+        c += u.p64[0];
+        break;
+    case 11:
+        d += ((uint64)u.p8[10]) << 16; // fall through
+    case 10:
+        d += ((uint64)u.p8[9]) << 8; // fall through
+    case 9:
+        d += (uint64)u.p8[8]; // fall through
+    case 8:
+        c += u.p64[0];
+        break;
+    case 7:
+        c += ((uint64)u.p8[6]) << 48; // fall through
+    case 6:
+        c += ((uint64)u.p8[5]) << 40; // fall through
+    case 5:
+        c += ((uint64)u.p8[4]) << 32; // fall through
+    case 4:
+        c += u.p32[0];
+        break;
+    case 3:
+        c += ((uint64)u.p8[2]) << 16; // fall through
+    case 2:
+        c += ((uint64)u.p8[1]) << 8; // fall through
+    case 1:
+        c += (uint64)u.p8[0];
+        break;
+    case 0:
+        c += sc_const;
+        d += sc_const;
+    }
+    ShortEnd(a,b,c,d);
+    *hash1 = a;
+    *hash2 = b;
+}
+
+
+
+
+// do the whole hash in one call; *hash1/*hash2 are the seeds on entry and the result on return
+void SpookyHash::Hash128(
+    const void *message, 
+    size_t length, 
+    uint64 *hash1, 
+    uint64 *hash2)
+{
+    if (length < sc_bufSize) // messages shorter than sc_bufSize bytes are delegated to Short()
+    {
+        Short(message, length, hash1, hash2);
+        return;
+    }
+
+    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
+    uint64 buf[sc_numVars]; // one block of scratch space for aligned copies and the padded tail
+    uint64 *end;
+    union 
+    { 
+        const uint8 *p8; 
+        uint64 *p64; 
+        size_t i; 
+    } u;
+    size_t remainder;
+    
+    h0=h3=h6=h9  = *hash1; // the twelve state words start as seed1, seed2, sc_const repeated
+    h1=h4=h7=h10 = *hash2;
+    h2=h5=h8=h11 = sc_const;
+    
+    u.p8 = (const uint8 *)message;
+    end = u.p64 + (length/sc_blockSize)*sc_numVars; // one past the last whole block
+
+    // handle all whole sc_blockSize blocks of bytes
+    if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
+    {
+        while (u.p64 < end)
+        { 
+            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+    else // misaligned input with unaligned reads disabled: mix an aligned copy of each block
+    {
+        while (u.p64 < end)
+        {
+            memcpy(buf, u.p64, sc_blockSize);
+            Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+
+    // handle the last partial block of sc_blockSize bytes: zero-pad it and store its length in the last byte
+    remainder = (length - ((const uint8 *)end-(const uint8 *)message));
+    memcpy(buf, end, remainder);
+    memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
+    ((uint8 *)buf)[sc_blockSize-1] = remainder;
+    Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    
+    // do some final mixing; only h0 and h1 are returned
+    End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    *hash1 = h0;
+    *hash2 = h1;
+}
+
+
+
+// init spooky state: zero the byte counters and store the two seeds
+void SpookyHash::Init(uint64 seed1, uint64 seed2)
+{
+    m_length = 0;       // running total of bytes passed to Update()
+    m_remainder = 0;    // number of bytes currently buffered in m_data
+    m_state[0] = seed1; // the other m_state words are not touched here
+    m_state[1] = seed2;
+}
+
+
+// add a message fragment to the state
+void SpookyHash::Update(const void *message, size_t length)
+{
+    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
+    size_t newLength = length + m_remainder; // buffered bytes plus this fragment
+    uint8  remainder;
+    union 
+    { 
+        const uint8 *p8; 
+        uint64 *p64; 
+        size_t i; 
+    } u;
+    const uint64 *end;
+    
+    // Is this message fragment too short?  If it is, stuff it away.
+    if (newLength < sc_bufSize)
+    {
+        memcpy(&((uint8 *)m_data)[m_remainder], message, length); // append after what is already buffered
+        m_length = length + m_length;
+        m_remainder = (uint8)newLength;
+        return;
+    }
+    
+    // init the variables
+    if (m_length < sc_bufSize) // nothing has been mixed yet: expand the two stored seeds as Hash128 does
+    {
+        h0=h3=h6=h9  = m_state[0];
+        h1=h4=h7=h10 = m_state[1];
+        h2=h5=h8=h11 = sc_const;
+    }
+    else // resume from the twelve words saved by an earlier call
+    {
+        h0 = m_state[0];
+        h1 = m_state[1];
+        h2 = m_state[2];
+        h3 = m_state[3];
+        h4 = m_state[4];
+        h5 = m_state[5];
+        h6 = m_state[6];
+        h7 = m_state[7];
+        h8 = m_state[8];
+        h9 = m_state[9];
+        h10 = m_state[10];
+        h11 = m_state[11];
+    }
+    m_length = length + m_length;
+    
+    // if we've got anything stuffed away, use it now
+    if (m_remainder)
+    {
+        uint8 prefix = sc_bufSize-m_remainder; // bytes needed to top m_data up to sc_bufSize
+        memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix);
+        u.p64 = m_data;
+        Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); // first buffered block
+        Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); // second buffered block
+        u.p8 = ((const uint8 *)message) + prefix;
+        length -= prefix;
+    }
+    else
+    {
+        u.p8 = (const uint8 *)message;
+    }
+    
+    // handle all whole blocks of sc_blockSize bytes
+    end = u.p64 + (length/sc_blockSize)*sc_numVars;
+    remainder = (uint8)(length-((const uint8 *)end-u.p8)); // bytes left over after the whole blocks
+    if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
+    {
+        while (u.p64 < end)
+        { 
+            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+    else // misaligned input with unaligned reads disabled: stage each block in m_data first
+    {
+        while (u.p64 < end)
+        { 
+            memcpy(m_data, u.p8, sc_blockSize);
+            Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	    u.p64 += sc_numVars;
+        }
+    }
+
+    // stuff away the last few bytes
+    m_remainder = remainder;
+    memcpy(m_data, end, remainder);
+    
+    // stuff away the variables
+    m_state[0] = h0;
+    m_state[1] = h1;
+    m_state[2] = h2;
+    m_state[3] = h3;
+    m_state[4] = h4;
+    m_state[5] = h5;
+    m_state[6] = h6;
+    m_state[7] = h7;
+    m_state[8] = h8;
+    m_state[9] = h9;
+    m_state[10] = h10;
+    m_state[11] = h11;
+}
+
+
+// report the hash for the concatenation of all message fragments so far
+void SpookyHash::Final(uint64 *hash1, uint64 *hash2)
+{
+    if (m_length < sc_bufSize)
+    {
+        // short input: Short() reads its seeds through hash1/hash2, so load the stored seeds first
+        *hash1 = m_state[0]; *hash2 = m_state[1];
+        Short( m_data, m_length, hash1, hash2);
+        return;
+    }
+    const uint64 *data = (const uint64 *)m_data;
+    uint8 remainder = m_remainder;
+    
+    uint64 h0 = m_state[0];
+    uint64 h1 = m_state[1];
+    uint64 h2 = m_state[2];
+    uint64 h3 = m_state[3];
+    uint64 h4 = m_state[4];
+    uint64 h5 = m_state[5];
+    uint64 h6 = m_state[6];
+    uint64 h7 = m_state[7];
+    uint64 h8 = m_state[8];
+    uint64 h9 = m_state[9];
+    uint64 h10 = m_state[10];
+    uint64 h11 = m_state[11];
+
+    if (remainder >= sc_blockSize)
+    {
+        // m_data can contain two blocks; handle any whole first block
+        Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+	data += sc_numVars;
+	remainder -= sc_blockSize;
+    }
+
+    // mix in the last partial block, and the length mod sc_blockSize (this pads m_data in place)
+    memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder));
+
+    ((uint8 *)data)[sc_blockSize-1] = remainder;
+    Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    
+    // do some final mixing; only h0 and h1 are returned
+    End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+
+    *hash1 = h0;
+    *hash2 = h1;
+}
+
diff --git a/Spooky.h b/Spooky.h
new file mode 100644
index 0000000..cafd52e
--- /dev/null
+++ b/Spooky.h
@@ -0,0 +1,293 @@
+//
+// SpookyHash: a 128-bit noncryptographic hash function
+// By Bob Jenkins, public domain
+//   Oct 31 2010: alpha, framework + SpookyHash::Mix appears right
+//   Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right
+//   Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas
+//   Feb  2 2012: production, same bits as beta
+//   Feb  5 2012: adjusted definitions of uint* to be more portable
+// 
+// Up to 4 bytes/cycle for long messages.  Reasonably fast for short messages.
+// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit.
+//
+// This was developed for and tested on 64-bit x86-compatible processors.
+// It assumes the processor is little-endian.  There is a macro
+// controlling whether unaligned reads are allowed (by default they are).
+// This should be an equally good hash on big-endian machines, but it will
+// compute different results on them than on little-endian machines.
+//
+// Google's CityHash has similar specs to SpookyHash, and CityHash is faster
+// on some platforms.  MD4 and MD5 also have similar specs, but they are orders
+// of magnitude slower.  CRCs are two or more times slower, but unlike 
+// SpookyHash, they have nice math for combining the CRCs of pieces to form 
+// the CRCs of wholes.  There are also cryptographic hashes, but those are even 
+// slower than MD5.
+//
+#pragma once   // this header defines typedefs and a class; include it only once per translation unit
+
+#include <stddef.h>
+// INLINE and the fixed-width aliases uint8..uint64, chosen per compiler
+#ifdef _MSC_VER
+# define INLINE __forceinline
+  typedef  unsigned __int64 uint64;
+  typedef  unsigned __int32 uint32;
+  typedef  unsigned __int16 uint16;
+  typedef  unsigned __int8  uint8;
+#else
+# include <stdint.h>
+# define INLINE inline
+  typedef  uint64_t  uint64;
+  typedef  uint32_t  uint32;
+  typedef  uint16_t  uint16;
+  typedef  uint8_t   uint8;
+#endif
+
+class SpookyHash
+{
+public:
+    //
+    // Hash128: hash a single message in one call, produce 128-bit output
+    //
+    static void Hash128(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint64 *hash1,        // in/out: in seed 1, out hash value 1
+        uint64 *hash2);       // in/out: in seed 2, out hash value 2
+
+    //
+    // Hash64: hash a single message in one call, return 64-bit output
+    //
+    static uint64 Hash64(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint64 seed)          // seed
+    {
+        uint64 hash1 = seed;
+        Hash128(message, length, &hash1, &seed);
+        return hash1;
+    }
+
+    //
+    // Hash32: hash a single message in one call, produce 32-bit output
+    //
+    static uint32 Hash32(
+        const void *message,  // message to hash
+        size_t length,        // length of message in bytes
+        uint32 seed)          // seed
+    {
+        uint64 hash1 = seed, hash2 = seed;
+        Hash128(message, length, &hash1, &hash2);
+        return (uint32)hash1;
+    }
+
+    //
+    // Init: initialize the context of a SpookyHash
+    //
+    void Init(
+        uint64 seed1,       // any 64-bit value will do, including 0
+        uint64 seed2);      // different seeds produce independent hashes
+    
+    //
+    // Update: add a piece of a message to a SpookyHash state
+    //
+    void Update(
+        const void *message,  // message fragment
+        size_t length);       // length of message fragment in bytes
+
+
+    //
+    // Final: compute the hash for the current SpookyHash state
+    //
+    // This does not modify the state; you can keep updating it afterward
+    //
+    // The result is the same as if Hash128() had been called with
+    // all the pieces concatenated into one message.
+    //
+    void Final(
+        uint64 *hash1,    // out only: first 64 bits of hash value.
+        uint64 *hash2);   // out only: second 64 bits of hash value.
+
+    //
+    // left rotate a 64-bit value by k bits (requires 0 < k < 64)
+    //
+    static INLINE uint64 Rot64(uint64 x, int k)
+    {
+        return (x << k) | (x >> (64 - k));
+    }
+
+    //
+    // This is used if the input is 192 bytes long or longer; each call mixes in one 96-byte block.
+    //
+    // The internal state is fully overwritten every 96 bytes.
+    // Every input bit appears to cause at least 128 bits of entropy
+    // before 96 other bytes are combined, when run forward or backward
+    //   For every input bit,
+    //   Two inputs differing in just that input bit
+    //   Where "differ" means xor or subtraction
+    //   And the base value is random
+    //   When run forward or backwards one Mix
+    // I tried 3 pairs of each; they all differed by at least 212 bits.
+    //
+    static INLINE void Mix(
+        const uint64 *data, 
+        uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3,
+        uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7,
+        uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11)
+    {
+      s0 += data[0];    s2 ^= s10;    s11 ^= s0;    s0 = Rot64(s0,11);    s11 += s1;
+      s1 += data[1];    s3 ^= s11;    s0 ^= s1;    s1 = Rot64(s1,32);    s0 += s2;
+      s2 += data[2];    s4 ^= s0;    s1 ^= s2;    s2 = Rot64(s2,43);    s1 += s3;
+      s3 += data[3];    s5 ^= s1;    s2 ^= s3;    s3 = Rot64(s3,31);    s2 += s4;
+      s4 += data[4];    s6 ^= s2;    s3 ^= s4;    s4 = Rot64(s4,17);    s3 += s5;
+      s5 += data[5];    s7 ^= s3;    s4 ^= s5;    s5 = Rot64(s5,28);    s4 += s6;
+      s6 += data[6];    s8 ^= s4;    s5 ^= s6;    s6 = Rot64(s6,39);    s5 += s7;
+      s7 += data[7];    s9 ^= s5;    s6 ^= s7;    s7 = Rot64(s7,57);    s6 += s8;
+      s8 += data[8];    s10 ^= s6;    s7 ^= s8;    s8 = Rot64(s8,55);    s7 += s9;
+      s9 += data[9];    s11 ^= s7;    s8 ^= s9;    s9 = Rot64(s9,54);    s8 += s10;
+      s10 += data[10];    s0 ^= s8;    s9 ^= s10;    s10 = Rot64(s10,22);    s9 += s11;
+      s11 += data[11];    s1 ^= s9;    s10 ^= s11;    s11 = Rot64(s11,46);    s10 += s0;
+    }
+
+    //
+    // Mix all 12 inputs together so that h0, h1 are a hash of them all.
+    //
+    // For two inputs differing in just the input bits
+    // Where "differ" means xor or subtraction
+    // And the base value is random, or a counting value starting at that bit
+    // The final result will have each bit of h0, h1 flip
+    // For every input bit,
+    // with probability 50 +- .3%
+    // For every pair of input bits,
+    // with probability 50 +- 3%
+    //
+    // This does not rely on the last Mix() call having already mixed some.
+    // Two iterations was almost good enough for a 64-bit result, but a
+    // 128-bit result is reported, so End() does three iterations.
+    //
+    static INLINE void EndPartial(
+        uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
+        uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
+        uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
+    {
+        h11+= h1;    h2 ^= h11;   h1 = Rot64(h1,44);
+	h0 += h2;    h3 ^= h0;    h2 = Rot64(h2,15);
+	h1 += h3;    h4 ^= h1;    h3 = Rot64(h3,34);
+	h2 += h4;    h5 ^= h2;    h4 = Rot64(h4,21);
+	h3 += h5;    h6 ^= h3;    h5 = Rot64(h5,38);
+	h4 += h6;    h7 ^= h4;    h6 = Rot64(h6,33);
+	h5 += h7;    h8 ^= h5;    h7 = Rot64(h7,10);
+	h6 += h8;    h9 ^= h6;    h8 = Rot64(h8,13);
+	h7 += h9;    h10^= h7;    h9 = Rot64(h9,38);
+	h8 += h10;   h11^= h8;    h10= Rot64(h10,53);
+	h9 += h11;   h0 ^= h9;    h11= Rot64(h11,42);
+	h10+= h0;    h1 ^= h10;   h0 = Rot64(h0,54);
+    }
+
+    static INLINE void End(
+        uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3,
+        uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, 
+        uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11)
+    {
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+        EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
+    }
+
+    //
+    // The goal is for each bit of the input to expand into 128 bits of 
+    //   apparent entropy before it is fully overwritten.
+    // n trials both set and cleared at least m bits of h0 h1 h2 h3
+    //   n: 2   m: 29
+    //   n: 3   m: 46
+    //   n: 4   m: 57
+    //   n: 5   m: 107
+    //   n: 6   m: 146
+    //   n: 7   m: 152
+    // when run forwards or backwards
+    // for all 1-bit and 2-bit diffs
+    // with diffs defined by either xor or subtraction
+    // with a base of all zeros plus a counter, or plus another bit, or random
+    //
+    static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
+    {
+        h2 = Rot64(h2,50);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,52);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,30);  h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,41);  h1 += h2;  h3 ^= h1;
+        h2 = Rot64(h2,54);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,48);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,38);  h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,37);  h1 += h2;  h3 ^= h1;
+        h2 = Rot64(h2,62);  h2 += h3;  h0 ^= h2;
+        h3 = Rot64(h3,34);  h3 += h0;  h1 ^= h3;
+        h0 = Rot64(h0,5);   h0 += h1;  h2 ^= h0;
+        h1 = Rot64(h1,36);  h1 += h2;  h3 ^= h1;
+    }
+
+    //
+    // Mix all 4 inputs together so that h0, h1 are a hash of them all.
+    //
+    // For two inputs differing in just the input bits
+    // Where "differ" means xor or subtraction
+    // And the base value is random, or a counting value starting at that bit
+    // The final result will have each bit of h0, h1 flip
+    // For every input bit,
+    // with probability 50 +- .3% (it is probably better than that)
+    // For every pair of input bits,
+    // with probability 50 +- .75% (the worst case is approximately that)
+    //
+    static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3)
+    {
+        h3 ^= h2;  h2 = Rot64(h2,15);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,52);  h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,26);  h1 += h0;
+        h2 ^= h1;  h1 = Rot64(h1,51);  h2 += h1;
+        h3 ^= h2;  h2 = Rot64(h2,28);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,9);   h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,47);  h1 += h0;
+        h2 ^= h1;  h1 = Rot64(h1,54);  h2 += h1;
+        h3 ^= h2;  h2 = Rot64(h2,32);  h3 += h2;
+        h0 ^= h3;  h3 = Rot64(h3,25);  h0 += h3;
+        h1 ^= h0;  h0 = Rot64(h0,63);  h1 += h0;
+    }
+    
+private:
+
+    //
+    // Short is used for messages under 192 bytes in length
+    // Short has a low startup cost, the normal mode is good for long
+    // keys, the cost crossover is at about 192 bytes.  The two modes were
+    // held to the same quality bar.
+    // 
+    static void Short(
+        const void *message,
+        size_t length,
+        uint64 *hash1,
+        uint64 *hash2);
+
+    // number of uint64's in internal state
+    static const size_t sc_numVars = 12;
+
+    // size of the internal state
+    static const size_t sc_blockSize = sc_numVars*8;
+
+    // size of buffer of unhashed data, in bytes
+    static const size_t sc_bufSize = 2*sc_blockSize;
+
+    //
+    // sc_const: a constant which:
+    //  * is not zero
+    //  * is odd
+    //  * is a not-very-regular mix of 1's and 0's
+    //  * does not need any other special mathematical properties
+    //
+    static const uint64 sc_const = 0xdeadbeefdeadbeefLL;
+
+    uint64 m_data[2*sc_numVars];   // unhashed data, for partial messages
+    uint64 m_state[sc_numVars];  // internal state of the hash
+    size_t m_length;             // total length of the input so far
+    uint8  m_remainder;          // length of unhashed data stashed in m_data
+};
+
+
+
diff --git a/SpookyTest.cpp b/SpookyTest.cpp
new file mode 100644
index 0000000..df9021e
--- /dev/null
+++ b/SpookyTest.cpp
@@ -0,0 +1,16 @@
+#include "Spooky.h"
+
+void SpookyHash32_test(const void *key, int len, uint32_t seed, void *out) {  // stores the 32-bit hash of key[0..len) in out
+  *static_cast<uint32_t*>(out) = SpookyHash::Hash32(key, static_cast<size_t>(len), seed);
+}
+
+void SpookyHash64_test(const void *key, int len, uint32_t seed, void *out) {  // stores the 64-bit hash of key[0..len) in out
+  *static_cast<uint64_t*>(out) = SpookyHash::Hash64(key, static_cast<size_t>(len), seed);
+}
+
+void SpookyHash128_test(const void *key, int len, uint32_t seed, void *out) {
+  uint64_t lo = seed, hi = lo;  // both 64-bit seeds start from the caller's seed
+  SpookyHash::Hash128(key, static_cast<size_t>(len), &lo, &hi);
+  static_cast<uint64_t*>(out)[0] = lo;
+  static_cast<uint64_t*>(out)[1] = hi;
+}
diff --git a/Stats.cpp b/Stats.cpp
index 55e99fc..4452290 100644
--- a/Stats.cpp
+++ b/Stats.cpp
@@ -1,99 +1,99 @@
-#include "Stats.h"

-

-//-----------------------------------------------------------------------------

-

-double chooseK ( int n, int k )

-{

-  if(k > (n - k)) k = n - k;

-

-  double c = 1;

-

-  for(int i = 0; i < k; i++)

-  {

-    double t = double(n-i) / double(i+1);

-

-    c *= t;

-  }

-

-    return c;

-}

-

-double chooseUpToK ( int n, int k )

-{

-  double c = 0;

-

-  for(int i = 1; i <= k; i++)

-  {

-    c += chooseK(n,i);

-  }

-

-  return c;

-}

-

-//-----------------------------------------------------------------------------

-// Distribution "score"

-// TODO - big writeup of what this score means

-

-// Basically, we're computing a constant that says "The test distribution is as

-// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of

-// the bins. This makes for a nice uniform way to rate a distribution that isn't

-// dependent on the number of bins or the number of keys

-

-// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up

-// as distribution weaknesses)

-

-double calcScore ( const int * bins, const int bincount, const int keycount )

-{

-  double n = bincount;

-  double k = keycount;

-

-  // compute rms value

-

-  double r = 0;

-

-  for(int i = 0; i < bincount; i++)

-  {

-    double b = bins[i];

-

-    r += b*b;

-  }

-

-  r = sqrt(r / n);

-

-  // compute fill factor

-

-  double f = (k*k - 1) / (n*r*r - k);

-

-  // rescale to (0,1) with 0 = good, 1 = bad

-

-  return 1 - (f / n);

-}

-

-

-//----------------------------------------------------------------------------

-

-void plot ( double n )

-{

-  double n2 = n * 1;

-

-  if(n2 < 0) n2 = 0;

-

-  n2 *= 100;

-

-  if(n2 > 64) n2 = 64;

-

-  int n3 = (int)n2;

-

-  if(n3 == 0)

-    printf(".");

-  else

-  {

-    char x = '0' + char(n3);

-

-    if(x > '9') x = 'X';

-

-    printf("%c",x);

-  }

-}

-

-//-----------------------------------------------------------------------------

+#include "Stats.h"
+
+//-----------------------------------------------------------------------------
+
+double chooseK ( int n, int k )
+{
+  // C(n,k) == C(n,n-k), so iterate over the smaller of the two
+  const int mirrored = n - k;
+  if(mirrored < k) k = mirrored;
+  // running product of (n-step)/(step+1) for step = 0 .. k-1
+  double product = 1;
+  int step = 0;
+  while(step < k)
+  {
+    product *= double(n-step) / double(step+1);
+    step++;
+  }
+  return product;
+}
+
+double chooseUpToK ( int n, int k )
+{
+  // sum of chooseK(n,1) + chooseK(n,2) + ... + chooseK(n,k);
+  // the k == 0 term is not part of the sum
+  double total = 0;
+
+  for(int picks = 1; !(picks > k); ++picks)
+    total += chooseK(n,picks);
+
+  return total;
+}
+
+//-----------------------------------------------------------------------------
+// Distribution "score"
+// TODO - big writeup of what this score means
+
+// Basically, we're computing a constant X that says "The test distribution is as
+// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
+// the bins". This makes for a nice uniform way to rate a distribution that isn't
+// dependent on the number of bins or the number of keys
+
+// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
+// as distribution weaknesses)
+
+double calcScore ( const int * bins, const int bincount, const int keycount )
+{
+  const double binTotal = bincount;
+  const double keyTotal = keycount;
+
+  // root-mean-square of the per-bin counts
+
+  double sumSquares = 0;
+  int idx = 0;
+
+  while(idx < bincount)
+  {
+    const double count = bins[idx];
+    sumSquares += count*count;
+    idx++;
+  }
+
+  const double rms = sqrt(sumSquares / binTotal);
+
+  // fill factor computed from the rms and the key count
+  const double fill = (keyTotal*keyTotal - 1) / (binTotal*rms*rms - keyTotal);
+
+  // rescale to (0,1) with 0 = good, 1 = bad
+
+  return 1 - (fill / binTotal);
+}
+
+
+//----------------------------------------------------------------------------
+
+void plot ( double n )
+{
+  // scale to percent and clamp into [0,64] before truncating to an int
+  double pct = n * 1;
+  if(pct < 0) pct = 0;
+  pct *= 100;
+  if(pct > 64) pct = 64;
+
+  const int level = (int)pct;
+
+  // 0 prints '.', 1..9 print their digit, anything larger prints 'X'
+  if(level == 0)
+  {
+    printf(".");
+    return;
+  }
+
+  char glyph = '0' + char(level);
+
+  if(glyph > '9') glyph = 'X';
+
+  printf("%c",glyph);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Stats.h b/Stats.h
index c80393e..3565e80 100644
--- a/Stats.h
+++ b/Stats.h
@@ -1,388 +1,388 @@
-#pragma once

-

-#include "Types.h"

-

-#include <math.h>

-#include <vector>

-#include <map>

-#include <algorithm>   // for std::sort

-#include <string.h>    // for memset

-#include <stdio.h>     // for printf

-

-double calcScore ( const int * bins, const int bincount, const int ballcount );

-

-void plot ( double n );

-

-inline double ExpectedCollisions ( double balls, double bins )

-{

-  return balls - bins + bins * pow(1 - 1/bins,balls);

-}

-

-double chooseK ( int b, int k );

-double chooseUpToK ( int n, int k );

-

-//-----------------------------------------------------------------------------

-

-inline uint32_t f3mix ( uint32_t k )

-{

-  k ^= k >> 16;

-  k *= 0x85ebca6b;

-  k ^= k >> 13;

-  k *= 0xc2b2ae35;

-  k ^= k >> 16;

-

-  return k;

-}

-

-//-----------------------------------------------------------------------------

-// Sort the hash list, count the total number of collisions and return

-// the first N collisions for further processing

-

-template< typename hashtype >

-int FindCollisions ( std::vector<hashtype> & hashes, 

-                     HashSet<hashtype> & collisions,

-                     int maxCollisions )

-{

-  int collcount = 0;

-

-  std::sort(hashes.begin(),hashes.end());

-

-  for(size_t i = 1; i < hashes.size(); i++)

-  {

-    if(hashes[i] == hashes[i-1])

-    {

-      collcount++;

-

-      if((int)collisions.size() < maxCollisions)

-      {

-        collisions.insert(hashes[i]);

-      }

-    }

-  }

-

-  return collcount;

-}

-

-//-----------------------------------------------------------------------------

-

-template < class keytype, typename hashtype >

-int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )

-{

-  int collcount = 0;

-

-  typedef std::map<hashtype,keytype> htab;

-  htab tab;

-

-  for(size_t i = 1; i < keys.size(); i++)

-  {

-    keytype & k1 = keys[i];

-

-    hashtype h = hash(&k1,sizeof(keytype),0);

-

-    typename htab::iterator it = tab.find(h);

-

-    if(it != tab.end())

-    {

-      keytype & k2 = (*it).second;

-

-      printf("A: ");

-      printbits(&k1,sizeof(keytype));

-      printf("B: ");

-      printbits(&k2,sizeof(keytype));

-    }

-    else

-    {

-      tab.insert( std::make_pair(h,k1) );

-    }

-  }

-

-  return collcount;

-}

-

-//----------------------------------------------------------------------------

-// Measure the distribution "score" for each possible N-bit span up to 20 bits

-

-template< typename hashtype >

-double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )

-{

-  printf("Testing distribution - ");

-

-  if(drawDiagram) printf("\n");

-

-  const int hashbits = sizeof(hashtype) * 8;

-

-  int maxwidth = 20;

-

-  // We need at least 5 keys per bin to reliably test distribution biases

-  // down to 1%, so don't bother to test sparser distributions than that

-

-  while(double(hashes.size()) / double(1 << maxwidth) < 5.0)

-  {

-    maxwidth--;

-  }

-

-  std::vector<int> bins;

-  bins.resize(1 << maxwidth);

-

-  double worst = 0;

-  int worstStart = -1;

-  int worstWidth = -1;

-

-  for(int start = 0; start < hashbits; start++)

-  {

-    int width = maxwidth;

-    int bincount = (1 << width);

-

-    memset(&bins[0],0,sizeof(int)*bincount);

-

-    for(size_t j = 0; j < hashes.size(); j++)

-    {

-      hashtype & hash = hashes[j];

-

-      uint32_t index = window(&hash,sizeof(hash),start,width);

-

-      bins[index]++;

-    }

-

-    // Test the distribution, then fold the bins in half,

-    // repeat until we're down to 256 bins

-

-    if(drawDiagram) printf("[");

-

-    while(bincount >= 256)

-    {

-      double n = calcScore(&bins[0],bincount,(int)hashes.size());

-

-      if(drawDiagram) plot(n);

-

-      if(n > worst)

-      {

-        worst = n;

-        worstStart = start;

-        worstWidth = width;

-      }

-

-      width--;

-      bincount /= 2;

-

-      if(width < 8) break;

-

-      for(int i = 0; i < bincount; i++)

-      {

-        bins[i] += bins[i+bincount];

-      }

-    }

-

-    if(drawDiagram) printf("]\n");

-  }

-

-  double pct = worst * 100.0;

-

-  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);

-  if(pct >= 1.0) printf(" !!!!! ");

-  printf("\n");

-

-  return worst;

-}

-

-//----------------------------------------------------------------------------

-

-template < typename hashtype >

-bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )

-{

-  bool result = true;

-

-  {

-    size_t count = hashes.size();

-

-    double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));

-

-    printf("Testing collisions   - Expected %8.2f, ",expected);

-

-    double collcount = 0;

-

-    HashSet<hashtype> collisions;

-

-    collcount = FindCollisions(hashes,collisions,1000);

-

-    printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);

-

-    if(sizeof(hashtype) == sizeof(uint32_t))

-    {

-    // 2x expected collisions = fail

-

-    // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead

-    // of a scale factor, otherwise we fail erroneously if there are a small expected number

-    // of collisions

-

-    if(double(collcount) / double(expected) > 2.0)

-    {

-      printf(" !!!!! ");

-      result = false;

-    }

-    }

-    else

-    {

-      // For all hashes larger than 32 bits, _any_ collisions are a failure.

-      

-      if(collcount > 0)

-      {

-        printf(" !!!!! ");

-        result = false;

-      }

-    }

-

-    printf("\n");

-  }

-

-  //----------

-

-  if(testDist)

-  {

-    TestDistribution(hashes,drawDiagram);

-  }

-

-  return result;

-}

-

-//----------

-

-template < typename hashtype >

-bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )

-{

-  std::vector<hashtype> collisions;

-

-  return TestHashList(hashes,collisions,testDist,drawDiagram);

-}

-

-//-----------------------------------------------------------------------------

-

-template < class keytype, typename hashtype >

-bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )

-{

-  int keycount = (int)keys.size();

-

-  std::vector<hashtype> hashes;

-

-  hashes.resize(keycount);

-

-  printf("Hashing");

-

-  for(int i = 0; i < keycount; i++)

-  {

-    if(i % (keycount / 10) == 0) printf(".");

-

-    keytype & k = keys[i];

-

-    hash(&k,sizeof(k),0,&hashes[i]);

-  }

-

-  printf("\n");

-

-  bool result = TestHashList(hashes,testColl,testDist,drawDiagram);

-

-  printf("\n");

-

-  return result;

-}

-

-//-----------------------------------------------------------------------------

-// Bytepair test - generate 16-bit indices from all possible non-overlapping

-// 8-bit sections of the hash value, check distribution on all of them.

-

-// This is a very good test for catching weak intercorrelations between bits - 

-// much harder to pass than the normal distribution test. However, it doesn't

-// really model the normal usage of hash functions in hash table lookup, so

-// I'm not sure it's that useful (and hash functions that fail this test but

-// pass the normal distribution test still work well in practice)

-

-template < typename hashtype >

-double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )

-{

-  const int nbytes = sizeof(hashtype);

-  const int hashbits = nbytes * 8;

-  

-  const int nbins = 65536;

-

-  std::vector<int> bins(nbins,0);

-

-  double worst = 0;

-

-  for(int a = 0; a < hashbits; a++)

-  {

-    if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");

-

-    if(drawDiagram) printf("[");

-

-    for(int b = 0; b < hashbits; b++)

-    {

-      if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");

-

-      bins.clear();

-      bins.resize(nbins,0);

-

-      for(size_t i = 0; i < hashes.size(); i++)

-      {

-        hashtype & hash = hashes[i];

-

-        uint32_t pa = window(&hash,sizeof(hash),a,8);

-        uint32_t pb = window(&hash,sizeof(hash),b,8);

-

-        bins[pa | (pb << 8)]++;

-      }

-

-      double s = calcScore(bins,bins.size(),hashes.size());

-

-      if(drawDiagram) plot(s);

-

-      if(s > worst)

-      {

-        worst = s;

-      }

-    }

-

-    if(drawDiagram) printf("]\n");

-  }

-

-  return worst;

-}

-

-//-----------------------------------------------------------------------------

-// Simplified test - only check 64k distributions, and only on byte boundaries

-

-template < typename hashtype >

-void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )

-{

-  const int hashbits = sizeof(hashtype) * 8;

-  const int nbins = 65536;

-  

-  std::vector<int> bins(nbins,0);

-

-  dworst = -1.0e90;

-  davg = 0;

-

-  for(int start = 0; start < hashbits; start += 8)

-  {

-    bins.clear();

-    bins.resize(nbins,0);

-

-    for(size_t j = 0; j < hashes.size(); j++)

-    {

-      hashtype & hash = hashes[j];

-

-      uint32_t index = window(&hash,sizeof(hash),start,16);

-

-      bins[index]++;

-    }

-

-    double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());

-    

-    davg += n;

-

-    if(n > dworst) dworst = n;

-  }

-

-  davg /= double(hashbits/8);

-}

-

-//-----------------------------------------------------------------------------

+#pragma once
+
+#include "Types.h"
+
+#include <math.h>
+#include <vector>
+#include <map>
+#include <algorithm>   // for std::sort
+#include <string.h>    // for memset
+#include <stdio.h>     // for printf
+
+double calcScore ( const int * bins, const int bincount, const int ballcount );
+
+void plot ( double n );
+
+inline double ExpectedCollisions ( double balls, double bins )
+{
+  return balls - bins + bins * pow(1 - 1/bins,balls);
+}
+
+double chooseK ( int b, int k );
+double chooseUpToK ( int n, int k );
+
+//-----------------------------------------------------------------------------
+
+inline uint32_t f3mix ( uint32_t k )
+{
+  k ^= k >> 16;
+  k *= 0x85ebca6b;
+  k ^= k >> 13;
+  k *= 0xc2b2ae35;
+  k ^= k >> 16;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+// Sort the hash list, count the total number of collisions and return
+// the first N collisions for further processing
+
+template< typename hashtype >
+int FindCollisions ( std::vector<hashtype> & hashes, 
+                     HashSet<hashtype> & collisions,
+                     int maxCollisions )
+{
+  int collcount = 0;
+
+  std::sort(hashes.begin(),hashes.end());
+
+  for(size_t i = 1; i < hashes.size(); i++)
+  {
+    if(hashes[i] == hashes[i-1])
+    {
+      collcount++;
+
+      if((int)collisions.size() < maxCollisions)
+      {
+        collisions.insert(hashes[i]);
+      }
+    }
+  }
+
+  return collcount;
+}
+
+//-----------------------------------------------------------------------------
+// Print each key whose hash equals that of an earlier key; returns how many such keys were found
+template < class keytype, typename hashtype >
+int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
+{
+  int collcount = 0;
+
+  typedef std::map<hashtype,keytype> htab;   // hash value -> first key that produced it
+  htab tab;
+
+  for(size_t i = 0; i < keys.size(); i++)    // start at 0 so keys[0] takes part as well
+  {
+    keytype & k1 = keys[i];
+
+    hashtype h;
+    hash(&k1,sizeof(keytype),0,&h);          // seed 0, result via out pointer (same call form as TestKeyList)
+    typename htab::iterator it = tab.find(h);
+
+    if(it != tab.end())
+    {
+      keytype & k2 = (*it).second;
+      collcount++;                           // count the collision that is about to be printed
+      printf("A: ");
+      printbits(&k1,sizeof(keytype));
+      printf("B: ");
+      printbits(&k2,sizeof(keytype));
+    }
+    else
+    {
+      tab.insert( std::make_pair(h,k1) );
+    }
+  }
+
+  return collcount;
+}
+
+//----------------------------------------------------------------------------
+// Measure the distribution "score" for each possible N-bit span up to 20 bits
+
+template< typename hashtype >
+double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  printf("Testing distribution - ");
+
+  if(drawDiagram) printf("\n");
+
+  const int hashbits = sizeof(hashtype) * 8;
+  // widest window to test, in bits; narrowed below for small hash lists
+  int maxwidth = 20;
+
+  // We need at least 5 keys per bin to reliably test distribution biases
+  // down to 1%, so don't bother to test sparser distributions than that
+  // (stop at width 0: with fewer than 5 hashes the shift count must not go negative)
+  while(maxwidth > 0 && double(hashes.size()) / double(1 << maxwidth) < 5.0)
+  {
+    maxwidth--;
+  }
+
+  std::vector<int> bins;
+  bins.resize(1 << maxwidth);
+  // worst score seen so far and the window (start bit, width) that produced it
+  double worst = 0;
+  int worstStart = -1;
+  int worstWidth = -1;
+
+  for(int start = 0; start < hashbits; start++)
+  {
+    int width = maxwidth;
+    int bincount = (1 << width);
+
+    memset(&bins[0],0,sizeof(int)*bincount);
+
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
+
+      uint32_t index = window(&hash,sizeof(hash),start,width);
+
+      bins[index]++;
+    }
+
+    // Test the distribution, then fold the bins in half,
+    // repeat until we're down to 256 bins
+
+    if(drawDiagram) printf("[");
+
+    while(bincount >= 256)
+    {
+      double n = calcScore(&bins[0],bincount,(int)hashes.size());
+
+      if(drawDiagram) plot(n);
+
+      if(n > worst)
+      {
+        worst = n;
+        worstStart = start;
+        worstWidth = width;
+      }
+
+      width--;
+      bincount /= 2;
+
+      if(width < 8) break;
+
+      for(int i = 0; i < bincount; i++)
+      {
+        bins[i] += bins[i+bincount];
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+
+  double pct = worst * 100.0;
+
+  printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
+  if(pct >= 1.0) printf(" !!!!! ");
+  printf("\n");
+  // the largest score found; stays 0 when no window was scored
+  return worst;
+}
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
+{
+  bool result = true;
+
+  {
+    size_t count = hashes.size();
+
+    double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
+
+    printf("Testing collisions   - Expected %8.2f, ",expected);
+
+    double collcount = 0;
+
+    HashSet<hashtype> collisions;
+
+    collcount = FindCollisions(hashes,collisions,1000);
+
+    printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
+
+    if(sizeof(hashtype) == sizeof(uint32_t))
+    {
+    // 2x expected collisions = fail
+
+    // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
+    // of a scale factor, otherwise we fail erroneously if there are a small expected number
+    // of collisions
+
+    if(double(collcount) / double(expected) > 2.0)
+    {
+      printf(" !!!!! ");
+      result = false;
+    }
+    }
+    else
+    {
+      // For all hashes larger than 32 bits, _any_ collisions are a failure.
+      
+      if(collcount > 0)
+      {
+        printf(" !!!!! ");
+        result = false;
+      }
+    }
+
+    printf("\n");
+  }
+
+  //----------
+
+  if(testDist)
+  {
+    TestDistribution(hashes,drawDiagram);
+  }
+
+  return result;
+}
+
+//----------
+
+template < typename hashtype >
+bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
+{
+  std::vector<hashtype> collisions;
+
+  return TestHashList(hashes,collisions,testDist,drawDiagram);
+}
+
+//-----------------------------------------------------------------------------
+// Hash every key with seed 0, then run TestHashList on the resulting hashes and return its result
+template < class keytype, typename hashtype >
+bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
+{
+  int keycount = (int)keys.size();
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+
+  printf("Hashing");
+  // one progress dot per tenth of the keys (step clamped to 1: fewer than 10 keys must not compute i % 0)
+  const int dotStep = (keycount >= 10) ? (keycount / 10) : 1;
+  for(int i = 0; i < keycount; i++)
+  {
+    if(i % dotStep == 0) printf(".");
+
+    keytype & k = keys[i];
+
+    hash(&k,sizeof(k),0,&hashes[i]);
+  }
+
+  printf("\n");
+
+  bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
+
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Bytepair test - generate 16-bit indices from every pair of 8-bit windows
+// of the hash value (one window per bit offset), check distribution on all of them.
+
+// This is a very good test for catching weak intercorrelations between bits - 
+// much harder to pass than the normal distribution test. However, it doesn't
+// really model the normal usage of hash functions in hash table lookup, so
+// I'm not sure it's that useful (and hash functions that fail this test but
+// pass the normal distribution test still work well in practice)
+
+template < typename hashtype >
+double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
+{
+  const int nbytes = sizeof(hashtype);
+  const int hashbits = nbytes * 8;
+  // one bin per 16-bit index built from two 8-bit windows
+  const int nbins = 65536;
+
+  std::vector<int> bins(nbins,0);
+
+  double worst = 0;
+
+  for(int a = 0; a < hashbits; a++)
+  {
+    if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
+
+    if(drawDiagram) printf("[");
+
+    for(int b = 0; b < hashbits; b++)
+    {
+      if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
+
+      bins.clear();
+      bins.resize(nbins,0);
+
+      for(size_t i = 0; i < hashes.size(); i++)
+      {
+        hashtype & hash = hashes[i];
+
+        uint32_t pa = window(&hash,sizeof(hash),a,8);
+        uint32_t pb = window(&hash,sizeof(hash),b,8);
+
+        bins[pa | (pb << 8)]++;
+      }
+      // calcScore takes a raw int array plus int counts (same call form as TestDistributionFast)
+      double s = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
+
+      if(drawDiagram) plot(s);
+
+      if(s > worst)
+      {
+        worst = s;
+      }
+    }
+
+    if(drawDiagram) printf("]\n");
+  }
+  // the largest score over all (a,b) window pairs; 0 if none exceeded it
+  return worst;
+}
+
+//-----------------------------------------------------------------------------
+// Simplified test - only check 64k distributions, and only on byte boundaries
+
+template < typename hashtype >
+void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
+{
+  const int hashbits = sizeof(hashtype) * 8;
+  const int nbins = 65536;
+  
+  std::vector<int> bins(nbins,0);
+
+  dworst = -1.0e90;
+  davg = 0;
+
+  for(int start = 0; start < hashbits; start += 8)
+  {
+    bins.clear();
+    bins.resize(nbins,0);
+
+    for(size_t j = 0; j < hashes.size(); j++)
+    {
+      hashtype & hash = hashes[j];
+
+      uint32_t index = window(&hash,sizeof(hash),start,16);
+
+      bins[index]++;
+    }
+
+    double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
+    
+    davg += n;
+
+    if(n > dworst) dworst = n;
+  }
+
+  davg /= double(hashbits/8);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp
index 38d030d..1f6d39a 100644
--- a/SuperFastHash.cpp
+++ b/SuperFastHash.cpp
@@ -1,76 +1,76 @@
-#include "Platform.h"

-#include <stdio.h> // for NULL

-

-/* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 

-   license. See: 

-   http://www.azillionmonkeys.com/qed/weblicense.html for license details.

-

-   http://www.azillionmonkeys.com/qed/hash.html */

-

-/*

-#undef get16bits

-#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \

-  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)

-#define get16bits(d) (*((const uint16_t *) (d)))

-#endif

-

-#if !defined (get16bits)

-#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\

-                       +(uint32_t)(((const uint8_t *)(d))[0]) )

-#endif

-*/

-

-FORCE_INLINE uint16_t get16bits ( const void * p )

-{

-  return *(const uint16_t*)p;

-}

-

-uint32_t SuperFastHash (const signed char * data, int len) {

-uint32_t hash = 0, tmp;

-int rem;

-

-  if (len <= 0 || data == NULL) return 0;

-

-  rem = len & 3;

-  len >>= 2;

-

-  /* Main loop */

-  for (;len > 0; len--) {

-    hash  += get16bits (data);

-    tmp    = (get16bits (data+2) << 11) ^ hash;

-    hash   = (hash << 16) ^ tmp;

-    data  += 2*sizeof (uint16_t);

-    hash  += hash >> 11;

-  }

-

-  /* Handle end cases */

-  switch (rem) {

-    case 3:	hash += get16bits (data);

-        hash ^= hash << 16;

-        hash ^= data[sizeof (uint16_t)] << 18;

-        hash += hash >> 11;

-        break;

-    case 2:	hash += get16bits (data);

-        hash ^= hash << 11;

-        hash += hash >> 17;

-        break;

-    case 1: hash += *data;

-        hash ^= hash << 10;

-        hash += hash >> 1;

-  }

-

-  /* Force "avalanching" of final 127 bits */

-  hash ^= hash << 3;

-  hash += hash >> 5;

-  hash ^= hash << 4;

-  hash += hash >> 17;

-  hash ^= hash << 25;

-  hash += hash >> 6;

-

-  return hash;

-}

-

-void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )

-{

-  *(uint32_t*)out = SuperFastHash((const signed char*)key,len);

-}

+#include "Platform.h"
+#include <stdio.h> // for NULL
+
+/* By Paul Hsieh (C) 2004, 2005.  Covered under the Paul Hsieh derivative 
+   license. See: 
+   http://www.azillionmonkeys.com/qed/weblicense.html for license details.
+
+   http://www.azillionmonkeys.com/qed/hash.html */
+
+/*
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+*/
+
+FORCE_INLINE uint16_t get16bits ( const void * p )
+{
+  return *(const uint16_t*)p;
+}
+
+uint32_t SuperFastHash (const signed char * data, int len) {
+uint32_t hash = 0, tmp;
+int rem;
+
+  if (len <= 0 || data == NULL) return 0;
+
+  rem = len & 3;
+  len >>= 2;
+
+  /* Main loop */
+  for (;len > 0; len--) {
+    hash  += get16bits (data);
+    tmp    = (get16bits (data+2) << 11) ^ hash;
+    hash   = (hash << 16) ^ tmp;
+    data  += 2*sizeof (uint16_t);
+    hash  += hash >> 11;
+  }
+
+  /* Handle end cases */
+  switch (rem) {
+    case 3:	hash += get16bits (data);
+        hash ^= hash << 16;
+        hash ^= data[sizeof (uint16_t)] << 18;
+        hash += hash >> 11;
+        break;
+    case 2:	hash += get16bits (data);
+        hash ^= hash << 11;
+        hash += hash >> 17;
+        break;
+    case 1: hash += *data;
+        hash ^= hash << 10;
+        hash += hash >> 1;
+  }
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
+}
+
+void SuperFastHash     ( const void * key, int len, uint32_t /*seed*/, void * out )
+{
+  *(uint32_t*)out = SuperFastHash((const signed char*)key,len);
+}
diff --git a/Types.cpp b/Types.cpp
index f4c9b05..6ad5312 100644
--- a/Types.cpp
+++ b/Types.cpp
@@ -1,148 +1,148 @@
-#include "Types.h"

-

-#include "Random.h"

-

-#include <stdio.h>

-

-uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );

-

-//-----------------------------------------------------------------------------

-

-#if defined(_MSC_VER)

-#pragma optimize( "", off )

-#endif

-

-void blackhole ( uint32_t )

-{

-}

-

-uint32_t whitehole ( void )

-{

-  return 0;

-}

-

-#if defined(_MSC_VER)

-#pragma optimize( "", on ) 

-#endif

-

-uint32_t g_verify = 1;

-

-void MixVCode ( const void * blob, int len )

-{

-	g_verify = MurmurOAAT(blob,len,g_verify);

-}

-

-//-----------------------------------------------------------------------------

-

-bool isprime ( uint32_t x )

-{

-  uint32_t p[] = 

-  {

-    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,

-    103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,

-    199,211,223,227,229,233,239,241,251

-  };

-

-  for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++)

-  { 

-    if((x % p[i]) == 0)

-    {

-      return false;

-    }

-  } 

-

-  for(int i = 257; i < 65536; i += 2) 

-  { 

-    if((x % i) == 0)

-    {

-      return false;

-    }

-  }

-

-  return true;

-}

-

-void GenerateMixingConstants ( void )

-{

-  Rand r(8350147);

-

-  int count = 0;

-

-  int trials = 0;

-  int bitfail = 0;

-  int popfail = 0;

-  int matchfail = 0;

-  int primefail = 0;

-

-  //for(uint32_t x = 1; x; x++)

-  while(count < 100)

-  {

-    //if(x % 100000000 == 0) printf(".");

-

-    trials++;

-    uint32_t b = r.rand_u32();

-    //uint32_t b = x;

-

-    //----------

-    // must have between 14 and 18 set bits

-

-    if(popcount(b) < 16) { b = 0; popfail++; }

-    if(popcount(b) > 16) { b = 0; popfail++; }

-

-    if(b == 0) continue;

-

-    //----------

-    // must have 3-5 bits set per 8-bit window

-

-    for(int i = 0; i < 32; i++)

-    {

-      uint32_t c = ROTL32(b,i) & 0xFF;

-

-      if(popcount(c) < 3) { b = 0; bitfail++; break; }

-      if(popcount(c) > 5) { b = 0; bitfail++; break; }

-    }

-

-    if(b == 0) continue;

-

-    //----------

-    // all 8-bit windows must be different

-

-    uint8_t match[256];

-

-    memset(match,0,256);

-

-    for(int i = 0; i < 32; i++)

-    {

-      uint32_t c = ROTL32(b,i) & 0xFF;

-      

-      if(match[c]) { b = 0; matchfail++; break; }

-

-      match[c] = 1;

-    }

-

-    if(b == 0) continue;

-

-    //----------

-    // must be prime

-

-    if(!isprime(b))

-    {

-      b = 0;

-      primefail++;

-    }

-

-    if(b == 0) continue;

-

-    //----------

-

-    if(b)

-    {

-      printf("0x%08x : 0x%08x\n",b,~b);

-      count++;

-    }

-  }

-

-  printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);

-}

-

-//-----------------------------------------------------------------------------

+#include "Types.h"
+
+#include "Random.h"
+
+#include <stdio.h>
+
+uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
+
+//-----------------------------------------------------------------------------
+
+#if defined(_MSC_VER)
+#pragma optimize( "", off )
+#endif
+
+void blackhole ( uint32_t )
+{
+}
+
+uint32_t whitehole ( void )
+{
+  return 0;
+}
+
+#if defined(_MSC_VER)
+#pragma optimize( "", on ) 
+#endif
+
+uint32_t g_verify = 1;
+
+void MixVCode ( const void * blob, int len )
+{
+	g_verify = MurmurOAAT(blob,len,g_verify);
+}
+
+//-----------------------------------------------------------------------------
+
+bool isprime ( uint32_t x )
+{
+  uint32_t p[] = 
+  {
+    2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,
+    103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,
+    199,211,223,227,229,233,239,241,251
+  };
+
+  for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++)
+  { 
+    if((x % p[i]) == 0)
+    {
+      return false;
+    }
+  } 
+
+  for(int i = 257; i < 65536; i += 2) 
+  { 
+    if((x % i) == 0)
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void GenerateMixingConstants ( void )
+{
+  Rand r(8350147);
+
+  int count = 0;
+
+  int trials = 0;
+  int bitfail = 0;
+  int popfail = 0;
+  int matchfail = 0;
+  int primefail = 0;
+
+  //for(uint32_t x = 1; x; x++)
+  while(count < 100)
+  {
+    //if(x % 100000000 == 0) printf(".");
+
+    trials++;
+    uint32_t b = r.rand_u32();
+    //uint32_t b = x;
+
+    //----------
+    // must have between 14 and 18 set bits
+
+    if(popcount(b) < 16) { b = 0; popfail++; }
+    if(popcount(b) > 16) { b = 0; popfail++; }
+
+    if(b == 0) continue;
+
+    //----------
+    // must have 3-5 bits set per 8-bit window
+
+    for(int i = 0; i < 32; i++)
+    {
+      uint32_t c = ROTL32(b,i) & 0xFF;
+
+      if(popcount(c) < 3) { b = 0; bitfail++; break; }
+      if(popcount(c) > 5) { b = 0; bitfail++; break; }
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // all 8-bit windows must be different
+
+    uint8_t match[256];
+
+    memset(match,0,256);
+
+    for(int i = 0; i < 32; i++)
+    {
+      uint32_t c = ROTL32(b,i) & 0xFF;
+      
+      if(match[c]) { b = 0; matchfail++; break; }
+
+      match[c] = 1;
+    }
+
+    if(b == 0) continue;
+
+    //----------
+    // must be prime
+
+    if(!isprime(b))
+    {
+      b = 0;
+      primefail++;
+    }
+
+    if(b == 0) continue;
+
+    //----------
+
+    if(b)
+    {
+      printf("0x%08x : 0x%08x\n",b,~b);
+      count++;
+    }
+  }
+
+  printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index ee7ae9d..91e7206 100644
--- a/Types.h
+++ b/Types.h
@@ -1,374 +1,374 @@
-#pragma once

-

-#include "Platform.h"

-#include "Bitvec.h"

-

-#include <memory.h>

-#include <vector>

-#include <map>

-#include <set>

-

-//-----------------------------------------------------------------------------

-// If the optimizer detects that a value in a speed test is constant or unused,

-// the optimizer may remove references to it or otherwise create code that

-// would not occur in a real-world application. To prevent the optimizer from

-// doing this we declare two trivial functions that either sink or source data,

-// and bar the compiler from optimizing them.

-

-void     blackhole ( uint32_t x );

-uint32_t whitehole ( void );

-

-//-----------------------------------------------------------------------------

-// We want to verify that every test produces the same result on every platform

-// To do this, we hash the results of every test to produce an overall

-// verification value for the whole test suite. If two runs produce the same

-// verification value, then every test in both run produced the same results

-

-extern uint32_t g_verify;

-

-// Mix the given blob of data into the verification code

-

-void MixVCode ( const void * blob, int len );

-

-

-//-----------------------------------------------------------------------------

-

-typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );

-

-struct ByteVec : public std::vector<uint8_t>

-{

-  ByteVec ( const void * key, int len )

-  {

-    resize(len);

-    memcpy(&front(),key,len);

-  }

-};

-

-template< typename hashtype, typename keytype >

-struct CollisionMap : public std::map< hashtype, std::vector<keytype> >

-{

-};

-

-template< typename hashtype >

-struct HashSet : public std::set<hashtype>

-{

-};

-

-//-----------------------------------------------------------------------------

-

-template < class T >

-class hashfunc

-{

-public:

-

-  hashfunc ( pfHash h ) : m_hash(h)

-  {

-  }

-

-  inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )

-  {

-    m_hash(key,len,seed,out);

-  }

-

-  inline operator pfHash ( void ) const

-  {

-    return m_hash;

-  }

-

-  inline T operator () ( const void * key, const int len, const uint32_t seed ) 

-  {

-    T result;

-

-    m_hash(key,len,seed,(uint32_t*)&result);

-

-    return result;

-  }

-

-  pfHash m_hash;

-};

-

-//-----------------------------------------------------------------------------

-// Key-processing callback objects. Simplifies keyset testing a bit.

-

-struct KeyCallback

-{

-  KeyCallback() : m_count(0)

-  {

-  }

-

-  virtual ~KeyCallback()

-  {

-  }

-

-  virtual void operator() ( const void * key, int len )

-  {

-    m_count++;

-  }

-

-  virtual void reserve ( int keycount )

-  {

-  };

-

-  int m_count;

-};

-

-//----------

-

-template<typename hashtype>

-struct HashCallback : public KeyCallback

-{

-  typedef std::vector<hashtype> hashvec;

-

-  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)

-  {

-    m_hashes.clear();

-  }

-

-  virtual void operator () ( const void * key, int len )

-  {

-    size_t newsize = m_hashes.size() + 1;

-    

-    m_hashes.resize(newsize);

-

-    m_pfHash(key,len,0,&m_hashes.back());

-  }

-

-  virtual void reserve ( int keycount )

-  {

-    m_hashes.reserve(keycount);

-  }

-

-  hashvec & m_hashes;

-  pfHash m_pfHash;

-

-  //----------

-

-private:

-

-  HashCallback & operator = ( const HashCallback & );

-};

-

-//----------

-

-template<typename hashtype>

-struct CollisionCallback : public KeyCallback

-{

-  typedef HashSet<hashtype> hashset;

-  typedef CollisionMap<hashtype,ByteVec> collmap;

-

-  CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) 

-  : m_pfHash(hash), 

-    m_collisions(collisions),

-    m_collmap(cmap)

-  {

-  }

-

-  virtual void operator () ( const void * key, int len )

-  {

-    hashtype h;

-

-    m_pfHash(key,len,0,&h);

-    

-    if(m_collisions.count(h))

-    {

-      m_collmap[h].push_back( ByteVec(key,len) );

-    }

-  }

-

-  //----------

-

-  pfHash m_pfHash;

-  hashset & m_collisions;

-  collmap & m_collmap;

-

-private:

-

-  CollisionCallback & operator = ( const CollisionCallback & c );

-};

-

-//-----------------------------------------------------------------------------

-

-template < int _bits >

-class Blob

-{

-public:

-

-  Blob()

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      bytes[i] = 0;

-    }

-  }

-

-  Blob ( int x )

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      bytes[i] = 0;

-    }

-

-    *(int*)bytes = x;

-  }

-

-  Blob ( const Blob & k )

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      bytes[i] = k.bytes[i];

-    }

-  }

-

-  Blob & operator = ( const Blob & k )

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      bytes[i] = k.bytes[i];

-    }

-

-    return *this;

-  }

-

-  Blob ( uint64_t a, uint64_t b )

-  {

-    uint64_t t[2] = {a,b};

-    set(&t,16);

-  }

-

-  void set ( const void * blob, size_t len )

-  {

-    const uint8_t * k = (const uint8_t*)blob;

-

-    len = len > sizeof(bytes) ? sizeof(bytes) : len;

-

-    for(size_t i = 0; i < len; i++)

-    {

-      bytes[i] = k[i];

-    }

-

-    for(size_t i = len; i < sizeof(bytes); i++)

-    {

-      bytes[i] = 0;

-    }

-  }

-

-  uint8_t & operator [] ( int i )

-  {

-    return bytes[i];

-  }

-

-  const uint8_t & operator [] ( int i ) const

-  {

-    return bytes[i];

-  }

-

-  //----------

-  // boolean operations

-  

-  bool operator < ( const Blob & k ) const

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      if(bytes[i] < k.bytes[i]) return true;

-      if(bytes[i] > k.bytes[i]) return false;

-    }

-

-    return false;

-  }

-

-  bool operator == ( const Blob & k ) const

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      if(bytes[i] != k.bytes[i]) return false;

-    }

-

-    return true;

-  }

-

-  bool operator != ( const Blob & k ) const

-  {

-    return !(*this == k);

-  }

-

-  //----------

-  // bitwise operations

-

-  Blob operator ^ ( const Blob & k ) const 

-  {

-    Blob t;

-

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      t.bytes[i] = bytes[i] ^ k.bytes[i];

-    }

-

-    return t;

-  }

-

-  Blob & operator ^= ( const Blob & k )

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      bytes[i] ^= k.bytes[i];

-    }

-

-    return *this;

-  }

-

-  int operator & ( int x )

-  {

-    return (*(int*)bytes) & x;

-  }

-

-  Blob & operator &= ( const Blob & k )

-  {

-    for(size_t i = 0; i < sizeof(bytes); i++)

-    {

-      bytes[i] &= k.bytes[i];

-    }

-  }

-

-  Blob operator << ( int c )

-  {

-    Blob t = *this;

-

-    lshift(&t.bytes[0],sizeof(bytes),c);

-

-    return t;

-  }

-

-  Blob operator >> ( int c )

-  {

-    Blob t = *this;

-

-    rshift(&t.bytes[0],sizeof(bytes),c);

-

-    return t;

-  }

-

-  Blob & operator <<= ( int c )

-  {

-    lshift(&bytes[0],sizeof(bytes),c);

-

-    return *this;

-  }

-

-  Blob & operator >>= ( int c )

-  {

-    rshift(&bytes[0],sizeof(bytes),c);

-

-    return *this;

-  }

-

-  //----------

-  

-private:

-

-  uint8_t bytes[(_bits+7)/8];

-};

-

-typedef Blob<128> uint128_t;

-typedef Blob<256> uint256_t;

-

-//-----------------------------------------------------------------------------

+#pragma once
+
+#include "Platform.h"
+#include "Bitvec.h"
+
+#include <memory.h>
+#include <vector>
+#include <map>
+#include <set>
+
+//-----------------------------------------------------------------------------
+// If the optimizer detects that a value in a speed test is constant or unused,
+// the optimizer may remove references to it or otherwise create code that
+// would not occur in a real-world application. To prevent the optimizer from
+// doing this we declare two trivial functions that either sink or source data,
+// and bar the compiler from optimizing them.
+
+void     blackhole ( uint32_t x );
+uint32_t whitehole ( void );
+
+//-----------------------------------------------------------------------------
+// We want to verify that every test produces the same result on every platform
+// To do this, we hash the results of every test to produce an overall
+// verification value for the whole test suite. If two runs produce the same
+// verification value, then every test in both run produced the same results
+
+extern uint32_t g_verify;
+
+// Mix the given blob of data into the verification code
+
+void MixVCode ( const void * blob, int len );
+
+
+//-----------------------------------------------------------------------------
+
+typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
+
+struct ByteVec : public std::vector<uint8_t>
+{
+  ByteVec ( const void * key, int len )
+  {
+    resize(len);
+    memcpy(&front(),key,len);
+  }
+};
+
+template< typename hashtype, typename keytype >
+struct CollisionMap : public std::map< hashtype, std::vector<keytype> >
+{
+};
+
+template< typename hashtype >
+struct HashSet : public std::set<hashtype>
+{
+};
+
+//-----------------------------------------------------------------------------
+
+template < class T >
+class hashfunc
+{
+public:
+
+  hashfunc ( pfHash h ) : m_hash(h)
+  {
+  }
+
+  inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
+  {
+    m_hash(key,len,seed,out);
+  }
+
+  inline operator pfHash ( void ) const
+  {
+    return m_hash;
+  }
+
+  inline T operator () ( const void * key, const int len, const uint32_t seed ) 
+  {
+    T result;
+
+    m_hash(key,len,seed,(uint32_t*)&result);
+
+    return result;
+  }
+
+  pfHash m_hash;
+};
+
+//-----------------------------------------------------------------------------
+// Key-processing callback objects. Simplifies keyset testing a bit.
+
+struct KeyCallback
+{
+  KeyCallback() : m_count(0)
+  {
+  }
+
+  virtual ~KeyCallback()
+  {
+  }
+
+  virtual void operator() ( const void * key, int len )
+  {
+    m_count++;
+  }
+
+  virtual void reserve ( int keycount )
+  {
+  };
+
+  int m_count;
+};
+
+//----------
+
+template<typename hashtype>
+struct HashCallback : public KeyCallback
+{
+  typedef std::vector<hashtype> hashvec;
+
+  HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
+  {
+    m_hashes.clear();
+  }
+
+  virtual void operator () ( const void * key, int len )
+  {
+    size_t newsize = m_hashes.size() + 1;
+    
+    m_hashes.resize(newsize);
+
+    m_pfHash(key,len,0,&m_hashes.back());
+  }
+
+  virtual void reserve ( int keycount )
+  {
+    m_hashes.reserve(keycount);
+  }
+
+  hashvec & m_hashes;
+  pfHash m_pfHash;
+
+  //----------
+
+private:
+
+  HashCallback & operator = ( const HashCallback & );
+};
+
+//----------
+
+template<typename hashtype>
+struct CollisionCallback : public KeyCallback
+{
+  typedef HashSet<hashtype> hashset;
+  typedef CollisionMap<hashtype,ByteVec> collmap;
+
+  CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) 
+  : m_pfHash(hash), 
+    m_collisions(collisions),
+    m_collmap(cmap)
+  {
+  }
+
+  virtual void operator () ( const void * key, int len )
+  {
+    hashtype h;
+
+    m_pfHash(key,len,0,&h);
+    
+    if(m_collisions.count(h))
+    {
+      m_collmap[h].push_back( ByteVec(key,len) );
+    }
+  }
+
+  //----------
+
+  pfHash m_pfHash;
+  hashset & m_collisions;
+  collmap & m_collmap;
+
+private:
+
+  CollisionCallback & operator = ( const CollisionCallback & c );
+};
+
+//-----------------------------------------------------------------------------
+
+template < int _bits >
+class Blob
+{
+public:
+
+  Blob()
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+
+  Blob ( int x )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+
+    *(int*)bytes = x;
+  }
+
+  Blob ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+  }
+
+  Blob & operator = ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] = k.bytes[i];
+    }
+
+    return *this;
+  }
+
+  Blob ( uint64_t a, uint64_t b )
+  {
+    uint64_t t[2] = {a,b};
+    set(&t,16);
+  }
+
+  void set ( const void * blob, size_t len )
+  {
+    const uint8_t * k = (const uint8_t*)blob;
+
+    len = len > sizeof(bytes) ? sizeof(bytes) : len;
+
+    for(size_t i = 0; i < len; i++)
+    {
+      bytes[i] = k[i];
+    }
+
+    for(size_t i = len; i < sizeof(bytes); i++)
+    {
+      bytes[i] = 0;
+    }
+  }
+
+  uint8_t & operator [] ( int i )
+  {
+    return bytes[i];
+  }
+
+  const uint8_t & operator [] ( int i ) const
+  {
+    return bytes[i];
+  }
+
+  //----------
+  // boolean operations
+  
+  bool operator < ( const Blob & k ) const
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      if(bytes[i] < k.bytes[i]) return true;
+      if(bytes[i] > k.bytes[i]) return false;
+    }
+
+    return false;
+  }
+
+  bool operator == ( const Blob & k ) const
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      if(bytes[i] != k.bytes[i]) return false;
+    }
+
+    return true;
+  }
+
+  bool operator != ( const Blob & k ) const
+  {
+    return !(*this == k);
+  }
+
+  //----------
+  // bitwise operations
+
+  Blob operator ^ ( const Blob & k ) const 
+  {
+    Blob t;
+
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      t.bytes[i] = bytes[i] ^ k.bytes[i];
+    }
+
+    return t;
+  }
+
+  Blob & operator ^= ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] ^= k.bytes[i];
+    }
+
+    return *this;
+  }
+
+  int operator & ( int x )
+  {
+    return (*(int*)bytes) & x;
+  }
+
+  Blob & operator &= ( const Blob & k )
+  {
+    for(size_t i = 0; i < sizeof(bytes); i++)
+    {
+      bytes[i] &= k.bytes[i];
+    }
+  }
+
+  Blob operator << ( int c )
+  {
+    Blob t = *this;
+
+    lshift(&t.bytes[0],sizeof(bytes),c);
+
+    return t;
+  }
+
+  Blob operator >> ( int c )
+  {
+    Blob t = *this;
+
+    rshift(&t.bytes[0],sizeof(bytes),c);
+
+    return t;
+  }
+
+  Blob & operator <<= ( int c )
+  {
+    lshift(&bytes[0],sizeof(bytes),c);
+
+    return *this;
+  }
+
+  Blob & operator >>= ( int c )
+  {
+    rshift(&bytes[0],sizeof(bytes),c);
+
+    return *this;
+  }
+
+  //----------
+  
+private:
+
+  uint8_t bytes[(_bits+7)/8];
+};
+
+typedef Blob<128> uint128_t;
+typedef Blob<256> uint256_t;
+
+//-----------------------------------------------------------------------------
diff --git a/crc.cpp b/crc.cpp
index 76fcfa0..d4d6b84 100644
--- a/crc.cpp
+++ b/crc.cpp
@@ -1,100 +1,100 @@
-#include "Platform.h"

-

-/*

- * This file is derived from crc32.c from the zlib-1.1.3 distribution

- * by Jean-loup Gailly and Mark Adler.

- */

-

-/* crc32.c -- compute the CRC-32 of a data stream

- * Copyright (C) 1995-1998 Mark Adler

- * For conditions of distribution and use, see copyright notice in zlib.h

- */

-

-

-/* ========================================================================

- * Table of CRC-32's of all single-byte values (made by make_crc_table)

- */

-static const uint32_t crc_table[256] = {

-  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,

-  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,

-  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,

-  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,

-  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,

-  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,

-  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,

-  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,

-  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,

-  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,

-  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,

-  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,

-  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,

-  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,

-  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,

-  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,

-  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,

-  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,

-  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,

-  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,

-  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,

-  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,

-  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,

-  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,

-  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,

-  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,

-  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,

-  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,

-  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,

-  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,

-  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,

-  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,

-  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,

-  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,

-  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,

-  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,

-  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,

-  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,

-  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,

-  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,

-  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,

-  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,

-  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,

-  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,

-  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,

-  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,

-  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,

-  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,

-  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,

-  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,

-  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,

-  0x2d02ef8dL

-};

-

-/* ========================================================================= */

-

-#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);

-#define DO2(buf)  DO1(buf); DO1(buf);

-#define DO4(buf)  DO2(buf); DO2(buf);

-#define DO8(buf)  DO4(buf); DO4(buf);

-

-/* ========================================================================= */

-

-void crc32 ( const void * key, int len, uint32_t seed, void * out )

-{

-  uint8_t * buf = (uint8_t*)key;

-  uint32_t crc = seed ^ 0xffffffffL;

-

-  while (len >= 8)

-  {

-    DO8(buf);

-    len -= 8;

-  }

-

-  while(len--)

-  {

-    DO1(buf);

-  } 

-

-  crc ^= 0xffffffffL;

-

-  *(uint32_t*)out = crc;

-}

+#include "Platform.h"
+
+/*
+ * This file is derived from crc32.c from the zlib-1.1.3 distribution
+ * by Jean-loup Gailly and Mark Adler.
+ */
+
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-1998 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+
+/* ========================================================================
+ * Table of CRC-32's of all single-byte values (made by make_crc_table)
+ */
+static const uint32_t crc_table[256] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+  0x2d02ef8dL
+};
+
+/* ========================================================================= */
+
+#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
+#define DO2(buf)  DO1(buf); DO1(buf);
+#define DO4(buf)  DO2(buf); DO2(buf);
+#define DO8(buf)  DO4(buf); DO4(buf);
+
+/* ========================================================================= */
+
+void crc32 ( const void * key, int len, uint32_t seed, void * out )
+{
+  uint8_t * buf = (uint8_t*)key;
+  uint32_t crc = seed ^ 0xffffffffL;
+
+  while (len >= 8)
+  {
+    DO8(buf);
+    len -= 8;
+  }
+
+  while(len--)
+  {
+    DO1(buf);
+  } 
+
+  crc ^= 0xffffffffL;
+
+  *(uint32_t*)out = crc;
+}
diff --git a/lookup3.cpp b/lookup3.cpp
index 60087f1..63f00f8 100644
--- a/lookup3.cpp
+++ b/lookup3.cpp
@@ -1,72 +1,72 @@
-// lookup3 by Bob Jekins, code is public domain.

-

-#include "Platform.h"

-

-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

-

-#define mix(a,b,c) \

-{ \

-  a -= c;  a ^= rot(c, 4);  c += b; \

-  b -= a;  b ^= rot(a, 6);  a += c; \

-  c -= b;  c ^= rot(b, 8);  b += a; \

-  a -= c;  a ^= rot(c,16);  c += b; \

-  b -= a;  b ^= rot(a,19);  a += c; \

-  c -= b;  c ^= rot(b, 4);  b += a; \

-}

-

-#define final(a,b,c) \

-{ \

-  c ^= b; c -= rot(b,14); \

-  a ^= c; a -= rot(c,11); \

-  b ^= a; b -= rot(a,25); \

-  c ^= b; c -= rot(b,16); \

-  a ^= c; a -= rot(c,4);  \

-  b ^= a; b -= rot(a,14); \

-  c ^= b; c -= rot(b,24); \

-}

-

-uint32_t lookup3 ( const void * key, int length, uint32_t initval )

-{

-  uint32_t a,b,c;                                          /* internal state */

-

-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

-

-  const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

-

-  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */

-  while (length > 12)

-  {

-    a += k[0];

-    b += k[1];

-    c += k[2];

-    mix(a,b,c);

-    length -= 12;

-    k += 3;

-  }

-

-  switch(length)

-  {

-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;

-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;

-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;

-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;

-    case 8 : b+=k[1]; a+=k[0]; break;

-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;

-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;

-    case 5 : b+=k[1]&0xff; a+=k[0]; break;

-    case 4 : a+=k[0]; break;

-    case 3 : a+=k[0]&0xffffff; break;

-    case 2 : a+=k[0]&0xffff; break;

-    case 1 : a+=k[0]&0xff; break;

-    case 0 : { return c; }              /* zero length strings require no mixing */

-  }

-

-  final(a,b,c);

-

-  return c;

-}

-

-void lookup3_test ( const void * key, int len, uint32_t seed, void * out )

-{

-  *(uint32_t*)out = lookup3(key,len,seed);

-}

+// lookup3 by Bob Jenkins, code is public domain.
+
+#include "Platform.h"
+
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))  /* rotate x left by k bits, treating x as 32 bits wide */
+// mix(a,b,c): six subtract / xor-rotate / add steps that update a, b and c in place; lookup3() runs it once per 12-byte block.
+#define mix(a,b,c) \
+{ \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+// final(a,b,c): seven xor / subtract-rotate steps that update a, b and c in place; lookup3() runs it once and then returns c.
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c,4);  \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
+}
+// lookup3: hashes length bytes starting at key, seeded with initval, and returns the final value of the state word c.
+uint32_t lookup3 ( const void * key, int length, uint32_t initval )
+{
+  uint32_t a,b,c;                                          /* internal state */
+  // All three state words start from the same value, which folds in the length and the seed.
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  // NOTE(review): nothing here checks that key is 4-byte aligned - confirm callers/platforms tolerate unaligned 32-bit loads.
+  const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
+  // Consume 12 bytes per iteration while more than 12 remain, so the last 1-12 bytes are left for the switch below.
+  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+  while (length > 12)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 12;
+    k += 3;
+  }
+  // Tail: whole 32-bit words are loaded and then masked, so the last load can touch up to 3 bytes past the key; the masks keep the low-order bytes, which are the leading key bytes only on a little-endian host.
+  switch(length)
+  {
+    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+    case 8 : b+=k[1]; a+=k[0]; break;
+    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+    case 5 : b+=k[1]&0xff; a+=k[0]; break;
+    case 4 : a+=k[0]; break;
+    case 3 : a+=k[0]&0xffffff; break;
+    case 2 : a+=k[0]&0xffff; break;
+    case 1 : a+=k[0]&0xff; break;
+    case 0 : { return c; }              /* zero length strings require no mixing */
+  }
+  // Not reached when length is 0: that case returned above without the final mixing.
+  final(a,b,c);
+
+  return c;
+}
+// Adapter exposing lookup3() through a (key, len, seed, out) signature: the 32-bit result is stored at out.
+void lookup3_test ( const void * key, int len, uint32_t seed, void * out )
+{
+  *(uint32_t*)out = lookup3(key,len,seed);
+}
diff --git a/main.cpp b/main.cpp
index bf25ce0..19c605b 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,590 +1,595 @@
-#include "Platform.h"

-#include "Hashes.h"

-#include "KeysetTest.h"

-#include "SpeedTest.h"

-#include "AvalancheTest.h"

-#include "DifferentialTest.h"

-

-#include <stdio.h>

-#include <time.h>

-

-//-----------------------------------------------------------------------------

-// Configuration. TODO - move these to command-line flags

-

-bool g_testAll = false;

-

-bool g_testSanity      = false;

-bool g_testSpeed       = false;

-bool g_testDiff        = false;

-bool g_testDiffDist    = false;

-bool g_testAvalanche   = false;

-bool g_testBIC         = false;

-bool g_testCyclic      = false;

-bool g_testTwoBytes    = false;

-bool g_testSparse      = false;

-bool g_testPermutation = false;

-bool g_testWindow      = false;

-bool g_testText        = false;

-bool g_testZeroes      = false;

-bool g_testSeed        = false;

-

-//-----------------------------------------------------------------------------

-// This is the list of all hashes that SMHasher can test.

-

-struct HashInfo

-{

-  pfHash hash;

-  int hashbits;

-  uint32_t verification;

-  const char * name;

-  const char * desc;

-};

-

-HashInfo g_hashes[] =

-{

-  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },

-  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },

-  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },

-

-  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },

-

-  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },

-  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },

-

-  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },

-  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },

-  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },

-  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },

-  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },

-

-  { CityHash64_test,      64, 0x45754A6F, "City64",      "Google CityHash128WithSeed" },

-  { CityHash128_test,    128, 0x94B0EF46, "City128",     "Google CityHash128WithSeed" },

-  

-  // MurmurHash2

-

-  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },

-  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },

-  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },

-  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },

-

-  // MurmurHash3

-

-  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },

-  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },

-  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },

-

-};

-

-HashInfo * findHash ( const char * name )

-{

-  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)

-  {

-    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];

-  }

-

-  return NULL;

-}

-

-//-----------------------------------------------------------------------------

-// Self-test on startup - verify that all installed hashes work correctly.

-

-void SelfTest ( void )

-{

-  bool pass = true;

-

-  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)

-  {

-    HashInfo * info = & g_hashes[i];

-

-    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);

-  }

-

-  if(!pass)

-  {

-    printf("Self-test FAILED!\n");

-

-    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)

-    {

-      HashInfo * info = & g_hashes[i];

-

-      printf("%16s - ",info->name);

-      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);

-    }

-

-    exit(1);

-  }

-}

-

-//----------------------------------------------------------------------------

-

-template < typename hashtype >

-void test ( hashfunc<hashtype> hash, HashInfo * info )

-{

-  const int hashbits = sizeof(hashtype) * 8;

-

-  printf("-------------------------------------------------------------------------------\n");

-  printf("--- Testing %s (%s)\n\n",info->name,info->desc);

-

-  //-----------------------------------------------------------------------------

-  // Sanity tests

-

-  if(g_testSanity || g_testAll)

-  {

-    printf("[[[ Sanity Tests ]]]\n\n");

-

-    VerificationTest(hash,hashbits,info->verification,true);

-    SanityTest(hash,hashbits);

-    AppendedZeroesTest(hash,hashbits);

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Speed tests

-

-  if(g_testSpeed || g_testAll)

-  {

-    printf("[[[ Speed Tests ]]]\n\n");

-

-    BulkSpeedTest(info->hash,info->verification);

-    printf("\n");

-

-    for(int i = 1; i < 32; i++)

-    {

-      double cycles;

-

-      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);

-    }

-

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Differential tests

-

-  if(g_testDiff || g_testAll)

-  {

-    printf("[[[ Differential Tests ]]]\n\n");

-

-    bool result = true;

-    bool dumpCollisions = false;

-

-    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);

-    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);

-    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Differential-distribution tests

-

-  if(g_testDiffDist /*|| g_testAll*/)

-  {

-    printf("[[[ Differential Distribution Tests ]]]\n\n");

-

-    bool result = true;

-

-    result &= DiffDistTest2<uint64_t,hashtype>(hash);

-

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Avalanche tests

-

-  if(g_testAvalanche || g_testAll)

-  {

-    printf("[[[ Avalanche Tests ]]]\n\n");

-

-    bool result = true;

-

-    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);

-

-    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);

-

-    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);

-

-    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);

-    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Bit Independence Criteria. Interesting, but doesn't tell us much about

-  // collision or distribution.

-

-  if(g_testBIC)

-  {

-    printf("[[[ Bit Independence Criteria ]]]\n\n");

-

-    bool result = true;

-

-    //result &= BicTest<uint64_t,hashtype>(hash,2000000);

-    BicTest3<Blob<88>,hashtype>(hash,2000000);

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."

-

-  if(g_testCyclic || g_testAll)

-  {

-    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");

-

-    bool result = true;

-    bool drawDiagram = false;

-

-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);

-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);

-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);

-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);

-    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes

-

-  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.

-

-  if(g_testTwoBytes || g_testAll)

-  {

-    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");

-

-    bool result = true;

-    bool drawDiagram = false;

-

-    for(int i = 4; i <= 20; i += 4)

-    {

-      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);

-    }

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Sparse' - keys with all bits 0 except a few

-

-  if(g_testSparse || g_testAll)

-  {

-    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");

-

-    bool result = true;

-    bool drawDiagram = false;

-

-    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);

-    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);

-    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);

-    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);

-    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);

-    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);

-    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);

-    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Permutation' - all possible combinations of a set of blocks

-

-  if(g_testPermutation || g_testAll)

-  {

-    {

-      // This one breaks lookup3, surprisingly

-

-      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");

-

-      bool result = true;

-      bool drawDiagram = false;

-

-      uint32_t blocks[] =

-      {

-        0x00000000,

-

-        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,

-      };

-

-      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);

-

-      if(!result) printf("*********FAIL*********\n");

-      printf("\n");

-    }

-

-    {

-      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");

-

-      bool result = true;

-      bool drawDiagram = false;

-

-      uint32_t blocks[] =

-      {

-        0x00000000,

-

-        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000

-      };

-

-      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);

-

-      if(!result) printf("*********FAIL*********\n");

-      printf("\n");

-    }

-

-    {

-      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");

-

-      bool result = true;

-      bool drawDiagram = false;

-

-      uint32_t blocks[] =

-      {

-        0x00000000,

-

-        0x80000000,

-      };

-

-      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);

-

-      if(!result) printf("*********FAIL*********\n");

-      printf("\n");

-    }

-

-    {

-      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");

-

-      bool result = true;

-      bool drawDiagram = false;

-

-      uint32_t blocks[] =

-      {

-        0x00000000,

-

-        0x00000001,

-      };

-

-      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);

-

-      if(!result) printf("*********FAIL*********\n");

-      printf("\n");

-    }

-

-    {

-      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");

-

-      bool result = true;

-      bool drawDiagram = false;

-

-      uint32_t blocks[] =

-      {

-        0x00000000,

-

-        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,

-

-        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000

-      };

-

-      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);

-

-      if(!result) printf("*********FAIL*********\n");

-      printf("\n");

-    }

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Window'

-

-  // Skip distribution test for these - they're too easy to distribute well,

-  // and it generates a _lot_ of testing

-

-  if(g_testWindow || g_testAll)

-  {

-    printf("[[[ Keyset 'Window' Tests ]]]\n\n");

-

-    bool result = true;

-    bool testCollision = true;

-    bool testDistribution = false;

-    bool drawDiagram = false;

-

-    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Text'

-

-  if(g_testText || g_testAll)

-  {

-    printf("[[[ Keyset 'Text' Tests ]]]\n\n");

-

-    bool result = true;

-    bool drawDiagram = false;

-

-    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

-

-    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );

-    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );

-    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Zeroes'

-

-  if(g_testZeroes || g_testAll)

-  {

-    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");

-

-    bool result = true;

-    bool drawDiagram = false;

-

-    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-

-  //-----------------------------------------------------------------------------

-  // Keyset 'Seed'

-

-  if(g_testSeed || g_testAll)

-  {

-    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");

-

-    bool result = true;

-    bool drawDiagram = false;

-

-    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );

-

-    if(!result) printf("*********FAIL*********\n");

-    printf("\n");

-  }

-}

-

-//-----------------------------------------------------------------------------

-

-uint32_t g_inputVCode = 1;

-uint32_t g_outputVCode = 1;

-uint32_t g_resultVCode = 1;

-

-HashInfo * g_hashUnderTest = NULL;

-

-void VerifyHash ( const void * key, int len, uint32_t seed, void * out )

-{

-  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);

-  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);

-

-  g_hashUnderTest->hash(key,len,seed,out);

-

-  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);

-}

-

-//-----------------------------------------------------------------------------

-

-void testHash ( const char * name )

-{

-  HashInfo * pInfo = findHash(name);

-

-  if(pInfo == NULL)

-  {

-    printf("Invalid hash '%s' specified\n",name);

-    return;

-  }

-  else

-  {

-    g_hashUnderTest = pInfo;

-

-    if(pInfo->hashbits == 32)

-    {

-      test<uint32_t>( VerifyHash, pInfo );

-    }

-    else if(pInfo->hashbits == 64)

-    {

-      test<uint64_t>( pInfo->hash, pInfo );

-    }

-    else if(pInfo->hashbits == 128)

-    {

-      test<uint128_t>( pInfo->hash, pInfo );

-    }

-    else if(pInfo->hashbits == 256)

-    {

-      test<uint256_t>( pInfo->hash, pInfo );

-    }

-    else

-    {

-      printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);

-    }

-  }

-}

-//-----------------------------------------------------------------------------

-

-int main ( int argc, char ** argv )

-{

-  const char * hashToTest = "murmur3a";

-

-  if(argc < 2)

-  {

-    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");

-  }

-  else

-  {

-    hashToTest = argv[1];

-  }

-

-  // Code runs on the 3rd CPU by default

-

-  SetAffinity((1 << 2));

-

-  SelfTest();

-

-  int timeBegin = clock();

-

-  g_testAll = true;

-

-  //g_testSanity = true;

-  //g_testSpeed = true;

-  //g_testAvalanche = true;

-  //g_testBIC = true;

-  //g_testCyclic = true;

-  //g_testTwoBytes = true;

-  //g_testDiff = true;

-  //g_testDiffDist = true;

-  //g_testSparse = true;

-  //g_testPermutation = true;

-  //g_testWindow = true;

-  //g_testZeroes = true;

-

-  testHash(hashToTest);

-

-  //----------

-

-  int timeEnd = clock();

-

-  printf("\n");

-  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);

-  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));

-  printf("-------------------------------------------------------------------------------\n");

-  return 0;

-}

+#include "Platform.h"
+#include "Hashes.h"
+#include "KeysetTest.h"
+#include "SpeedTest.h"
+#include "AvalancheTest.h"
+#include "DifferentialTest.h"
+
+#include <stdio.h>
+#include <time.h>
+
+//-----------------------------------------------------------------------------
+// Configuration. TODO - move these to command-line flags
+// g_testAll is OR'ed into most suite guards in test(); the DiffDist and BIC guards do not consult it.
+bool g_testAll = false;
+// One switch per test suite; each enables the matching section of test().
+bool g_testSanity      = false;
+bool g_testSpeed       = false;
+bool g_testDiff        = false;
+bool g_testDiffDist    = false;
+bool g_testAvalanche   = false;
+bool g_testBIC         = false;
+bool g_testCyclic      = false;
+bool g_testTwoBytes    = false;
+bool g_testSparse      = false;
+bool g_testPermutation = false;
+bool g_testWindow      = false;
+bool g_testText        = false;
+bool g_testZeroes      = false;
+bool g_testSeed        = false;
+
+//-----------------------------------------------------------------------------
+// This is the list of all hashes that SMHasher can test.
+// HashInfo describes one entry of that list.
+struct HashInfo
+{
+  pfHash hash;             // hash entry point (pfHash is declared outside this file)
+  int hashbits;            // width of the hash output, in bits
+  uint32_t verification;   // expected value handed to VerificationTest()
+  const char * name;       // key that findHash() compares against
+  const char * desc;       // human-readable description printed in the test() banner
+};
+
+HashInfo g_hashes[] =   // columns: function, hashbits, verification value, lookup name, description
+{
+  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
+  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
+
+  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },
+
+  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },
+  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
+
+  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
+  { Bernstein,            32, 0xBDB4B640, "bernstein",   "Bernstein, 32-bit" },
+  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },
+  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
+  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
+  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
+
+  { CityHash64_test,      64, 0x25A20825, "City64",      "Google CityHash64WithSeed" },
+  { CityHash128_test,    128, 0x6531F54E, "City128",     "Google CityHash128WithSeed" },
+  // NOTE(review): "Spooky32" below registers SpookyHash64_test with hashbits 32 - confirm the function writes only 32 bits to out.
+  { SpookyHash64_test,    32, 0x3F798BBB, "Spooky32",    "Bob Jenkins' SpookyHash, 32-bit result" },
+  { SpookyHash64_test,    64, 0xA7F955F1, "Spooky64",    "Bob Jenkins' SpookyHash, 64-bit result" },
+  { SpookyHash128_test,  128, 0x8D263080, "Spooky128",   "Bob Jenkins' SpookyHash, 128-bit result" },
+
+  // MurmurHash2
+
+  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
+  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
+  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
+  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
+
+  // MurmurHash3
+
+  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
+  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
+  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
+
+};
+// Returns the first g_hashes entry for which _stricmp(name, entry name) is 0 (presumably a case-insensitive match - confirm per platform), or NULL.
+HashInfo * findHash ( const char * name )
+{
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
+  }
+  // No entry matched.
+  return NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Self-test on startup - verify that all installed hashes work correctly.
+// Exits the process with status 1 if any VerificationTest() call in the first pass returns false.
+void SelfTest ( void )
+{
+  bool pass = true;
+  // First pass: check every g_hashes entry with the last argument false and AND the results together.
+  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+  {
+    HashInfo * info = & g_hashes[i];
+
+    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
+  }
+
+  if(!pass)
+  {
+    printf("Self-test FAILED!\n");
+    // Second pass: repeat every check with the last argument true (presumably verbose output - confirm), each prefixed by the hash name.
+    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
+    {
+      HashInfo * info = & g_hashes[i];
+
+      printf("%16s - ",info->name);
+      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
+    }
+    // The value accumulated in the second pass is not consulted; the process always exits here.
+    exit(1);
+  }
+}
+
+//----------------------------------------------------------------------------
+
+template < typename hashtype >
+void test ( hashfunc<hashtype> hash, HashInfo * info )
+{
+  const int hashbits = sizeof(hashtype) * 8;
+
+  printf("-------------------------------------------------------------------------------\n");
+  printf("--- Testing %s (%s)\n\n",info->name,info->desc);
+
+  //-----------------------------------------------------------------------------
+  // Sanity tests
+
+  if(g_testSanity || g_testAll)
+  {
+    printf("[[[ Sanity Tests ]]]\n\n");
+
+    VerificationTest(hash,hashbits,info->verification,true);
+    SanityTest(hash,hashbits);
+    AppendedZeroesTest(hash,hashbits);
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Speed tests
+
+  if(g_testSpeed || g_testAll)
+  {
+    printf("[[[ Speed Tests ]]]\n\n");
+
+    BulkSpeedTest(info->hash,info->verification);
+    printf("\n");
+
+    for(int i = 1; i < 32; i++)
+    {
+      double cycles;
+
+      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
+    }
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential tests
+
+  if(g_testDiff || g_testAll)
+  {
+    printf("[[[ Differential Tests ]]]\n\n");
+
+    bool result = true;
+    bool dumpCollisions = false;
+
+    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
+    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
+    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Differential-distribution tests
+
+  if(g_testDiffDist /*|| g_testAll*/)
+  {
+    printf("[[[ Differential Distribution Tests ]]]\n\n");
+
+    bool result = true;
+
+    result &= DiffDistTest2<uint64_t,hashtype>(hash);
+
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Avalanche tests
+
+  if(g_testAvalanche || g_testAll)
+  {
+    printf("[[[ Avalanche Tests ]]]\n\n");
+
+    bool result = true;
+
+    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Bit Independence Criteria. Interesting, but doesn't tell us much about
+  // collision or distribution.
+
+  if(g_testBIC)
+  {
+    printf("[[[ Bit Independence Criteria ]]]\n\n");
+
+    bool result = true;
+
+    //result &= BicTest<uint64_t,hashtype>(hash,2000000);
+    BicTest3<Blob<88>,hashtype>(hash,2000000);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
+
+  if(g_testCyclic || g_testAll)
+  {
+    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
+    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
+
+  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
+
+  if(g_testTwoBytes || g_testAll)
+  {
+    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    for(int i = 4; i <= 20; i += 4)
+    {
+      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
+    }
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Sparse' - keys with all bits 0 except a few
+
+  if(g_testSparse || g_testAll)
+  {
+    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
+    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);
+    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
+    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Permutation' - all possible combinations of a set of blocks
+
+  if(g_testPermutation || g_testAll)
+  {
+    {
+      // This one breaks lookup3, surprisingly
+
+      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x80000000,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001,
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+
+    {
+      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+
+      bool result = true;
+      bool drawDiagram = false;
+
+      uint32_t blocks[] =
+      {
+        0x00000000,
+
+        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+
+        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+      };
+
+      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+      if(!result) printf("*********FAIL*********\n");
+      printf("\n");
+    }
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Window'
+
+  // Skip distribution test for these - they're too easy to distribute well,
+  // and it generates a _lot_ of testing
+
+  if(g_testWindow || g_testAll)
+  {
+    printf("[[[ Keyset 'Window' Tests ]]]\n\n");
+
+    bool result = true;
+    bool testCollision = true;
+    bool testDistribution = false;
+    bool drawDiagram = false;
+
+    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Text'
+
+  if(g_testText || g_testAll)
+  {
+    printf("[[[ Keyset 'Text' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
+    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
+    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Zeroes'
+
+  if(g_testZeroes || g_testAll)
+  {
+    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+
+  //-----------------------------------------------------------------------------
+  // Keyset 'Seed'
+
+  if(g_testSeed || g_testAll)
+  {
+    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
+
+    bool result = true;
+    bool drawDiagram = false;
+
+    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
+
+    if(!result) printf("*********FAIL*********\n");
+    printf("\n");
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+uint32_t g_inputVCode = 1;   // running MurmurOAAT chain over every key and seed seen by VerifyHash
+uint32_t g_outputVCode = 1;  // running MurmurOAAT chain over every hash value VerifyHash produced
+uint32_t g_resultVCode = 1;  // not written in this part of the file; printed by main() at exit
+
+HashInfo * g_hashUnderTest = NULL;  // descriptor VerifyHash forwards to; assigned by testHash()
+
+void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
+{ // Calls the hash under test while chaining every input and output into the verification codes.
+  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);                 // key bytes first...
+  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);  // ...then the seed
+  // Forward to the real hash; g_hashUnderTest is dereferenced unchecked (testHash assigns it).
+  g_hashUnderTest->hash(key,len,seed,out);
+  // Chain the hashbits/8 bytes that the hash wrote to 'out'.
+  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
+}
+
+//-----------------------------------------------------------------------------
+
+void testHash ( const char * name )
+{ // Looks up the hash called 'name' and runs test<>() instantiated for its output width.
+  HashInfo * pInfo = findHash(name);
+  // Unknown names are reported and skipped.
+  if(pInfo == NULL)
+  {
+    printf("Invalid hash '%s' specified\n",name);
+    return;
+  }
+  else
+  {
+    g_hashUnderTest = pInfo;  // read by VerifyHash
+    // NOTE(review): only the 32-bit path goes through VerifyHash; wider hashes are called directly - confirm intended.
+    if(pInfo->hashbits == 32)
+    {
+      test<uint32_t>( VerifyHash, pInfo );
+    }
+    else if(pInfo->hashbits == 64)
+    {
+      test<uint64_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 128)
+    {
+      test<uint128_t>( pInfo->hash, pInfo );
+    }
+    else if(pInfo->hashbits == 256)
+    {
+      test<uint256_t>( pInfo->hash, pInfo );
+    }
+    else
+    {
+      printf("Invalid hash bit width %d for hash '%s'\n",pInfo->hashbits,pInfo->name);  // newline-terminated like the message above
+    }
+  }
+}
+//-----------------------------------------------------------------------------
+
+int main ( int argc, char ** argv )
+{
+  const char * hashToTest = "murmur3a";
+  // argv[1], when present, names the hash to test; otherwise the default above is used.
+  if(argc < 2)
+  {
+    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
+  }
+  else
+  {
+    hashToTest = argv[1];
+  }
+
+  // Code runs on the 3rd CPU by default
+
+  SetAffinity((1 << 2));
+
+  SelfTest();
+  // clock_t rather than int: a tick count narrowed to int can wrap on long runs where clock_t is wider.
+  clock_t timeBegin = clock();
+
+  g_testAll = true;
+  // Per-group switches; each test block checks its own flag as well as g_testAll.
+  //g_testSanity = true;
+  //g_testSpeed = true;
+  //g_testAvalanche = true;
+  //g_testBIC = true;
+  //g_testCyclic = true;
+  //g_testTwoBytes = true;
+  //g_testDiff = true;
+  //g_testDiffDist = true;
+  //g_testSparse = true;
+  //g_testPermutation = true;
+  //g_testWindow = true;
+  //g_testZeroes = true;
+
+  testHash(hashToTest);
+
+  //----------
+
+  clock_t timeEnd = clock();
+
+  printf("\n");
+  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
+  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
+  printf("-------------------------------------------------------------------------------\n");
+  return 0;
+}
diff --git a/md5.cpp b/md5.cpp
index 43b870a..8e50c79 100644
--- a/md5.cpp
+++ b/md5.cpp
@@ -1,382 +1,382 @@
-#include <memory.h>

-#include "Types.h"

-

-// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"

-

-/**

- * \brief          MD5 context structure

- */

-typedef struct

-{

-    unsigned long total[2];     /*!< number of bytes processed  */

-    unsigned long state[4];     /*!< intermediate digest state  */

-    unsigned char buffer[64];   /*!< data block being processed */

-

-    unsigned char ipad[64];     /*!< HMAC: inner padding        */

-    unsigned char opad[64];     /*!< HMAC: outer padding        */

-}

-md5_context;

-

-/**

- * \brief          MD5 context setup

- *

- * \param ctx      context to be initialized

- */

-void md5_starts( md5_context *ctx );

-

-/**

- * \brief          MD5 process buffer

- *

- * \param ctx      MD5 context

- * \param input    buffer holding the  data

- * \param ilen     length of the input data

- */

-void md5_update( md5_context *ctx, unsigned char *input, int ilen );

-

-/**

- * \brief          MD5 final digest

- *

- * \param ctx      MD5 context

- * \param output   MD5 checksum result

- */

-void md5_finish( md5_context *ctx, unsigned char output[16] );

-

-/**

- * \brief          Output = MD5( input buffer )

- *

- * \param input    buffer holding the  data

- * \param ilen     length of the input data

- * \param output   MD5 checksum result

- */

-void md5( unsigned char *input, int ilen, unsigned char output[16] );

-

-/**

- * \brief          Output = MD5( file contents )

- *

- * \param path     input file name

- * \param output   MD5 checksum result

- *

- * \return         0 if successful, 1 if fopen failed,

- *                 or 2 if fread failed

- */

-int md5_file( char *path, unsigned char output[16] );

-

-/**

- * \brief          MD5 HMAC context setup

- *

- * \param ctx      HMAC context to be initialized

- * \param key      HMAC secret key

- * \param keylen   length of the HMAC key

- */

-void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );

-

-/**

- * \brief          MD5 HMAC process buffer

- *

- * \param ctx      HMAC context

- * \param input    buffer holding the  data

- * \param ilen     length of the input data

- */

-void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );

-

-/**

- * \brief          MD5 HMAC final digest

- *

- * \param ctx      HMAC context

- * \param output   MD5 HMAC checksum result

- */

-void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );

-

-/**

- * \brief          Output = HMAC-MD5( hmac key, input buffer )

- *

- * \param key      HMAC secret key

- * \param keylen   length of the HMAC key

- * \param input    buffer holding the  data

- * \param ilen     length of the input data

- * \param output   HMAC-MD5 result

- */

-void md5_hmac( unsigned char *key, int keylen,

-               unsigned char *input, int ilen,

-               unsigned char output[16] );

-

-/**

- * \brief          Checkup routine

- *

- * \return         0 if successful, or 1 if the test failed

- */

-int md5_self_test( int verbose );

-

-/*

- * 32-bit integer manipulation macros (little endian)

- */

-#ifndef GET_ULONG_LE

-#define GET_ULONG_LE(n,b,i)                             \

-{                                                       \

-    (n) = ( (unsigned long) (b)[(i)    ]       )        \

-        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \

-        | ( (unsigned long) (b)[(i) + 2] << 16 )        \

-        | ( (unsigned long) (b)[(i) + 3] << 24 );       \

-}

-#endif

-

-#ifndef PUT_ULONG_LE

-#define PUT_ULONG_LE(n,b,i)                             \

-{                                                       \

-    (b)[(i)    ] = (unsigned char) ( (n)       );       \

-    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \

-    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \

-    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \

-}

-#endif

-

-/*

- * MD5 context setup

- */

-void md5_starts( md5_context *ctx )

-{

-    ctx->total[0] = 0;

-    ctx->total[1] = 0;

-

-    ctx->state[0] = 0x67452301;

-    ctx->state[1] = 0xEFCDAB89;

-    ctx->state[2] = 0x98BADCFE;

-    ctx->state[3] = 0x10325476;

-}

-

-static void md5_process( md5_context *ctx, unsigned char data[64] )

-{

-    unsigned long X[16], A, B, C, D;

-

-    GET_ULONG_LE( X[ 0], data,  0 );

-    GET_ULONG_LE( X[ 1], data,  4 );

-    GET_ULONG_LE( X[ 2], data,  8 );

-    GET_ULONG_LE( X[ 3], data, 12 );

-    GET_ULONG_LE( X[ 4], data, 16 );

-    GET_ULONG_LE( X[ 5], data, 20 );

-    GET_ULONG_LE( X[ 6], data, 24 );

-    GET_ULONG_LE( X[ 7], data, 28 );

-    GET_ULONG_LE( X[ 8], data, 32 );

-    GET_ULONG_LE( X[ 9], data, 36 );

-    GET_ULONG_LE( X[10], data, 40 );

-    GET_ULONG_LE( X[11], data, 44 );

-    GET_ULONG_LE( X[12], data, 48 );

-    GET_ULONG_LE( X[13], data, 52 );

-    GET_ULONG_LE( X[14], data, 56 );

-    GET_ULONG_LE( X[15], data, 60 );

-

-#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))

-

-#define P(a,b,c,d,k,s,t)                                \

-{                                                       \

-    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \

-}

-

-    A = ctx->state[0];

-    B = ctx->state[1];

-    C = ctx->state[2];

-    D = ctx->state[3];

-

-#define F(x,y,z) (z ^ (x & (y ^ z)))

-

-    P( A, B, C, D,  0,  7, 0xD76AA478 );

-    P( D, A, B, C,  1, 12, 0xE8C7B756 );

-    P( C, D, A, B,  2, 17, 0x242070DB );

-    P( B, C, D, A,  3, 22, 0xC1BDCEEE );

-    P( A, B, C, D,  4,  7, 0xF57C0FAF );

-    P( D, A, B, C,  5, 12, 0x4787C62A );

-    P( C, D, A, B,  6, 17, 0xA8304613 );

-    P( B, C, D, A,  7, 22, 0xFD469501 );

-    P( A, B, C, D,  8,  7, 0x698098D8 );

-    P( D, A, B, C,  9, 12, 0x8B44F7AF );

-    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );

-    P( B, C, D, A, 11, 22, 0x895CD7BE );

-    P( A, B, C, D, 12,  7, 0x6B901122 );

-    P( D, A, B, C, 13, 12, 0xFD987193 );

-    P( C, D, A, B, 14, 17, 0xA679438E );

-    P( B, C, D, A, 15, 22, 0x49B40821 );

-

-#undef F

-

-#define F(x,y,z) (y ^ (z & (x ^ y)))

-

-    P( A, B, C, D,  1,  5, 0xF61E2562 );

-    P( D, A, B, C,  6,  9, 0xC040B340 );

-    P( C, D, A, B, 11, 14, 0x265E5A51 );

-    P( B, C, D, A,  0, 20, 0xE9B6C7AA );

-    P( A, B, C, D,  5,  5, 0xD62F105D );

-    P( D, A, B, C, 10,  9, 0x02441453 );

-    P( C, D, A, B, 15, 14, 0xD8A1E681 );

-    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );

-    P( A, B, C, D,  9,  5, 0x21E1CDE6 );

-    P( D, A, B, C, 14,  9, 0xC33707D6 );

-    P( C, D, A, B,  3, 14, 0xF4D50D87 );

-    P( B, C, D, A,  8, 20, 0x455A14ED );

-    P( A, B, C, D, 13,  5, 0xA9E3E905 );

-    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );

-    P( C, D, A, B,  7, 14, 0x676F02D9 );

-    P( B, C, D, A, 12, 20, 0x8D2A4C8A );

-

-#undef F

-    

-#define F(x,y,z) (x ^ y ^ z)

-

-    P( A, B, C, D,  5,  4, 0xFFFA3942 );

-    P( D, A, B, C,  8, 11, 0x8771F681 );

-    P( C, D, A, B, 11, 16, 0x6D9D6122 );

-    P( B, C, D, A, 14, 23, 0xFDE5380C );

-    P( A, B, C, D,  1,  4, 0xA4BEEA44 );

-    P( D, A, B, C,  4, 11, 0x4BDECFA9 );

-    P( C, D, A, B,  7, 16, 0xF6BB4B60 );

-    P( B, C, D, A, 10, 23, 0xBEBFBC70 );

-    P( A, B, C, D, 13,  4, 0x289B7EC6 );

-    P( D, A, B, C,  0, 11, 0xEAA127FA );

-    P( C, D, A, B,  3, 16, 0xD4EF3085 );

-    P( B, C, D, A,  6, 23, 0x04881D05 );

-    P( A, B, C, D,  9,  4, 0xD9D4D039 );

-    P( D, A, B, C, 12, 11, 0xE6DB99E5 );

-    P( C, D, A, B, 15, 16, 0x1FA27CF8 );

-    P( B, C, D, A,  2, 23, 0xC4AC5665 );

-

-#undef F

-

-#define F(x,y,z) (y ^ (x | ~z))

-

-    P( A, B, C, D,  0,  6, 0xF4292244 );

-    P( D, A, B, C,  7, 10, 0x432AFF97 );

-    P( C, D, A, B, 14, 15, 0xAB9423A7 );

-    P( B, C, D, A,  5, 21, 0xFC93A039 );

-    P( A, B, C, D, 12,  6, 0x655B59C3 );

-    P( D, A, B, C,  3, 10, 0x8F0CCC92 );

-    P( C, D, A, B, 10, 15, 0xFFEFF47D );

-    P( B, C, D, A,  1, 21, 0x85845DD1 );

-    P( A, B, C, D,  8,  6, 0x6FA87E4F );

-    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );

-    P( C, D, A, B,  6, 15, 0xA3014314 );

-    P( B, C, D, A, 13, 21, 0x4E0811A1 );

-    P( A, B, C, D,  4,  6, 0xF7537E82 );

-    P( D, A, B, C, 11, 10, 0xBD3AF235 );

-    P( C, D, A, B,  2, 15, 0x2AD7D2BB );

-    P( B, C, D, A,  9, 21, 0xEB86D391 );

-

-#undef F

-

-    ctx->state[0] += A;

-    ctx->state[1] += B;

-    ctx->state[2] += C;

-    ctx->state[3] += D;

-}

-

-/*

- * MD5 process buffer

- */

-void md5_update( md5_context *ctx, unsigned char *input, int ilen )

-{

-    int fill;

-    unsigned long left;

-

-    if( ilen <= 0 )

-        return;

-

-    left = ctx->total[0] & 0x3F;

-    fill = 64 - left;

-

-    ctx->total[0] += ilen;

-    ctx->total[0] &= 0xFFFFFFFF;

-

-    if( ctx->total[0] < (unsigned long) ilen )

-        ctx->total[1]++;

-

-    if( left && ilen >= fill )

-    {

-        memcpy( (void *) (ctx->buffer + left),

-                (void *) input, fill );

-        md5_process( ctx, ctx->buffer );

-        input += fill;

-        ilen  -= fill;

-        left = 0;

-    }

-

-    while( ilen >= 64 )

-    {

-        md5_process( ctx, input );

-        input += 64;

-        ilen  -= 64;

-    }

-

-    if( ilen > 0 )

-    {

-        memcpy( (void *) (ctx->buffer + left),

-                (void *) input, ilen );

-    }

-}

-

-static const unsigned char md5_padding[64] =

-{

- 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

-};

-

-/*

- * MD5 final digest

- */

-void md5_finish( md5_context *ctx, unsigned char output[16] )

-{

-    unsigned long last, padn;

-    unsigned long high, low;

-    unsigned char msglen[8];

-

-    high = ( ctx->total[0] >> 29 )

-         | ( ctx->total[1] <<  3 );

-    low  = ( ctx->total[0] <<  3 );

-

-    PUT_ULONG_LE( low,  msglen, 0 );

-    PUT_ULONG_LE( high, msglen, 4 );

-

-    last = ctx->total[0] & 0x3F;

-    padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );

-

-    md5_update( ctx, (unsigned char *) md5_padding, padn );

-    md5_update( ctx, msglen, 8 );

-

-    PUT_ULONG_LE( ctx->state[0], output,  0 );

-    PUT_ULONG_LE( ctx->state[1], output,  4 );

-    PUT_ULONG_LE( ctx->state[2], output,  8 );

-    PUT_ULONG_LE( ctx->state[3], output, 12 );

-}

-

-/*

- * output = MD5( input buffer )

- */

-void md5( unsigned char *input, int ilen, unsigned char output[16] )

-{

-    md5_context ctx;

-

-    md5_starts( &ctx );

-    md5_update( &ctx, input, ilen );

-    md5_finish( &ctx, output );

-

-    memset( &ctx, 0, sizeof( md5_context ) );

-}

-

-unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )

-{

-  unsigned int hash[4];

-

-  md5((unsigned char *)input,len,(unsigned char *)hash);

-

-  //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];

-

-  return hash[0];

-}	

-

-void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )

-{

-  unsigned int hash[4];

-

-  md5((unsigned char*)key,len,(unsigned char*)hash);

-

-  *(uint32_t*)out = hash[0];

+#include <memory.h>
+#include "Types.h"
+
+// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"
+
+/**
+ * \brief          MD5 context structure
+ */
+typedef struct
+{
+    unsigned long total[2];     /*!< bytes processed: [0] low 32 bits, [1] carries out of [0] */
+    unsigned long state[4];     /*!< intermediate digest state (loaded as A, B, C, D)         */
+    unsigned char buffer[64];   /*!< partial data block waiting to be processed               */
+    /* The HMAC pads below are not referenced by any function defined in this file. */
+    unsigned char ipad[64];     /*!< HMAC: inner padding        */
+    unsigned char opad[64];     /*!< HMAC: outer padding        */
+}
+md5_context;
+
+/**
+ * \brief          MD5 context setup
+ *
+ * \param ctx      context to be initialized
+ */
+void md5_starts( md5_context *ctx );
+
+/**
+ * \brief          MD5 process buffer
+ *
+ * \param ctx      MD5 context
+ * \param input    buffer holding the data
+ * \param ilen     length of the input data
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen );
+
+/**
+ * \brief          MD5 final digest
+ *
+ * \param ctx      MD5 context
+ * \param output   MD5 checksum result
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( input buffer )
+ *
+ * \param input    buffer holding the data
+ * \param ilen     length of the input data
+ * \param output   MD5 checksum result
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] );
+
+/**
+ * \brief          Output = MD5( file contents )
+ *
+ * \param path     input file name
+ * \param output   MD5 checksum result
+ *
+ * \return         0 if successful, 1 if fopen failed,
+ *                 or 2 if fread failed
+ */
+int md5_file( char *path, unsigned char output[16] );
+
+/**
+ * \brief          MD5 HMAC context setup
+ *
+ * \param ctx      HMAC context to be initialized
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ */
+void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
+
+/**
+ * \brief          MD5 HMAC process buffer
+ *
+ * \param ctx      HMAC context
+ * \param input    buffer holding the data
+ * \param ilen     length of the input data
+ */
+void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
+
+/**
+ * \brief          MD5 HMAC final digest
+ *
+ * \param ctx      HMAC context
+ * \param output   MD5 HMAC checksum result
+ */
+void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
+
+/**
+ * \brief          Output = HMAC-MD5( hmac key, input buffer )
+ *
+ * \param key      HMAC secret key
+ * \param keylen   length of the HMAC key
+ * \param input    buffer holding the data
+ * \param ilen     length of the input data
+ * \param output   HMAC-MD5 result
+ */
+void md5_hmac( unsigned char *key, int keylen,
+               unsigned char *input, int ilen,
+               unsigned char output[16] );
+
+/**
+ * \brief          Checkup routine
+ *
+ * \return         0 if successful, or 1 if the test failed
+ */
+int md5_self_test( int verbose );
+
+/*
+ * 32-bit little-endian helpers. GET_ULONG_LE: n = bytes b[i..i+3], least significant byte first.
+ */
+#ifndef GET_ULONG_LE
+#define GET_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (n) = ( (unsigned long) (b)[(i)    ]       )        \
+        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \
+        | ( (unsigned long) (b)[(i) + 2] << 16 )        \
+        | ( (unsigned long) (b)[(i) + 3] << 24 );       \
+}
+#endif
+/* PUT_ULONG_LE: store the low 32 bits of n into b[i..i+3], least significant byte first. */
+#ifndef PUT_ULONG_LE
+#define PUT_ULONG_LE(n,b,i)                             \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n)       );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \
+}
+#endif
+
+/*
+ * MD5 context setup: zero the byte counter and load the four initial state words
+ */
+void md5_starts( md5_context *ctx )
+{
+    ctx->total[0] = 0;
+    ctx->total[1] = 0;
+    /* buffer, ipad and opad are left as they are; only total and state are reset. */
+    ctx->state[0] = 0x67452301;
+    ctx->state[1] = 0xEFCDAB89;
+    ctx->state[2] = 0x98BADCFE;
+    ctx->state[3] = 0x10325476;
+}
+
+static void md5_process( md5_context *ctx, unsigned char data[64] )
+{
+    unsigned long X[16], A, B, C, D;
+    /* Mix one 64-byte block into ctx->state; first load it as sixteen little-endian 32-bit words. */
+    GET_ULONG_LE( X[ 0], data,  0 );
+    GET_ULONG_LE( X[ 1], data,  4 );
+    GET_ULONG_LE( X[ 2], data,  8 );
+    GET_ULONG_LE( X[ 3], data, 12 );
+    GET_ULONG_LE( X[ 4], data, 16 );
+    GET_ULONG_LE( X[ 5], data, 20 );
+    GET_ULONG_LE( X[ 6], data, 24 );
+    GET_ULONG_LE( X[ 7], data, 28 );
+    GET_ULONG_LE( X[ 8], data, 32 );
+    GET_ULONG_LE( X[ 9], data, 36 );
+    GET_ULONG_LE( X[10], data, 40 );
+    GET_ULONG_LE( X[11], data, 44 );
+    GET_ULONG_LE( X[12], data, 48 );
+    GET_ULONG_LE( X[13], data, 52 );
+    GET_ULONG_LE( X[14], data, 56 );
+    GET_ULONG_LE( X[15], data, 60 );
+    /* S: rotate x left by n as a 32-bit value; the mask keeps bits above 31 out of the right-shifted half. */
+#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
+    /* P: one step. It uses whichever F is currently defined; F is redefined before each round. */
+#define P(a,b,c,d,k,s,t)                                \
+{                                                       \
+    a += F(b,c,d) + X[k] + t; a = S(a,s) + b;           \
+}
+
+    A = ctx->state[0];
+    B = ctx->state[1];
+    C = ctx->state[2];
+    D = ctx->state[3];
+    /* Round 1 (16 steps) */
+#define F(x,y,z) (z ^ (x & (y ^ z)))
+
+    P( A, B, C, D,  0,  7, 0xD76AA478 );
+    P( D, A, B, C,  1, 12, 0xE8C7B756 );
+    P( C, D, A, B,  2, 17, 0x242070DB );
+    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
+    P( A, B, C, D,  4,  7, 0xF57C0FAF );
+    P( D, A, B, C,  5, 12, 0x4787C62A );
+    P( C, D, A, B,  6, 17, 0xA8304613 );
+    P( B, C, D, A,  7, 22, 0xFD469501 );
+    P( A, B, C, D,  8,  7, 0x698098D8 );
+    P( D, A, B, C,  9, 12, 0x8B44F7AF );
+    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
+    P( B, C, D, A, 11, 22, 0x895CD7BE );
+    P( A, B, C, D, 12,  7, 0x6B901122 );
+    P( D, A, B, C, 13, 12, 0xFD987193 );
+    P( C, D, A, B, 14, 17, 0xA679438E );
+    P( B, C, D, A, 15, 22, 0x49B40821 );
+
+#undef F
+    /* Round 2 (16 steps) */
+#define F(x,y,z) (y ^ (z & (x ^ y)))
+
+    P( A, B, C, D,  1,  5, 0xF61E2562 );
+    P( D, A, B, C,  6,  9, 0xC040B340 );
+    P( C, D, A, B, 11, 14, 0x265E5A51 );
+    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
+    P( A, B, C, D,  5,  5, 0xD62F105D );
+    P( D, A, B, C, 10,  9, 0x02441453 );
+    P( C, D, A, B, 15, 14, 0xD8A1E681 );
+    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
+    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
+    P( D, A, B, C, 14,  9, 0xC33707D6 );
+    P( C, D, A, B,  3, 14, 0xF4D50D87 );
+    P( B, C, D, A,  8, 20, 0x455A14ED );
+    P( A, B, C, D, 13,  5, 0xA9E3E905 );
+    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
+    P( C, D, A, B,  7, 14, 0x676F02D9 );
+    P( B, C, D, A, 12, 20, 0x8D2A4C8A );
+
+#undef F
+    /* Round 3 (16 steps) */
+#define F(x,y,z) (x ^ y ^ z)
+
+    P( A, B, C, D,  5,  4, 0xFFFA3942 );
+    P( D, A, B, C,  8, 11, 0x8771F681 );
+    P( C, D, A, B, 11, 16, 0x6D9D6122 );
+    P( B, C, D, A, 14, 23, 0xFDE5380C );
+    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
+    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
+    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
+    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
+    P( A, B, C, D, 13,  4, 0x289B7EC6 );
+    P( D, A, B, C,  0, 11, 0xEAA127FA );
+    P( C, D, A, B,  3, 16, 0xD4EF3085 );
+    P( B, C, D, A,  6, 23, 0x04881D05 );
+    P( A, B, C, D,  9,  4, 0xD9D4D039 );
+    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
+    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
+    P( B, C, D, A,  2, 23, 0xC4AC5665 );
+
+#undef F
+    /* Round 4 (16 steps) */
+#define F(x,y,z) (y ^ (x | ~z))
+
+    P( A, B, C, D,  0,  6, 0xF4292244 );
+    P( D, A, B, C,  7, 10, 0x432AFF97 );
+    P( C, D, A, B, 14, 15, 0xAB9423A7 );
+    P( B, C, D, A,  5, 21, 0xFC93A039 );
+    P( A, B, C, D, 12,  6, 0x655B59C3 );
+    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
+    P( C, D, A, B, 10, 15, 0xFFEFF47D );
+    P( B, C, D, A,  1, 21, 0x85845DD1 );
+    P( A, B, C, D,  8,  6, 0x6FA87E4F );
+    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
+    P( C, D, A, B,  6, 15, 0xA3014314 );
+    P( B, C, D, A, 13, 21, 0x4E0811A1 );
+    P( A, B, C, D,  4,  6, 0xF7537E82 );
+    P( D, A, B, C, 11, 10, 0xBD3AF235 );
+    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
+    P( B, C, D, A,  9, 21, 0xEB86D391 );
+
+#undef F
+    /* Add the working values back into the running state. */
+    ctx->state[0] += A;
+    ctx->state[1] += B;
+    ctx->state[2] += C;
+    ctx->state[3] += D;
+}
+
+/*
+ * MD5 process buffer: absorb ilen bytes of input, processing every complete 64-byte block
+ */
+void md5_update( md5_context *ctx, unsigned char *input, int ilen )
+{
+    int fill;
+    unsigned long left;
+
+    if( ilen <= 0 )   /* empty or negative length: nothing to do */
+        return;
+
+    left = ctx->total[0] & 0x3F;   /* bytes already sitting in ctx->buffer */
+    fill = 64 - left;              /* bytes needed to complete that block  */
+
+    ctx->total[0] += ilen;
+    ctx->total[0] &= 0xFFFFFFFF;   /* keep the low counter to 32 bits even if unsigned long is wider */
+
+    if( ctx->total[0] < (unsigned long) ilen )   /* low word wrapped: carry into the high word */
+        ctx->total[1]++;
+
+    if( left && ilen >= fill )   /* top up the buffered partial block and process it */
+    {
+        memcpy( (void *) (ctx->buffer + left),
+                (void *) input, fill );
+        md5_process( ctx, ctx->buffer );
+        input += fill;
+        ilen  -= fill;
+        left = 0;
+    }
+
+    while( ilen >= 64 )   /* full blocks are processed straight from the caller's buffer */
+    {
+        md5_process( ctx, input );
+        input += 64;
+        ilen  -= 64;
+    }
+
+    if( ilen > 0 )   /* keep the remaining tail in ctx->buffer for a later call */
+    {
+        memcpy( (void *) (ctx->buffer + left),
+                (void *) input, ilen );
+    }
+}
+
+static const unsigned char md5_padding[64] =   /* a 0x80 byte followed by 63 zero bytes; md5_finish feeds a prefix of it */
+{
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * MD5 final digest: append padding and the message length, then write the 16-byte result to output
+ */
+void md5_finish( md5_context *ctx, unsigned char output[16] )
+{
+    unsigned long last, padn;
+    unsigned long high, low;
+    unsigned char msglen[8];
+    /* Message length in bits (byte count << 3), split into two words; taken before padding is added. */
+    high = ( ctx->total[0] >> 29 )
+         | ( ctx->total[1] <<  3 );
+    low  = ( ctx->total[0] <<  3 );
+
+    PUT_ULONG_LE( low,  msglen, 0 );
+    PUT_ULONG_LE( high, msglen, 4 );
+
+    last = ctx->total[0] & 0x3F;                             /* bytes in the final partial block */
+    padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );   /* pad up to 56 mod 64, leaving 8 bytes for msglen */
+
+    md5_update( ctx, (unsigned char *) md5_padding, padn );
+    md5_update( ctx, msglen, 8 );
+    /* Emit the four state words, each as four little-endian bytes. */
+    PUT_ULONG_LE( ctx->state[0], output,  0 );
+    PUT_ULONG_LE( ctx->state[1], output,  4 );
+    PUT_ULONG_LE( ctx->state[2], output,  8 );
+    PUT_ULONG_LE( ctx->state[3], output, 12 );
+}
+
+/*
+ * output = MD5( input buffer ): one-shot starts/update/finish on a local context
+ */
+void md5( unsigned char *input, int ilen, unsigned char output[16] )
+{
+    md5_context ctx;
+
+    md5_starts( &ctx );
+    md5_update( &ctx, input, ilen );   /* does nothing when ilen <= 0 */
+    md5_finish( &ctx, output );
+    /* Zero the local context before returning. */
+    memset( &ctx, 0, sizeof( md5_context ) );
+}
+
+unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )
+{
+  unsigned int hash[4];  // receives the 16-byte digest; assumes unsigned int is 4 bytes - TODO confirm
+  // The seed parameter is unnamed and unused: the result is the plain MD5 of the input.
+  md5((unsigned char *)input,len,(unsigned char *)hash);
+
+  //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];
+  // Only the first word of the digest is returned, read back as a native unsigned int.
+  return hash[0];
+}	
+
+void md5_32            ( const void * key, int len, uint32_t /*seed*/, void * out )
+{
+  unsigned int hash[4];  // receives the 16-byte digest; assumes unsigned int is 4 bytes - TODO confirm
+  // The seed parameter is unnamed and unused: the result is the plain MD5 of the key.
+  md5((unsigned char*)key,len,(unsigned char*)hash);
+  // Write only the first digest word through 'out'; the caller must supply at least 4 writable bytes.
+  *(uint32_t*)out = hash[0];
 }
\ No newline at end of file
diff --git a/pstdint.h b/pstdint.h
index 3320264..43dce62 100644
--- a/pstdint.h
+++ b/pstdint.h
@@ -1,799 +1,799 @@
-/*  A portable stdint.h

- ****************************************************************************

- *  BSD License:

- ****************************************************************************

- *

- *  Copyright (c) 2005-2007 Paul Hsieh

- *  All rights reserved.

- *  

- *  Redistribution and use in source and binary forms, with or without

- *  modification, are permitted provided that the following conditions

- *  are met:

- *  

- *  1. Redistributions of source code must retain the above copyright

- *     notice, this list of conditions and the following disclaimer.

- *  2. Redistributions in binary form must reproduce the above copyright

- *     notice, this list of conditions and the following disclaimer in the

- *     documentation and/or other materials provided with the distribution.

- *  3. The name of the author may not be used to endorse or promote products

- *     derived from this software without specific prior written permission.

- *  

- *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

- *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

- *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

- *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

- *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

- *

- ****************************************************************************

- *

- *  Version 0.1.11

- *

- *  The ANSI C standard committee, for the C99 standard, specified the

- *  inclusion of a new standard include file called stdint.h.  This is

- *  a very useful and long desired include file which contains several

- *  very precise definitions for integer scalar types that is

- *  critically important for making portable several classes of

- *  applications including cryptography, hashing, variable length

- *  integer libraries and so on.  But for most developers its likely

- *  useful just for programming sanity.

- *

- *  The problem is that most compiler vendors have decided not to

- *  implement the C99 standard, and the next C++ language standard

- *  (which has a lot more mindshare these days) will be a long time in

- *  coming and its unknown whether or not it will include stdint.h or

- *  how much adoption it will have.  Either way, it will be a long time

- *  before all compilers come with a stdint.h and it also does nothing

- *  for the extremely large number of compilers available today which

- *  do not include this file, or anything comparable to it.

- *

- *  So that's what this file is all about.  Its an attempt to build a

- *  single universal include file that works on as many platforms as

- *  possible to deliver what stdint.h is supposed to.  A few things

- *  that should be noted about this file:

- *

- *    1) It is not guaranteed to be portable and/or present an identical

- *       interface on all platforms.  The extreme variability of the

- *       ANSI C standard makes this an impossibility right from the

- *       very get go. Its really only meant to be useful for the vast

- *       majority of platforms that possess the capability of

- *       implementing usefully and precisely defined, standard sized

- *       integer scalars.  Systems which are not intrinsically 2s

- *       complement may produce invalid constants.

- *

- *    2) There is an unavoidable use of non-reserved symbols.

- *

- *    3) Other standard include files are invoked.

- *

- *    4) This file may come in conflict with future platforms that do

- *       include stdint.h.  The hope is that one or the other can be

- *       used with no real difference.

- *

- *    5) In the current verison, if your platform can't represent

- *       int32_t, int16_t and int8_t, it just dumps out with a compiler

- *       error.

- *

- *    6) 64 bit integers may or may not be defined.  Test for their

- *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.

- *       Note that this is different from the C99 specification which

- *       requires the existence of 64 bit support in the compiler.  If

- *       this is not defined for your platform, yet it is capable of

- *       dealing with 64 bits then it is because this file has not yet

- *       been extended to cover all of your system's capabilities.

- *

- *    7) (u)intptr_t may or may not be defined.  Test for its presence

- *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined

- *       for your platform, then it is because this file has not yet

- *       been extended to cover all of your system's capabilities, not

- *       because its optional.

- *

- *    8) The following might not been defined even if your platform is

- *       capable of defining it:

- *

- *       WCHAR_MIN

- *       WCHAR_MAX

- *       (u)int64_t

- *       PTRDIFF_MIN

- *       PTRDIFF_MAX

- *       (u)intptr_t

- *

- *    9) The following have not been defined:

- *

- *       WINT_MIN

- *       WINT_MAX

- *

- *   10) The criteria for defining (u)int_least(*)_t isn't clear,

- *       except for systems which don't have a type that precisely

- *       defined 8, 16, or 32 bit types (which this include file does

- *       not support anyways). Default definitions have been given.

- *

- *   11) The criteria for defining (u)int_fast(*)_t isn't something I

- *       would trust to any particular compiler vendor or the ANSI C

- *       committee.  It is well known that "compatible systems" are

- *       commonly created that have very different performance

- *       characteristics from the systems they are compatible with,

- *       especially those whose vendors make both the compiler and the

- *       system.  Default definitions have been given, but its strongly

- *       recommended that users never use these definitions for any

- *       reason (they do *NOT* deliver any serious guarantee of

- *       improved performance -- not in this file, nor any vendor's

- *       stdint.h).

- *

- *   12) The following macros:

- *

- *       PRINTF_INTMAX_MODIFIER

- *       PRINTF_INT64_MODIFIER

- *       PRINTF_INT32_MODIFIER

- *       PRINTF_INT16_MODIFIER

- *       PRINTF_LEAST64_MODIFIER

- *       PRINTF_LEAST32_MODIFIER

- *       PRINTF_LEAST16_MODIFIER

- *       PRINTF_INTPTR_MODIFIER

- *

- *       are strings which have been defined as the modifiers required

- *       for the "d", "u" and "x" printf formats to correctly output

- *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,

- *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.

- *       PRINTF_INTPTR_MODIFIER is not defined for some systems which

- *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not

- *       defined if INT64_MAX is not defined.  These are an extension

- *       beyond what C99 specifies must be in stdint.h.

- *

- *       In addition, the following macros are defined:

- *

- *       PRINTF_INTMAX_HEX_WIDTH

- *       PRINTF_INT64_HEX_WIDTH

- *       PRINTF_INT32_HEX_WIDTH

- *       PRINTF_INT16_HEX_WIDTH

- *       PRINTF_INT8_HEX_WIDTH

- *       PRINTF_INTMAX_DEC_WIDTH

- *       PRINTF_INT64_DEC_WIDTH

- *       PRINTF_INT32_DEC_WIDTH

- *       PRINTF_INT16_DEC_WIDTH

- *       PRINTF_INT8_DEC_WIDTH

- *

- *       Which specifies the maximum number of characters required to

- *       print the number of that type in either hexadecimal or decimal.

- *       These are an extension beyond what C99 specifies must be in

- *       stdint.h.

- *

- *  Compilers tested (all with 0 warnings at their highest respective

- *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32

- *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio

- *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3

- *

- *  This file should be considered a work in progress.  Suggestions for

- *  improvements, especially those which increase coverage are strongly

- *  encouraged.

- *

- *  Acknowledgements

- *

- *  The following people have made significant contributions to the

- *  development and testing of this file:

- *

- *  Chris Howie

- *  John Steele Scott

- *  Dave Thorup

- *

- */

-

-#include <stddef.h>

-#include <limits.h>

-#include <signal.h>

-

-/*

- *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and

- *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.

- */

-

-#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)

-#include <stdint.h>

-#define _PSTDINT_H_INCLUDED

-# ifndef PRINTF_INT64_MODIFIER

-#  define PRINTF_INT64_MODIFIER "ll"

-# endif

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER "l"

-# endif

-# ifndef PRINTF_INT16_MODIFIER

-#  define PRINTF_INT16_MODIFIER "h"

-# endif

-# ifndef PRINTF_INTMAX_MODIFIER

-#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER

-# endif

-# ifndef PRINTF_INT64_HEX_WIDTH

-#  define PRINTF_INT64_HEX_WIDTH "16"

-# endif

-# ifndef PRINTF_INT32_HEX_WIDTH

-#  define PRINTF_INT32_HEX_WIDTH "8"

-# endif

-# ifndef PRINTF_INT16_HEX_WIDTH

-#  define PRINTF_INT16_HEX_WIDTH "4"

-# endif

-# ifndef PRINTF_INT8_HEX_WIDTH

-#  define PRINTF_INT8_HEX_WIDTH "2"

-# endif

-# ifndef PRINTF_INT64_DEC_WIDTH

-#  define PRINTF_INT64_DEC_WIDTH "20"

-# endif

-# ifndef PRINTF_INT32_DEC_WIDTH

-#  define PRINTF_INT32_DEC_WIDTH "10"

-# endif

-# ifndef PRINTF_INT16_DEC_WIDTH

-#  define PRINTF_INT16_DEC_WIDTH "5"

-# endif

-# ifndef PRINTF_INT8_DEC_WIDTH

-#  define PRINTF_INT8_DEC_WIDTH "3"

-# endif

-# ifndef PRINTF_INTMAX_HEX_WIDTH

-#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH

-# endif

-# ifndef PRINTF_INTMAX_DEC_WIDTH

-#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH

-# endif

-

-/*

- *  Something really weird is going on with Open Watcom.  Just pull some of

- *  these duplicated definitions from Open Watcom's stdint.h file for now.

- */

-

-# if defined (__WATCOMC__) && __WATCOMC__ >= 1250

-#  if !defined (INT64_C)

-#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))

-#  endif

-#  if !defined (UINT64_C)

-#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))

-#  endif

-#  if !defined (INT32_C)

-#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))

-#  endif

-#  if !defined (UINT32_C)

-#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))

-#  endif

-#  if !defined (INT16_C)

-#   define INT16_C(x)   (x)

-#  endif

-#  if !defined (UINT16_C)

-#   define UINT16_C(x)  (x)

-#  endif

-#  if !defined (INT8_C)

-#   define INT8_C(x)   (x)

-#  endif

-#  if !defined (UINT8_C)

-#   define UINT8_C(x)  (x)

-#  endif

-#  if !defined (UINT64_MAX)

-#   define UINT64_MAX  18446744073709551615ULL

-#  endif

-#  if !defined (INT64_MAX)

-#   define INT64_MAX  9223372036854775807LL

-#  endif

-#  if !defined (UINT32_MAX)

-#   define UINT32_MAX  4294967295UL

-#  endif

-#  if !defined (INT32_MAX)

-#   define INT32_MAX  2147483647L

-#  endif

-#  if !defined (INTMAX_MAX)

-#   define INTMAX_MAX INT64_MAX

-#  endif

-#  if !defined (INTMAX_MIN)

-#   define INTMAX_MIN INT64_MIN

-#  endif

-# endif

-#endif

-

-#ifndef _PSTDINT_H_INCLUDED

-#define _PSTDINT_H_INCLUDED

-

-#ifndef SIZE_MAX

-# define SIZE_MAX (~(size_t)0)

-#endif

-

-/*

- *  Deduce the type assignments from limits.h under the assumption that

- *  integer sizes in bits are powers of 2, and follow the ANSI

- *  definitions.

- */

-

-#ifndef UINT8_MAX

-# define UINT8_MAX 0xff

-#endif

-#ifndef uint8_t

-# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)

-    typedef unsigned char uint8_t;

-#   define UINT8_C(v) ((uint8_t) v)

-# else

-#   error "Platform not supported"

-# endif

-#endif

-

-#ifndef INT8_MAX

-# define INT8_MAX 0x7f

-#endif

-#ifndef INT8_MIN

-# define INT8_MIN INT8_C(0x80)

-#endif

-#ifndef int8_t

-# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)

-    typedef signed char int8_t;

-#   define INT8_C(v) ((int8_t) v)

-# else

-#   error "Platform not supported"

-# endif

-#endif

-

-#ifndef UINT16_MAX

-# define UINT16_MAX 0xffff

-#endif

-#ifndef uint16_t

-#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)

-  typedef unsigned int uint16_t;

-# ifndef PRINTF_INT16_MODIFIER

-#  define PRINTF_INT16_MODIFIER ""

-# endif

-# define UINT16_C(v) ((uint16_t) (v))

-#elif (USHRT_MAX == UINT16_MAX)

-  typedef unsigned short uint16_t;

-# define UINT16_C(v) ((uint16_t) (v))

-# ifndef PRINTF_INT16_MODIFIER

-#  define PRINTF_INT16_MODIFIER "h"

-# endif

-#else

-#error "Platform not supported"

-#endif

-#endif

-

-#ifndef INT16_MAX

-# define INT16_MAX 0x7fff

-#endif

-#ifndef INT16_MIN

-# define INT16_MIN INT16_C(0x8000)

-#endif

-#ifndef int16_t

-#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)

-  typedef signed int int16_t;

-# define INT16_C(v) ((int16_t) (v))

-# ifndef PRINTF_INT16_MODIFIER

-#  define PRINTF_INT16_MODIFIER ""

-# endif

-#elif (SHRT_MAX == INT16_MAX)

-  typedef signed short int16_t;

-# define INT16_C(v) ((int16_t) (v))

-# ifndef PRINTF_INT16_MODIFIER

-#  define PRINTF_INT16_MODIFIER "h"

-# endif

-#else

-#error "Platform not supported"

-#endif

-#endif

-

-#ifndef UINT32_MAX

-# define UINT32_MAX (0xffffffffUL)

-#endif

-#ifndef uint32_t

-#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)

-  typedef unsigned long uint32_t;

-# define UINT32_C(v) v ## UL

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER "l"

-# endif

-#elif (UINT_MAX == UINT32_MAX)

-  typedef unsigned int uint32_t;

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER ""

-# endif

-# define UINT32_C(v) v ## U

-#elif (USHRT_MAX == UINT32_MAX)

-  typedef unsigned short uint32_t;

-# define UINT32_C(v) ((unsigned short) (v))

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER ""

-# endif

-#else

-#error "Platform not supported"

-#endif

-#endif

-

-#ifndef INT32_MAX

-# define INT32_MAX (0x7fffffffL)

-#endif

-#ifndef INT32_MIN

-# define INT32_MIN INT32_C(0x80000000)

-#endif

-#ifndef int32_t

-#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)

-  typedef signed long int32_t;

-# define INT32_C(v) v ## L

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER "l"

-# endif

-#elif (INT_MAX == INT32_MAX)

-  typedef signed int int32_t;

-# define INT32_C(v) v

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER ""

-# endif

-#elif (SHRT_MAX == INT32_MAX)

-  typedef signed short int32_t;

-# define INT32_C(v) ((short) (v))

-# ifndef PRINTF_INT32_MODIFIER

-#  define PRINTF_INT32_MODIFIER ""

-# endif

-#else

-#error "Platform not supported"

-#endif

-#endif

-

-/*

- *  The macro stdint_int64_defined is temporarily used to record

- *  whether or not 64 integer support is available.  It must be

- *  defined for any 64 integer extensions for new platforms that are

- *  added.

- */

-

-#undef stdint_int64_defined

-#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)

-# if (__STDC__ && __STDC_VERSION >= 199901L) || defined (S_SPLINT_S)

-#  define stdint_int64_defined

-   typedef long long int64_t;

-   typedef unsigned long long uint64_t;

-#  define UINT64_C(v) v ## ULL

-#  define  INT64_C(v) v ## LL

-#  ifndef PRINTF_INT64_MODIFIER

-#   define PRINTF_INT64_MODIFIER "ll"

-#  endif

-# endif

-#endif

-

-#if !defined (stdint_int64_defined)

-# if defined(__GNUC__)

-#  define stdint_int64_defined

-   __extension__ typedef long long int64_t;

-   __extension__ typedef unsigned long long uint64_t;

-#  define UINT64_C(v) v ## ULL

-#  define  INT64_C(v) v ## LL

-#  ifndef PRINTF_INT64_MODIFIER

-#   define PRINTF_INT64_MODIFIER "ll"

-#  endif

-# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)

-#  define stdint_int64_defined

-   typedef long long int64_t;

-   typedef unsigned long long uint64_t;

-#  define UINT64_C(v) v ## ULL

-#  define  INT64_C(v) v ## LL

-#  ifndef PRINTF_INT64_MODIFIER

-#   define PRINTF_INT64_MODIFIER "ll"

-#  endif

-# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)

-#  define stdint_int64_defined

-   typedef __int64 int64_t;

-   typedef unsigned __int64 uint64_t;

-#  define UINT64_C(v) v ## UI64

-#  define  INT64_C(v) v ## I64

-#  ifndef PRINTF_INT64_MODIFIER

-#   define PRINTF_INT64_MODIFIER "I64"

-#  endif

-# endif

-#endif

-

-#if !defined (LONG_LONG_MAX) && defined (INT64_C)

-# define LONG_LONG_MAX INT64_C (9223372036854775807)

-#endif

-#ifndef ULONG_LONG_MAX

-# define ULONG_LONG_MAX UINT64_C (18446744073709551615)

-#endif

-

-#if !defined (INT64_MAX) && defined (INT64_C)

-# define INT64_MAX INT64_C (9223372036854775807)

-#endif

-#if !defined (INT64_MIN) && defined (INT64_C)

-# define INT64_MIN INT64_C (-9223372036854775808)

-#endif

-#if !defined (UINT64_MAX) && defined (INT64_C)

-# define UINT64_MAX UINT64_C (18446744073709551615)

-#endif

-

-/*

- *  Width of hexadecimal for number field.

- */

-

-#ifndef PRINTF_INT64_HEX_WIDTH

-# define PRINTF_INT64_HEX_WIDTH "16"

-#endif

-#ifndef PRINTF_INT32_HEX_WIDTH

-# define PRINTF_INT32_HEX_WIDTH "8"

-#endif

-#ifndef PRINTF_INT16_HEX_WIDTH

-# define PRINTF_INT16_HEX_WIDTH "4"

-#endif

-#ifndef PRINTF_INT8_HEX_WIDTH

-# define PRINTF_INT8_HEX_WIDTH "2"

-#endif

-

-#ifndef PRINTF_INT64_DEC_WIDTH

-# define PRINTF_INT64_DEC_WIDTH "20"

-#endif

-#ifndef PRINTF_INT32_DEC_WIDTH

-# define PRINTF_INT32_DEC_WIDTH "10"

-#endif

-#ifndef PRINTF_INT16_DEC_WIDTH

-# define PRINTF_INT16_DEC_WIDTH "5"

-#endif

-#ifndef PRINTF_INT8_DEC_WIDTH

-# define PRINTF_INT8_DEC_WIDTH "3"

-#endif

-

-/*

- *  Ok, lets not worry about 128 bit integers for now.  Moore's law says

- *  we don't need to worry about that until about 2040 at which point

- *  we'll have bigger things to worry about.

- */

-

-#ifdef stdint_int64_defined

-  typedef int64_t intmax_t;

-  typedef uint64_t uintmax_t;

-# define  INTMAX_MAX   INT64_MAX

-# define  INTMAX_MIN   INT64_MIN

-# define UINTMAX_MAX  UINT64_MAX

-# define UINTMAX_C(v) UINT64_C(v)

-# define  INTMAX_C(v)  INT64_C(v)

-# ifndef PRINTF_INTMAX_MODIFIER

-#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER

-# endif

-# ifndef PRINTF_INTMAX_HEX_WIDTH

-#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH

-# endif

-# ifndef PRINTF_INTMAX_DEC_WIDTH

-#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH

-# endif

-#else

-  typedef int32_t intmax_t;

-  typedef uint32_t uintmax_t;

-# define  INTMAX_MAX   INT32_MAX

-# define UINTMAX_MAX  UINT32_MAX

-# define UINTMAX_C(v) UINT32_C(v)

-# define  INTMAX_C(v)  INT32_C(v)

-# ifndef PRINTF_INTMAX_MODIFIER

-#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER

-# endif

-# ifndef PRINTF_INTMAX_HEX_WIDTH

-#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH

-# endif

-# ifndef PRINTF_INTMAX_DEC_WIDTH

-#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH

-# endif

-#endif

-

-/*

- *  Because this file currently only supports platforms which have

- *  precise powers of 2 as bit sizes for the default integers, the

- *  least definitions are all trivial.  Its possible that a future

- *  version of this file could have different definitions.

- */

-

-#ifndef stdint_least_defined

-  typedef   int8_t   int_least8_t;

-  typedef  uint8_t  uint_least8_t;

-  typedef  int16_t  int_least16_t;

-  typedef uint16_t uint_least16_t;

-  typedef  int32_t  int_least32_t;

-  typedef uint32_t uint_least32_t;

-# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER

-# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER

-# define  UINT_LEAST8_MAX  UINT8_MAX

-# define   INT_LEAST8_MAX   INT8_MAX

-# define UINT_LEAST16_MAX UINT16_MAX

-# define  INT_LEAST16_MAX  INT16_MAX

-# define UINT_LEAST32_MAX UINT32_MAX

-# define  INT_LEAST32_MAX  INT32_MAX

-# define   INT_LEAST8_MIN   INT8_MIN

-# define  INT_LEAST16_MIN  INT16_MIN

-# define  INT_LEAST32_MIN  INT32_MIN

-# ifdef stdint_int64_defined

-    typedef  int64_t  int_least64_t;

-    typedef uint64_t uint_least64_t;

-#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER

-#   define UINT_LEAST64_MAX UINT64_MAX

-#   define  INT_LEAST64_MAX  INT64_MAX

-#   define  INT_LEAST64_MIN  INT64_MIN

-# endif

-#endif

-#undef stdint_least_defined

-

-/*

- *  The ANSI C committee pretending to know or specify anything about

- *  performance is the epitome of misguided arrogance.  The mandate of

- *  this file is to *ONLY* ever support that absolute minimum

- *  definition of the fast integer types, for compatibility purposes.

- *  No extensions, and no attempt to suggest what may or may not be a

- *  faster integer type will ever be made in this file.  Developers are

- *  warned to stay away from these types when using this or any other

- *  stdint.h.

- */

-

-typedef   int_least8_t   int_fast8_t;

-typedef  uint_least8_t  uint_fast8_t;

-typedef  int_least16_t  int_fast16_t;

-typedef uint_least16_t uint_fast16_t;

-typedef  int_least32_t  int_fast32_t;

-typedef uint_least32_t uint_fast32_t;

-#define  UINT_FAST8_MAX  UINT_LEAST8_MAX

-#define   INT_FAST8_MAX   INT_LEAST8_MAX

-#define UINT_FAST16_MAX UINT_LEAST16_MAX

-#define  INT_FAST16_MAX  INT_LEAST16_MAX

-#define UINT_FAST32_MAX UINT_LEAST32_MAX

-#define  INT_FAST32_MAX  INT_LEAST32_MAX

-#define   INT_FAST8_MIN   INT_LEAST8_MIN

-#define  INT_FAST16_MIN  INT_LEAST16_MIN

-#define  INT_FAST32_MIN  INT_LEAST32_MIN

-#ifdef stdint_int64_defined

-  typedef  int_least64_t  int_fast64_t;

-  typedef uint_least64_t uint_fast64_t;

-# define UINT_FAST64_MAX UINT_LEAST64_MAX

-# define  INT_FAST64_MAX  INT_LEAST64_MAX

-# define  INT_FAST64_MIN  INT_LEAST64_MIN

-#endif

-

-#undef stdint_int64_defined

-

-/*

- *  Whatever piecemeal, per compiler thing we can do about the wchar_t

- *  type limits.

- */

-

-#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)

-# include <wchar.h>

-# ifndef WCHAR_MIN

-#  define WCHAR_MIN 0

-# endif

-# ifndef WCHAR_MAX

-#  define WCHAR_MAX ((wchar_t)-1)

-# endif

-#endif

-

-/*

- *  Whatever piecemeal, per compiler/platform thing we can do about the

- *  (u)intptr_t types and limits.

- */

-

-#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)

-# define STDINT_H_UINTPTR_T_DEFINED

-#endif

-

-#ifndef STDINT_H_UINTPTR_T_DEFINED

-# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)

-#  define stdint_intptr_bits 64

-# elif defined (__WATCOMC__) || defined (__TURBOC__)

-#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)

-#    define stdint_intptr_bits 16

-#  else

-#    define stdint_intptr_bits 32

-#  endif

-# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)

-#  define stdint_intptr_bits 32

-# elif defined (__INTEL_COMPILER)

-/* TODO -- what will Intel do about x86-64? */

-# endif

-

-# ifdef stdint_intptr_bits

-#  define stdint_intptr_glue3_i(a,b,c)  a##b##c

-#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)

-#  ifndef PRINTF_INTPTR_MODIFIER

-#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)

-#  endif

-#  ifndef PTRDIFF_MAX

-#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)

-#  endif

-#  ifndef PTRDIFF_MIN

-#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)

-#  endif

-#  ifndef UINTPTR_MAX

-#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)

-#  endif

-#  ifndef INTPTR_MAX

-#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)

-#  endif

-#  ifndef INTPTR_MIN

-#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)

-#  endif

-#  ifndef INTPTR_C

-#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)

-#  endif

-#  ifndef UINTPTR_C

-#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)

-#  endif

-  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;

-  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;

-# else

-/* TODO -- This following is likely wrong for some platforms, and does

-   nothing for the definition of uintptr_t. */

-  typedef ptrdiff_t intptr_t;

-# endif

-# define STDINT_H_UINTPTR_T_DEFINED

-#endif

-

-/*

- *  Assumes sig_atomic_t is signed and we have a 2s complement machine.

- */

-

-#ifndef SIG_ATOMIC_MAX

-# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)

-#endif

-

-#endif

-

-#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)

-

-/* 

- *  Please compile with the maximum warning settings to make sure macros are not

- *  defined more than once.

- */

- 

-#include <stdlib.h>

-#include <stdio.h>

-#include <string.h>

- 

-#define glue3_aux(x,y,z) x ## y ## z

-#define glue3(x,y,z) glue3_aux(x,y,z)

-

-#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0);

-#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0);

-

-#define DECL(us,bits) glue3(DECL,us,) (bits)

-

-#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)

- 

-int main () {

-  DECL(I,8)

-  DECL(U,8)

-  DECL(I,16)

-  DECL(U,16)

-  DECL(I,32)

-  DECL(U,32)

-#ifdef INT64_MAX

-  DECL(I,64)

-  DECL(U,64)

-#endif

-  intmax_t imax = INTMAX_C(0);

-  uintmax_t umax = UINTMAX_C(0);

-  char str0[256], str1[256];

-

-  sprintf (str0, "%d %x\n", 0, ~0);

-  

-  sprintf (str1, "%d %x\n",  i8, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);

-  sprintf (str1, "%u %x\n",  u8, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);

-  sprintf (str1, "%d %x\n",  i16, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);

-  sprintf (str1, "%u %x\n",  u16, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	

-  sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);

-  sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);

-#ifdef INT64_MAX	

-  sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);

-#endif

-  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);

-  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);

-  if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	

-  

-  TESTUMAX(8);

-  TESTUMAX(16);

-  TESTUMAX(32);

-#ifdef INT64_MAX

-  TESTUMAX(64);

-#endif

-

-  return EXIT_SUCCESS;

-}

-

-#endif

+/*  A portable stdint.h
+ ****************************************************************************
+ *  BSD License:
+ ****************************************************************************
+ *
+ *  Copyright (c) 2005-2007 Paul Hsieh
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. The name of the author may not be used to endorse or promote products
+ *     derived from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************
+ *
+ *  Version 0.1.11
+ *
+ *  The ANSI C standard committee, for the C99 standard, specified the
+ *  inclusion of a new standard include file called stdint.h.  This is
+ *  a very useful and long desired include file which contains several
+ *  very precise definitions for integer scalar types that is
+ *  critically important for making portable several classes of
+ *  applications including cryptography, hashing, variable length
+ *  integer libraries and so on.  But for most developers it's likely
+ *  useful just for programming sanity.
+ *
+ *  The problem is that most compiler vendors have decided not to
+ *  implement the C99 standard, and the next C++ language standard
+ *  (which has a lot more mindshare these days) will be a long time in
+ *  coming and it's unknown whether or not it will include stdint.h or
+ *  how much adoption it will have.  Either way, it will be a long time
+ *  before all compilers come with a stdint.h and it also does nothing
+ *  for the extremely large number of compilers available today which
+ *  do not include this file, or anything comparable to it.
+ *
+ *  So that's what this file is all about.  It's an attempt to build a
+ *  single universal include file that works on as many platforms as
+ *  possible to deliver what stdint.h is supposed to.  A few things
+ *  that should be noted about this file:
+ *
+ *    1) It is not guaranteed to be portable and/or present an identical
+ *       interface on all platforms.  The extreme variability of the
+ *       ANSI C standard makes this an impossibility right from the
+ *       very get go. It's really only meant to be useful for the vast
+ *       majority of platforms that possess the capability of
+ *       implementing usefully and precisely defined, standard sized
+ *       integer scalars.  Systems which are not intrinsically 2s
+ *       complement may produce invalid constants.
+ *
+ *    2) There is an unavoidable use of non-reserved symbols.
+ *
+ *    3) Other standard include files are invoked.
+ *
+ *    4) This file may come in conflict with future platforms that do
+ *       include stdint.h.  The hope is that one or the other can be
+ *       used with no real difference.
+ *
+ *    5) In the current version, if your platform can't represent
+ *       int32_t, int16_t and int8_t, it just dumps out with a compiler
+ *       error.
+ *
+ *    6) 64 bit integers may or may not be defined.  Test for their
+ *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
+ *       Note that this is different from the C99 specification which
+ *       requires the existence of 64 bit support in the compiler.  If
+ *       this is not defined for your platform, yet it is capable of
+ *       dealing with 64 bits then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities.
+ *
+ *    7) (u)intptr_t may or may not be defined.  Test for its presence
+ *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
+ *       for your platform, then it is because this file has not yet
+ *       been extended to cover all of your system's capabilities, not
+ *       because it's optional.
+ *
+ *    8) The following might not be defined even if your platform is
+ *       capable of defining it:
+ *
+ *       WCHAR_MIN
+ *       WCHAR_MAX
+ *       (u)int64_t
+ *       PTRDIFF_MIN
+ *       PTRDIFF_MAX
+ *       (u)intptr_t
+ *
+ *    9) The following have not been defined:
+ *
+ *       WINT_MIN
+ *       WINT_MAX
+ *
+ *   10) The criteria for defining (u)int_least(*)_t isn't clear,
+ *       except for systems which don't have a type that precisely
+ *       defined 8, 16, or 32 bit types (which this include file does
+ *       not support anyways). Default definitions have been given.
+ *
+ *   11) The criteria for defining (u)int_fast(*)_t isn't something I
+ *       would trust to any particular compiler vendor or the ANSI C
+ *       committee.  It is well known that "compatible systems" are
+ *       commonly created that have very different performance
+ *       characteristics from the systems they are compatible with,
+ *       especially those whose vendors make both the compiler and the
+ *       system.  Default definitions have been given, but it's strongly
+ *       recommended that users never use these definitions for any
+ *       reason (they do *NOT* deliver any serious guarantee of
+ *       improved performance -- not in this file, nor any vendor's
+ *       stdint.h).
+ *
+ *   12) The following macros:
+ *
+ *       PRINTF_INTMAX_MODIFIER
+ *       PRINTF_INT64_MODIFIER
+ *       PRINTF_INT32_MODIFIER
+ *       PRINTF_INT16_MODIFIER
+ *       PRINTF_LEAST64_MODIFIER
+ *       PRINTF_LEAST32_MODIFIER
+ *       PRINTF_LEAST16_MODIFIER
+ *       PRINTF_INTPTR_MODIFIER
+ *
+ *       are strings which have been defined as the modifiers required
+ *       for the "d", "u" and "x" printf formats to correctly output
+ *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
+ *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
+ *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
+ *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
+ *       defined if INT64_MAX is not defined.  These are an extension
+ *       beyond what C99 specifies must be in stdint.h.
+ *
+ *       In addition, the following macros are defined:
+ *
+ *       PRINTF_INTMAX_HEX_WIDTH
+ *       PRINTF_INT64_HEX_WIDTH
+ *       PRINTF_INT32_HEX_WIDTH
+ *       PRINTF_INT16_HEX_WIDTH
+ *       PRINTF_INT8_HEX_WIDTH
+ *       PRINTF_INTMAX_DEC_WIDTH
+ *       PRINTF_INT64_DEC_WIDTH
+ *       PRINTF_INT32_DEC_WIDTH
+ *       PRINTF_INT16_DEC_WIDTH
+ *       PRINTF_INT8_DEC_WIDTH
+ *
+ *       Which specifies the maximum number of characters required to
+ *       print the number of that type in either hexadecimal or decimal.
+ *       These are an extension beyond what C99 specifies must be in
+ *       stdint.h.
+ *
+ *  Compilers tested (all with 0 warnings at their highest respective
+ *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
+ *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
+ *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
+ *
+ *  This file should be considered a work in progress.  Suggestions for
+ *  improvements, especially those which increase coverage are strongly
+ *  encouraged.
+ *
+ *  Acknowledgements
+ *
+ *  The following people have made significant contributions to the
+ *  development and testing of this file:
+ *
+ *  Chris Howie
+ *  John Steele Scott
+ *  Dave Thorup
+ *
+ */
+
+#include <stddef.h>
+#include <limits.h>
+#include <signal.h>
+
+/*
+ *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
+ *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
+ */
+
+#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)
+#include <stdint.h>
+#define _PSTDINT_H_INCLUDED
+# ifndef PRINTF_INT64_MODIFIER
+#  define PRINTF_INT64_MODIFIER "ll"
+# endif
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+# ifndef PRINTF_INTMAX_MODIFIER
+#  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INT64_HEX_WIDTH
+#  define PRINTF_INT64_HEX_WIDTH "16"
+# endif
+# ifndef PRINTF_INT32_HEX_WIDTH
+#  define PRINTF_INT32_HEX_WIDTH "8"
+# endif
+# ifndef PRINTF_INT16_HEX_WIDTH
+#  define PRINTF_INT16_HEX_WIDTH "4"
+# endif
+# ifndef PRINTF_INT8_HEX_WIDTH
+#  define PRINTF_INT8_HEX_WIDTH "2"
+# endif
+# ifndef PRINTF_INT64_DEC_WIDTH
+#  define PRINTF_INT64_DEC_WIDTH "20"
+# endif
+# ifndef PRINTF_INT32_DEC_WIDTH
+#  define PRINTF_INT32_DEC_WIDTH "10"
+# endif
+# ifndef PRINTF_INT16_DEC_WIDTH
+#  define PRINTF_INT16_DEC_WIDTH "5"
+# endif
+# ifndef PRINTF_INT8_DEC_WIDTH
+#  define PRINTF_INT8_DEC_WIDTH "3"
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+
+/*
+ *  Something really weird is going on with Open Watcom.  Just pull some of
+ *  these duplicated definitions from Open Watcom's stdint.h file for now.
+ */
+
+# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
+#  if !defined (INT64_C)
+#   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
+#  endif
+#  if !defined (UINT64_C)
+#   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
+#  endif
+#  if !defined (INT32_C)
+#   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
+#  endif
+#  if !defined (UINT32_C)
+#   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
+#  endif
+#  if !defined (INT16_C)
+#   define INT16_C(x)   (x)
+#  endif
+#  if !defined (UINT16_C)
+#   define UINT16_C(x)  (x)
+#  endif
+#  if !defined (INT8_C)
+#   define INT8_C(x)   (x)
+#  endif
+#  if !defined (UINT8_C)
+#   define UINT8_C(x)  (x)
+#  endif
+#  if !defined (UINT64_MAX)
+#   define UINT64_MAX  18446744073709551615ULL
+#  endif
+#  if !defined (INT64_MAX)
+#   define INT64_MAX  9223372036854775807LL
+#  endif
+#  if !defined (UINT32_MAX)
+#   define UINT32_MAX  4294967295UL
+#  endif
+#  if !defined (INT32_MAX)
+#   define INT32_MAX  2147483647L
+#  endif
+#  if !defined (INTMAX_MAX)
+#   define INTMAX_MAX INT64_MAX
+#  endif
+#  if !defined (INTMAX_MIN)
+#   define INTMAX_MIN INT64_MIN
+#  endif
+# endif
+#endif
+
+#ifndef _PSTDINT_H_INCLUDED
+#define _PSTDINT_H_INCLUDED
+
+#ifndef SIZE_MAX
+# define SIZE_MAX (~(size_t)0)
+#endif
+
+/*
+ *  Deduce the type assignments from limits.h under the assumption that
+ *  integer sizes in bits are powers of 2, and follow the ANSI
+ *  definitions.
+ */
+
+#ifndef UINT8_MAX
+# define UINT8_MAX 0xff
+#endif
+#ifndef uint8_t
+# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
+    typedef unsigned char uint8_t;
+#   define UINT8_C(v) ((uint8_t) (v))  /* argument parenthesized so the cast applies to the whole expression */
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef INT8_MAX
+# define INT8_MAX 0x7f
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN INT8_C(0x80)
+#endif
+#ifndef int8_t
+# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
+    typedef signed char int8_t;
+#   define INT8_C(v) ((int8_t) (v))  /* argument parenthesized so the cast applies to the whole expression */
+# else
+#   error "Platform not supported"
+# endif
+#endif
+
+#ifndef UINT16_MAX
+# define UINT16_MAX 0xffff
+#endif
+#ifndef uint16_t
+#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
+  typedef unsigned int uint16_t;
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+# define UINT16_C(v) ((uint16_t) (v))
+#elif (USHRT_MAX == UINT16_MAX)
+  typedef unsigned short uint16_t;
+# define UINT16_C(v) ((uint16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT16_MAX
+# define INT16_MAX 0x7fff
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN INT16_C(0x8000)
+#endif
+#ifndef int16_t
+#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
+  typedef signed int int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT16_MAX)
+  typedef signed short int16_t;
+# define INT16_C(v) ((int16_t) (v))
+# ifndef PRINTF_INT16_MODIFIER
+#  define PRINTF_INT16_MODIFIER "h"
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef UINT32_MAX
+# define UINT32_MAX (0xffffffffUL)
+#endif
+#ifndef uint32_t
+#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
+  typedef unsigned long uint32_t;
+# define UINT32_C(v) v ## UL
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (UINT_MAX == UINT32_MAX)
+  typedef unsigned int uint32_t;
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+# define UINT32_C(v) v ## U
+#elif (USHRT_MAX == UINT32_MAX)
+  typedef unsigned short uint32_t;
+# define UINT32_C(v) ((unsigned short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+#ifndef INT32_MAX
+# define INT32_MAX (0x7fffffffL)
+#endif
+#ifndef INT32_MIN
+# define INT32_MIN (-INT32_C(2147483647) - 1)  /* 0x80000000 would be an unsigned (positive) constant; derive the minimum arithmetically */
+#endif
+#ifndef int32_t
+#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
+  typedef signed long int32_t;
+# define INT32_C(v) v ## L
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER "l"
+# endif
+#elif (INT_MAX == INT32_MAX)
+  typedef signed int int32_t;
+# define INT32_C(v) v
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#elif (SHRT_MAX == INT32_MAX)
+  typedef signed short int32_t;
+# define INT32_C(v) ((short) (v))
+# ifndef PRINTF_INT32_MODIFIER
+#  define PRINTF_INT32_MODIFIER ""
+# endif
+#else
+#error "Platform not supported"
+#endif
+#endif
+
+/*
+ *  The macro stdint_int64_defined is temporarily used to record
+ *  whether or not 64 bit integer support is available.  It must be
+ *  defined for any 64 bit integer extensions for new platforms that are
+ *  added.
+ */
+
+#undef stdint_int64_defined
+#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
+# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)  /* C99 or later: long long is guaranteed */
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# endif
+#endif
+
+#if !defined (stdint_int64_defined)
+# if defined(__GNUC__)
+#  define stdint_int64_defined
+   __extension__ typedef long long int64_t;
+   __extension__ typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
+#  define stdint_int64_defined
+   typedef long long int64_t;
+   typedef unsigned long long uint64_t;
+#  define UINT64_C(v) v ## ULL
+#  define  INT64_C(v) v ## LL
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "ll"
+#  endif
+# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
+#  define stdint_int64_defined
+   typedef __int64 int64_t;
+   typedef unsigned __int64 uint64_t;
+#  define UINT64_C(v) v ## UI64
+#  define  INT64_C(v) v ## I64
+#  ifndef PRINTF_INT64_MODIFIER
+#   define PRINTF_INT64_MODIFIER "I64"
+#  endif
+# endif
+#endif
+
+#if !defined (LONG_LONG_MAX) && defined (INT64_C)
+# define LONG_LONG_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (ULONG_LONG_MAX) && defined (UINT64_C)  /* only when a 64 bit constant macro exists, like the sibling guards */
+# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
+#endif
+
+#if !defined (INT64_MAX) && defined (INT64_C)
+# define INT64_MAX INT64_C (9223372036854775807)
+#endif
+#if !defined (INT64_MIN) && defined (INT64_C)
+# define INT64_MIN (-INT64_C (9223372036854775807) - 1)  /* 9223372036854775808 does not fit in a signed 64 bit literal */
+#endif
+#if !defined (UINT64_MAX) && defined (INT64_C)
+# define UINT64_MAX UINT64_C (18446744073709551615)
+#endif
+
+/*
+ *  Width of hexadecimal for number field.
+ */
+
+#ifndef PRINTF_INT64_HEX_WIDTH
+# define PRINTF_INT64_HEX_WIDTH "16"
+#endif
+#ifndef PRINTF_INT32_HEX_WIDTH
+# define PRINTF_INT32_HEX_WIDTH "8"
+#endif
+#ifndef PRINTF_INT16_HEX_WIDTH
+# define PRINTF_INT16_HEX_WIDTH "4"
+#endif
+#ifndef PRINTF_INT8_HEX_WIDTH
+# define PRINTF_INT8_HEX_WIDTH "2"
+#endif
+
+#ifndef PRINTF_INT64_DEC_WIDTH
+# define PRINTF_INT64_DEC_WIDTH "20"
+#endif
+#ifndef PRINTF_INT32_DEC_WIDTH
+# define PRINTF_INT32_DEC_WIDTH "10"
+#endif
+#ifndef PRINTF_INT16_DEC_WIDTH
+# define PRINTF_INT16_DEC_WIDTH "5"
+#endif
+#ifndef PRINTF_INT8_DEC_WIDTH
+# define PRINTF_INT8_DEC_WIDTH "3"
+#endif
+
+/*
+ *  Ok, let's not worry about 128 bit integers for now.  Moore's law says
+ *  we don't need to worry about that until about 2040 at which point
+ *  we'll have bigger things to worry about.
+ */
+
+#ifdef stdint_int64_defined
+  typedef int64_t intmax_t;
+  typedef uint64_t uintmax_t;
+# define  INTMAX_MAX   INT64_MAX
+# define  INTMAX_MIN   INT64_MIN
+# define UINTMAX_MAX  UINT64_MAX
+# define UINTMAX_C(v) UINT64_C(v)
+# define  INTMAX_C(v)  INT64_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
+# endif
+#else
+  typedef int32_t intmax_t;
+  typedef uint32_t uintmax_t;
+# define  INTMAX_MAX   INT32_MAX
+# define UINTMAX_MAX  UINT32_MAX
+# define UINTMAX_C(v) UINT32_C(v)
+# define  INTMAX_C(v)  INT32_C(v)
+# ifndef PRINTF_INTMAX_MODIFIER
+#   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
+# endif
+# ifndef PRINTF_INTMAX_HEX_WIDTH
+#  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
+# endif
+# ifndef PRINTF_INTMAX_DEC_WIDTH
+#  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
+# endif
+#endif
+
+/*
+ *  Because this file currently only supports platforms which have
+ *  precise powers of 2 as bit sizes for the default integers, the
+ *  least definitions are all trivial.  It's possible that a future
+ *  version of this file could have different definitions.
+ */
+
+#ifndef stdint_least_defined
+  typedef   int8_t   int_least8_t;
+  typedef  uint8_t  uint_least8_t;
+  typedef  int16_t  int_least16_t;
+  typedef uint16_t uint_least16_t;
+  typedef  int32_t  int_least32_t;
+  typedef uint32_t uint_least32_t;
+# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
+# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
+# define  UINT_LEAST8_MAX  UINT8_MAX
+# define   INT_LEAST8_MAX   INT8_MAX
+# define UINT_LEAST16_MAX UINT16_MAX
+# define  INT_LEAST16_MAX  INT16_MAX
+# define UINT_LEAST32_MAX UINT32_MAX
+# define  INT_LEAST32_MAX  INT32_MAX
+# define   INT_LEAST8_MIN   INT8_MIN
+# define  INT_LEAST16_MIN  INT16_MIN
+# define  INT_LEAST32_MIN  INT32_MIN
+# ifdef stdint_int64_defined
+    typedef  int64_t  int_least64_t;
+    typedef uint64_t uint_least64_t;
+#   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
+#   define UINT_LEAST64_MAX UINT64_MAX
+#   define  INT_LEAST64_MAX  INT64_MAX
+#   define  INT_LEAST64_MIN  INT64_MIN
+# endif
+#endif
+#undef stdint_least_defined
+
+/*
+ *  The ANSI C committee pretending to know or specify anything about
+ *  performance is the epitome of misguided arrogance.  The mandate of
+ *  this file is to *ONLY* ever support that absolute minimum
+ *  definition of the fast integer types, for compatibility purposes.
+ *  No extensions, and no attempt to suggest what may or may not be a
+ *  faster integer type will ever be made in this file.  Developers are
+ *  warned to stay away from these types when using this or any other
+ *  stdint.h.
+ */
+
+typedef   int_least8_t   int_fast8_t;
+typedef  uint_least8_t  uint_fast8_t;
+typedef  int_least16_t  int_fast16_t;
+typedef uint_least16_t uint_fast16_t;
+typedef  int_least32_t  int_fast32_t;
+typedef uint_least32_t uint_fast32_t;
+#define  UINT_FAST8_MAX  UINT_LEAST8_MAX
+#define   INT_FAST8_MAX   INT_LEAST8_MAX
+#define UINT_FAST16_MAX UINT_LEAST16_MAX
+#define  INT_FAST16_MAX  INT_LEAST16_MAX
+#define UINT_FAST32_MAX UINT_LEAST32_MAX
+#define  INT_FAST32_MAX  INT_LEAST32_MAX
+#define   INT_FAST8_MIN   INT_LEAST8_MIN
+#define  INT_FAST16_MIN  INT_LEAST16_MIN
+#define  INT_FAST32_MIN  INT_LEAST32_MIN
+#ifdef stdint_int64_defined
+  typedef  int_least64_t  int_fast64_t;
+  typedef uint_least64_t uint_fast64_t;
+# define UINT_FAST64_MAX UINT_LEAST64_MAX
+# define  INT_FAST64_MAX  INT_LEAST64_MAX
+# define  INT_FAST64_MIN  INT_LEAST64_MIN
+#endif
+
+#undef stdint_int64_defined
+
+/*
+ *  Whatever piecemeal, per compiler thing we can do about the wchar_t
+ *  type limits.
+ */
+
+#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
+# include <wchar.h>
+# ifndef WCHAR_MIN
+#  define WCHAR_MIN 0
+# endif
+# ifndef WCHAR_MAX
+#  define WCHAR_MAX ((wchar_t)-1)
+# endif
+#endif
+
+/*
+ *  Whatever piecemeal, per compiler/platform thing we can do about the
+ *  (u)intptr_t types and limits.
+ */
+
+#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+#ifndef STDINT_H_UINTPTR_T_DEFINED
+# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
+#  define stdint_intptr_bits 64
+# elif defined (__WATCOMC__) || defined (__TURBOC__)
+#  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
+#    define stdint_intptr_bits 16
+#  else
+#    define stdint_intptr_bits 32
+#  endif
+# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
+#  define stdint_intptr_bits 32
+# elif defined (__INTEL_COMPILER)
+/* TODO -- what will Intel do about x86-64? */
+# endif
+
+# ifdef stdint_intptr_bits
+#  define stdint_intptr_glue3_i(a,b,c)  a##b##c
+#  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)
+#  ifndef PRINTF_INTPTR_MODIFIER
+#    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
+#  endif
+#  ifndef PTRDIFF_MAX
+#    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef PTRDIFF_MIN
+#    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef UINTPTR_MAX
+#    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MAX
+#    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
+#  endif
+#  ifndef INTPTR_MIN
+#    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
+#  endif
+#  ifndef INTPTR_C
+#    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
+#  endif
+#  ifndef UINTPTR_C
+#    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
+#  endif
+  typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
+  typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
+# else
+/* TODO -- The following is likely wrong for some platforms, and does
+   nothing for the definition of uintptr_t. */
+  typedef ptrdiff_t intptr_t;
+# endif
+# define STDINT_H_UINTPTR_T_DEFINED
+#endif
+
+/*
+ *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
+ */
+
+#ifndef SIG_ATOMIC_MAX
+# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
+#endif
+
+#endif
+
+#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
+
+/* 
+ *  Please compile with the maximum warning settings to make sure macros are not
+ *  defined more than once.
+ */
+ 
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+ 
+#define glue3_aux(x,y,z) x ## y ## z
+#define glue3(x,y,z) glue3_aux(x,y,z)
+
+#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0);
+#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0);
+
+#define DECL(us,bits) glue3(DECL,us,) (bits)
+
+#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
+ 
+int main () {
+  DECL(I,8)
+  DECL(U,8)
+  DECL(I,16)
+  DECL(U,16)
+  DECL(I,32)
+  DECL(U,32)
+#ifdef INT64_MAX
+  DECL(I,64)
+  DECL(U,64)
+#endif
+  intmax_t imax = INTMAX_C(0);
+  uintmax_t umax = UINTMAX_C(0);
+  char str0[256], str1[256];
+
+  sprintf (str0, "%d %x\n", 0, ~0);
+  
+  sprintf (str1, "%d %x\n",  i8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u8, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
+  sprintf (str1, "%d %x\n",  i16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
+  sprintf (str1, "%u %x\n",  u16, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);	
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
+#ifdef INT64_MAX	
+  sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
+#endif
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
+  sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
+  if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);	
+  
+  TESTUMAX(8);
+  TESTUMAX(16);
+  TESTUMAX(32);
+#ifdef INT64_MAX
+  TESTUMAX(64);
+#endif
+
+  return EXIT_SUCCESS;
+}
+
+#endif
diff --git a/sha1.cpp b/sha1.cpp
index 9578438..0e23c31 100644
--- a/sha1.cpp
+++ b/sha1.cpp
@@ -1,325 +1,325 @@
-/*

-SHA-1 in C

-By Steve Reid <sreid@sea-to-sky.net>

-100% Public Domain

-

------------------

-Modified 7/98

-By James H. Brown <jbrown@burgoyne.com>

-Still 100% Public Domain

-

-Corrected a problem which generated improper hash values on 16 bit machines

-Routine SHA1Update changed from

-  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int

-len)

-to

-  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned

-long len)

-

-The 'len' parameter was declared an int which works fine on 32 bit machines.

-However, on 16 bit machines an int is too small for the shifts being done

-against

-it.  This caused the hash function to generate incorrect values if len was

-greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().

-

-Since the file IO in main() reads 16K at a time, any file 8K or larger would

-be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million

-"a"s).

-

-I also changed the declaration of variables i & j in SHA1Update to

-unsigned long from unsigned int for the same reason.

-

-These changes should make no difference to any 32 bit implementations since

-an

-int and a long are the same size in those environments.

-

---

-I also corrected a few compiler warnings generated by Borland C.

-1. Added #include <process.h> for exit() prototype

-2. Removed unused variable 'j' in SHA1Final

-3. Changed exit(0) to return(0) at end of main.

-

-ALL changes I made can be located by searching for comments containing 'JHB'

------------------

-Modified 8/98

-By Steve Reid <sreid@sea-to-sky.net>

-Still 100% public domain

-

-1- Removed #include <process.h> and used return() instead of exit()

-2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)

-3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net

-

------------------

-Modified 4/01

-By Saul Kravitz <Saul.Kravitz@celera.com>

-Still 100% PD

-Modified to run on Compaq Alpha hardware.

-

------------------

-Modified 07/2002

-By Ralph Giles <giles@ghostscript.com>

-Still 100% public domain

-modified for use with stdint types, autoconf

-code cleanup, removed attribution comments

-switched SHA1Final() argument order for consistency

-use SHA1_ prefix for public api

-move public api to sha1.h

-*/

-

-/*

-Test Vectors (from FIPS PUB 180-1)

-"abc"

-  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D

-"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"

-  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1

-A million repetitions of "a"

-  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F

-*/

-

-#include <stdio.h>

-#include <string.h>

-#include <stdlib.h>

-

-#include "sha1.h"

-

-#if defined(_MSC_VER)

-#pragma warning(disable : 4267)

-#pragma warning(disable : 4996)

-#pragma warning(disable : 4100)

-#endif

-

-void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);

-

-#define rol ROTL32

-

-/* blk0() and blk() perform the initial expand. */

-/* I got the idea of expanding during the round function from SSLeay */

-/* FIXME: can we do this in an endian-proof way? */

-

-#ifdef WORDS_BIGENDIAN

-#define blk0(i) block->l[i]

-#else

-#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))

-#endif

-#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))

-

-/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */

-#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);

-#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);

-#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);

-#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);

-#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);

-

-

-/* Hash a single 512-bit block. This is the core of the algorithm. */

-void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])

-{

-    uint32_t a, b, c, d, e;

-    typedef union {

-        uint8_t c[64];

-        uint32_t l[16];

-    } CHAR64LONG16;

-    CHAR64LONG16* block;

-

-    block = (CHAR64LONG16*)buffer;

-

-    /* Copy context->state[] to working vars */

-    a = state[0];

-    b = state[1];

-    c = state[2];

-    d = state[3];

-    e = state[4];

-

-    /* 4 rounds of 20 operations each. Loop unrolled. */

-    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);

-    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);

-    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);

-    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);

-    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);

-    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);

-    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);

-    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);

-    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);

-    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);

-    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);

-    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);

-    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);

-    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);

-    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);

-    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);

-    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);

-    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);

-    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);

-    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);

-

-    /* Add the working vars back into context.state[] */

-    state[0] += a;

-    state[1] += b;

-    state[2] += c;

-    state[3] += d;

-    state[4] += e;

-

-    /* Wipe variables */

-    a = b = c = d = e = 0;

-}

-

-

-/* SHA1Init - Initialize new context */

-void SHA1_Init(SHA1_CTX* context)

-{

-    /* SHA1 initialization constants */

-    context->state[0] = 0x67452301;

-    context->state[1] = 0xEFCDAB89;

-    context->state[2] = 0x98BADCFE;

-    context->state[3] = 0x10325476;

-    context->state[4] = 0xC3D2E1F0;

-    context->count[0] = 0;

-  context->count[1] = 0;

-}

-

-

-/* Run your data through this. */

-void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)

-{

-    size_t i, j;

-

-    j = (context->count[0] >> 3) & 63;

-    if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++;

-

-    context->count[1] += (len >> 29);

-

-    if ((j + len) > 63) 

-  {

-        memcpy(&context->buffer[j], data, (i = 64-j));

-        SHA1_Transform(context->state, context->buffer);

-

-        for ( ; i + 63 < len; i += 64) 

-    {

-            SHA1_Transform(context->state, data + i);

-        }

-

-        j = 0;

-    }

-    else i = 0;

-    memcpy(&context->buffer[j], &data[i], len - i);

-}

-

-

-/* Add padding and return the message digest. */

-void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])

-{

-    uint32_t i;

-    uint8_t  finalcount[8];

-

-    for (i = 0; i < 8; i++) {

-        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]

-         >> ((3-(i & 3)) * 8) ) & 255);  /* Endian independent */

-    }

-    SHA1_Update(context, (uint8_t *)"\200", 1);

-    while ((context->count[0] & 504) != 448) {

-        SHA1_Update(context, (uint8_t *)"\0", 1);

-    }

-    SHA1_Update(context, finalcount, 8);  /* Should cause a SHA1_Transform() */

-    for (i = 0; i < SHA1_DIGEST_SIZE; i++) {

-        digest[i] = (uint8_t)

-         ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);

-    }

-

-    /* Wipe variables */

-    i = 0;

-    memset(context->buffer, 0, 64);

-    memset(context->state, 0, 20);

-    memset(context->count, 0, 8);

-    memset(finalcount, 0, 8);	/* SWR */

-}

-

-//-----------------------------------------------------------------------------

-

-void sha1_32a ( const void * key, int len, uint32_t seed, void * out )

-{

-  SHA1_CTX context;

-

-  uint8_t digest[20];

-

-  SHA1_Init(&context);

-  SHA1_Update(&context, (uint8_t*)key, len);

-  SHA1_Final(&context, digest);

-

-  memcpy(out,&digest[0],4);

-}

-

-//-----------------------------------------------------------------------------

-// self test

-

-//#define TEST

-

-#ifdef TEST

-

-static char *test_data[] = {

-    "abc",

-    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",

-    "A million repetitions of 'a'"};

-static char *test_results[] = {

-    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",

-    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",

-    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};

-

-

-void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)

-{

-    int i,j;

-    char *c = output;

-

-    for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) {

-        for (j = 0; j < 4; j++) {

-            sprintf(c,"%02X", digest[i*4+j]);

-            c += 2;

-        }

-        sprintf(c, " ");

-        c += 1;

-    }

-    *(c - 1) = '\0';

-}

-

-int main(int argc, char** argv)

-{

-    int k;

-    SHA1_CTX context;

-    uint8_t digest[20];

-    char output[80];

-

-    fprintf(stdout, "verifying SHA-1 implementation... ");

-

-    for (k = 0; k < 2; k++){

-        SHA1_Init(&context);

-        SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));

-        SHA1_Final(&context, digest);

-  digest_to_hex(digest, output);

-

-        if (strcmp(output, test_results[k])) {

-            fprintf(stdout, "FAIL\n");

-            fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]);

-            fprintf(stderr,"\t%s returned\n", output);

-            fprintf(stderr,"\t%s is correct\n", test_results[k]);

-            return (1);

-        }

-    }

-    /* million 'a' vector we feed separately */

-    SHA1_Init(&context);

-    for (k = 0; k < 1000000; k++)

-        SHA1_Update(&context, (uint8_t*)"a", 1);

-    SHA1_Final(&context, digest);

-    digest_to_hex(digest, output);

-    if (strcmp(output, test_results[2])) {

-        fprintf(stdout, "FAIL\n");

-        fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]);

-        fprintf(stderr,"\t%s returned\n", output);

-        fprintf(stderr,"\t%s is correct\n", test_results[2]);

-        return (1);

-    }

-

-    /* success */

-    fprintf(stdout, "ok\n");

-    return(0);

-}

-#endif /* TEST */

+/*
+SHA-1 in C
+By Steve Reid <sreid@sea-to-sky.net>
+100% Public Domain
+
+-----------------
+Modified 7/98
+By James H. Brown <jbrown@burgoyne.com>
+Still 100% Public Domain
+
+Corrected a problem which generated improper hash values on 16 bit machines
+Routine SHA1Update changed from
+  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
+len)
+to
+  void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
+long len)
+
+The 'len' parameter was declared an int which works fine on 32 bit machines.
+However, on 16 bit machines an int is too small for the shifts being done
+against
+it.  This caused the hash function to generate incorrect values if len was
+greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
+
+Since the file IO in main() reads 16K at a time, any file 8K or larger would
+be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
+"a"s).
+
+I also changed the declaration of variables i & j in SHA1Update to
+unsigned long from unsigned int for the same reason.
+
+These changes should make no difference to any 32 bit implementations since
+an
+int and a long are the same size in those environments.
+
+--
+I also corrected a few compiler warnings generated by Borland C.
+1. Added #include <process.h> for exit() prototype
+2. Removed unused variable 'j' in SHA1Final
+3. Changed exit(0) to return(0) at end of main.
+
+ALL changes I made can be located by searching for comments containing 'JHB'
+-----------------
+Modified 8/98
+By Steve Reid <sreid@sea-to-sky.net>
+Still 100% public domain
+
+1- Removed #include <process.h> and used return() instead of exit()
+2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
+3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
+
+-----------------
+Modified 4/01
+By Saul Kravitz <Saul.Kravitz@celera.com>
+Still 100% PD
+Modified to run on Compaq Alpha hardware.
+
+-----------------
+Modified 07/2002
+By Ralph Giles <giles@ghostscript.com>
+Still 100% public domain
+modified for use with stdint types, autoconf
+code cleanup, removed attribution comments
+switched SHA1Final() argument order for consistency
+use SHA1_ prefix for public api
+move public api to sha1.h
+*/
+
+/*
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+  A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+  84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+  34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "sha1.h"
+
+#if defined(_MSC_VER)
+#pragma warning(disable : 4267)
+#pragma warning(disable : 4996)
+#pragma warning(disable : 4100)
+#endif
+
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);
+
+#define rol ROTL32
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+/* FIXME: can we do this in an endian-proof way? */
+
+#ifdef WORDS_BIGENDIAN
+#define blk0(i) block->l[i]
+#else
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF))
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block. This is the core of the algorithm. */
+void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
+{
+    uint32_t a, b, c, d, e;
+    typedef union {
+        uint8_t c[64];
+        uint32_t l[16];
+    } CHAR64LONG16;
+    CHAR64LONG16* block;
+
+    block = (CHAR64LONG16*)buffer;
+
+    /* Copy context->state[] to working vars */
+    a = state[0];
+    b = state[1];
+    c = state[2];
+    d = state[3];
+    e = state[4];
+
+    /* 4 rounds of 20 operations each. Loop unrolled. */
+    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
+    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
+    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
+    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
+    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
+    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
+    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
+    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
+    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
+    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
+    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
+    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
+    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
+    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
+    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
+    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
+    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
+    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
+    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
+    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
+
+    /* Add the working vars back into context.state[] */
+    state[0] += a;
+    state[1] += b;
+    state[2] += c;
+    state[3] += d;
+    state[4] += e;
+
+    /* Wipe variables */
+    a = b = c = d = e = 0;
+}
+
+
+/* SHA1Init - Initialize new context */
+void SHA1_Init(SHA1_CTX* context)
+{
+    /* SHA1 initialization constants */
+    context->state[0] = 0x67452301;
+    context->state[1] = 0xEFCDAB89;
+    context->state[2] = 0x98BADCFE;
+    context->state[3] = 0x10325476;
+    context->state[4] = 0xC3D2E1F0;
+    context->count[0] = 0;
+  context->count[1] = 0;
+}
+
+
+/* Run your data through this. */
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
+{
+    size_t i, j;
+
+    j = (context->count[0] >> 3) & 63;
+    if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++;
+
+    context->count[1] += (len >> 29);
+
+    if ((j + len) > 63) 
+  {
+        memcpy(&context->buffer[j], data, (i = 64-j));
+        SHA1_Transform(context->state, context->buffer);
+
+        for ( ; i + 63 < len; i += 64) 
+    {
+            SHA1_Transform(context->state, data + i);
+        }
+
+        j = 0;
+    }
+    else i = 0;
+    memcpy(&context->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
+{
+    uint32_t i;
+    uint8_t  finalcount[8];
+
+    for (i = 0; i < 8; i++) {
+        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
+         >> ((3-(i & 3)) * 8) ) & 255);  /* Endian independent */
+    }
+    SHA1_Update(context, (uint8_t *)"\200", 1);
+    while ((context->count[0] & 504) != 448) {
+        SHA1_Update(context, (uint8_t *)"\0", 1);
+    }
+    SHA1_Update(context, finalcount, 8);  /* Should cause a SHA1_Transform() */
+    for (i = 0; i < SHA1_DIGEST_SIZE; i++) {
+        digest[i] = (uint8_t)
+         ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+    }
+
+    /* Wipe variables */
+    i = 0;
+    memset(context->buffer, 0, 64);
+    memset(context->state, 0, 20);
+    memset(context->count, 0, 8);
+    memset(finalcount, 0, 8);	/* SWR */
+}
+
+//-----------------------------------------------------------------------------
+
+void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
+{
+  SHA1_CTX context;
+
+  uint8_t digest[20];
+
+  SHA1_Init(&context);
+  SHA1_Update(&context, (uint8_t*)key, len);
+  SHA1_Final(&context, digest);
+
+  memcpy(out,&digest[0],4);
+}
+
+//-----------------------------------------------------------------------------
+// self test
+
+//#define TEST
+
+#ifdef TEST
+
+static char *test_data[] = {
+    "abc",
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+    "A million repetitions of 'a'"};
+static char *test_results[] = {
+    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
+    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
+    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
+
+
+void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
+{
+    int i,j;
+    char *c = output;
+
+    for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) {
+        for (j = 0; j < 4; j++) {
+            sprintf(c,"%02X", digest[i*4+j]);
+            c += 2;
+        }
+        sprintf(c, " ");
+        c += 1;
+    }
+    *(c - 1) = '\0';
+}
+
+int main(int argc, char** argv)
+{
+    int k;
+    SHA1_CTX context;
+    uint8_t digest[20];
+    char output[80];
+
+    fprintf(stdout, "verifying SHA-1 implementation... ");
+
+    for (k = 0; k < 2; k++){
+        SHA1_Init(&context);
+        SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));
+        SHA1_Final(&context, digest);
+  digest_to_hex(digest, output);
+
+        if (strcmp(output, test_results[k])) {
+            fprintf(stdout, "FAIL\n");
+            fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]);
+            fprintf(stderr,"\t%s returned\n", output);
+            fprintf(stderr,"\t%s is correct\n", test_results[k]);
+            return (1);
+        }
+    }
+    /* million 'a' vector we feed separately */
+    SHA1_Init(&context);
+    for (k = 0; k < 1000000; k++)
+        SHA1_Update(&context, (uint8_t*)"a", 1);
+    SHA1_Final(&context, digest);
+    digest_to_hex(digest, output);
+    if (strcmp(output, test_results[2])) {
+        fprintf(stdout, "FAIL\n");
+        fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]);
+        fprintf(stderr,"\t%s returned\n", output);
+        fprintf(stderr,"\t%s is correct\n", test_results[2]);
+        return (1);
+    }
+
+    /* success */
+    fprintf(stdout, "ok\n");
+    return(0);
+}
+#endif /* TEST */
diff --git a/sha1.h b/sha1.h
index b81088f..16b10a1 100644
--- a/sha1.h
+++ b/sha1.h
@@ -1,21 +1,21 @@
-/* public api for steve reid's public domain SHA-1 implementation */

-/* this file is in the public domain */

-

-#pragma once

-

-#include "Platform.h"

-

-struct SHA1_CTX

-{

-    uint32_t state[5];

-    uint32_t count[2];

-    uint8_t  buffer[64];

-};

-

-#define SHA1_DIGEST_SIZE 20

-

-void SHA1_Init(SHA1_CTX* context);

-void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);

-void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);

-

+/* public api for steve reid's public domain SHA-1 implementation */
+/* this file is in the public domain */
+
+#pragma once
+
+#include "Platform.h"
+
+struct SHA1_CTX
+{
+    uint32_t state[5];
+    uint32_t count[2];
+    uint8_t  buffer[64];
+};
+
+#define SHA1_DIGEST_SIZE 20
+
+void SHA1_Init(SHA1_CTX* context);
+void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
+void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
+
 void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
\ No newline at end of file