Replace SkConsistentChecksum with SkCityHash (now including CityHash via DEPS)

Alternative to https://codereview.appspot.com/6847087/ ('Change SkConsistentChecksum to use SuperFastHash')
Review URL: https://codereview.appspot.com/6867060

git-svn-id: http://skia.googlecode.com/svn/trunk@6701 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/DEPS b/DEPS
index 55a6401..5cd2370 100644
--- a/DEPS
+++ b/DEPS
@@ -9,6 +9,7 @@
 #
 deps = {
   "third_party/externals/angle" : "http://angleproject.googlecode.com/svn/trunk@1268",
+  "third_party/externals/cityhash" : "http://cityhash.googlecode.com/svn/trunk@11",
   "third_party/externals/freetype" : "https://android.googlesource.com/platform/external/freetype.git",
   "third_party/externals/gyp" : "http://gyp.googlecode.com/svn/trunk@1517",
   "third_party/externals/libjpeg" : "http://src.chromium.org/svn/trunk/src/third_party/libjpeg@125399",
diff --git a/gyp/utils.gyp b/gyp/utils.gyp
index 5658f04..b2833f2 100644
--- a/gyp/utils.gyp
+++ b/gyp/utils.gyp
@@ -5,6 +5,9 @@
       'product_name': 'skia_utils',
       'type': 'static_library',
       'standalone_static_library': 1,
+      'dependencies': [
+        'cityhash',
+      ],
       'include_dirs': [
         '../include/config',
         '../include/core',
@@ -23,6 +26,7 @@
         '../include/utils/SkCountdown.h',
         '../include/utils/SkRunnable.h',
         '../include/utils/SkThreadPool.h',
+        '../src/utils/SkCityHash.cpp',
         '../src/utils/SkCondVar.cpp',
         '../src/utils/SkCountdown.cpp',
         '../src/utils/SkThreadPool.cpp',
@@ -193,6 +197,25 @@
         ],
       },
     },
+    {
+      'target_name': 'cityhash',
+      'type': 'static_library',
+      'standalone_static_library': 1,
+      'include_dirs': [
+        '../include/config',
+        '../include/core',
+        '../src/utils/cityhash',
+        '../third_party/externals/cityhash/src',
+      ],
+      'sources': [
+        '../third_party/externals/cityhash/src/city.cc',
+      ],
+      'direct_dependent_settings': {
+        'include_dirs': [
+          '../third_party/externals/cityhash/src',
+        ],
+      },
+    },
   ],
 }
 
diff --git a/src/utils/SkCityHash.cpp b/src/utils/SkCityHash.cpp
new file mode 100644
index 0000000..a21aa89
--- /dev/null
+++ b/src/utils/SkCityHash.cpp
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/**
+ * Pass any calls through to the CityHash library.
+ * This is the only source file that accesses the CityHash code directly.
+ */
+
+#include "SkCityHash.h"
+#include "SkTypes.h"
+#include "city.h"
+
+uint32_t SkCityHash::Compute32(const char *data, size_t size) {
+    return CityHash32(data, size);
+}
+
+uint64_t SkCityHash::Compute64(const char *data, size_t size) {
+    return CityHash64(data, size);
+}
diff --git a/src/utils/SkCityHash.h b/src/utils/SkCityHash.h
new file mode 100644
index 0000000..c69e0fb
--- /dev/null
+++ b/src/utils/SkCityHash.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/**
+ * Hash functions, using the CityHash algorithm.
+ *
+ * Results are guaranteed to be:
+ *  1. consistent across revisions of the library (for a given set
+ *     of bytes, the checksum generated at one revision of the Skia
+ *     library will match the one generated on any other revision of
+ *     the Skia library)
+ *  2. consistent across platforms (for a given
+ *     set of bytes, the checksum generated on one platform will
+ *     match the one generated on any other platform)
+ */
+
+#ifndef SkCityHash_DEFINED
+#define SkCityHash_DEFINED
+
+#include "SkTypes.h"
+
+class SkCityHash : SkNoncopyable {
+public:
+    /**
+     *  Compute a 32-bit checksum for a given data block.
+     *
+     *  @param data Memory address of the data block to be processed.
+     *  @param size Size of the data block in bytes.
+     *  @return checksum result
+     */
+    static uint32_t Compute32(const char *data, size_t size);
+
+    /**
+     *  Compute a 64-bit checksum for a given data block.
+     *
+     *  @param data Memory address of the data block to be processed.
+     *  @param size Size of the data block in bytes.
+     *  @return checksum result
+     */
+    static uint64_t Compute64(const char *data, size_t size);
+};
+
+#endif
diff --git a/src/utils/SkConsistentChecksum.h b/src/utils/SkConsistentChecksum.h
deleted file mode 100644
index 8b7c53d..0000000
--- a/src/utils/SkConsistentChecksum.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2012 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkConsistentChecksum_DEFINED
-#define SkConsistentChecksum_DEFINED
-
-#include "SkTypes.h"
-
-class SkConsistentChecksum : SkNoncopyable {
-private:
-    /*
-     *  Our Rotate and Mash helpers are meant to automatically do the right
-     *  thing depending if sizeof(uintptr_t) is 4 or 8.
-     */
-    enum {
-        ROTR = 17,
-        ROTL = sizeof(uintptr_t) * 8 - ROTR,
-        HALFBITS = sizeof(uintptr_t) * 4
-    };
-
-    static inline uintptr_t Mash(uintptr_t total, uintptr_t value) {
-        return ((total >> ROTR) | (total << ROTL)) ^ value;
-    }
-
-public:
-    /**
-     *  Compute a 32-bit checksum for a given data block
-     *
-     *  WARNING: As of 1 Nov 2012, this algorithm is still in
-     *  flux... but once we get it doing what we want, it will be:
-     *  1. consistent across revisions of the library (for a given set
-     *     of bytes, the checksum generated at one revision of the Skia
-     *     library will match the one generated on any other revision of
-     *     the Skia library)
-     *  2. consistent across platforms (for a given
-     *     set of bytes, the checksum generated on one platform will
-     *     match the one generated on any other platform)
-     *
-     *  @param data Memory address of the data block to be processed. Must be
-     *              32-bit aligned.
-     *  @param size Size of the data block in bytes. Must be a multiple of 4.
-     *  @return checksum result
-     */
-    static uint32_t Compute(const uint32_t* data, size_t size) {
-        SkASSERT(SkIsAlign4(size));
-
-        /*
-         *  We want to let the compiler use 32bit or 64bit addressing and math
-         *  so we use uintptr_t as our magic type. This makes the code a little
-         *  more obscure (we can't hard-code 32 or 64 anywhere, but have to use
-         *  sizeof()).
-         */
-        uintptr_t result = 0;
-        const uintptr_t* ptr = reinterpret_cast<const uintptr_t*>(data);
-
-        /*
-         *  count the number of quad element chunks. This takes into account
-         *  if we're on a 32bit or 64bit arch, since we use sizeof(uintptr_t)
-         *  to compute how much to shift-down the size.
-         */
-        size_t n4 = size / (sizeof(uintptr_t) << 2);
-        for (size_t i = 0; i < n4; ++i) {
-            result = Mash(result, *ptr++);
-            result = Mash(result, *ptr++);
-            result = Mash(result, *ptr++);
-            result = Mash(result, *ptr++);
-        }
-        size &= ((sizeof(uintptr_t) << 2) - 1);
-
-        data = reinterpret_cast<const uint32_t*>(ptr);
-        const uint32_t* stop = data + (size >> 2);
-        while (data < stop) {
-            result = Mash(result, *data++);
-        }
-
-        /*
-         *  smash us down to 32bits if we were 64. Note that when uintptr_t is
-         *  32bits, this code-path should go away, but I still got a warning
-         *  when I wrote
-         *      result ^= result >> 32;
-         *  since >>32 is undefined for 32bit ints, hence the wacky HALFBITS
-         *  define.
-         */
-        if (8 == sizeof(result)) {
-            result ^= result >> HALFBITS;
-        }
-        return static_cast<uint32_t>(result);
-    }
-};
-
-#endif
diff --git a/src/utils/cityhash/README b/src/utils/cityhash/README
new file mode 100644
index 0000000..d939288
--- /dev/null
+++ b/src/utils/cityhash/README
@@ -0,0 +1,2 @@
+This directory contains files needed to build third_party/externals/cityhash
+(such as the config.h file that would normally be created by autoconf)
diff --git a/src/utils/cityhash/config.h b/src/utils/cityhash/config.h
new file mode 100644
index 0000000..bd36416
--- /dev/null
+++ b/src/utils/cityhash/config.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+/**
+ * Converts from Skia build flags to the macro definitions cityhash normally
+ * gets from autoconf.
+ */
+
+#include "SkTypes.h"
+
+#ifdef SK_CPU_BENDIAN
+  #define WORDS_BIGENDIAN 1
+#endif
diff --git a/tests/ChecksumTest.cpp b/tests/ChecksumTest.cpp
index 5d5d83b..dea0eef 100644
--- a/tests/ChecksumTest.cpp
+++ b/tests/ChecksumTest.cpp
@@ -7,7 +7,10 @@
  */
 #include "Test.h"
 #include "SkChecksum.h"
-#include "SkConsistentChecksum.h"
+#include "SkCityHash.h"
+
+// Word size that is large enough to hold results of any checksum type.
+typedef uint64_t checksum_result;
 
 namespace skiatest {
     class ChecksumTestClass : public Test {
@@ -22,26 +25,25 @@
     private:
         enum Algorithm {
             kSkChecksum,
-            kSkConsistentChecksum
+            kSkCityHash32,
+            kSkCityHash64
         };
 
         // Call Compute(data, size) on the appropriate checksum algorithm,
         // depending on this->fWhichAlgorithm.
-        uint32_t ComputeChecksum(uint32_t* data, size_t size) {
-            // Our checksum algorithms require 32-bit aligned data.
-            // If either of these tests fail, then the algorithm
-            // doesn't have a chance.
-            REPORTER_ASSERT_MESSAGE(fReporter,
-                                    reinterpret_cast<uintptr_t>(data) % 4 == 0,
-                                    "test data pointer is not 32-bit aligned");
-            REPORTER_ASSERT_MESSAGE(fReporter, SkIsAlign4(size),
-                                    "test data size is not 32-bit aligned");
-
+        checksum_result ComputeChecksum(const char *data, size_t size) {
             switch(fWhichAlgorithm) {
             case kSkChecksum:
-                return SkChecksum::Compute(data, size);
-            case kSkConsistentChecksum:
-                return SkConsistentChecksum::Compute(data, size);
+                REPORTER_ASSERT_MESSAGE(fReporter,
+                                        reinterpret_cast<uintptr_t>(data) % 4 == 0,
+                                        "test data pointer is not 32-bit aligned");
+                REPORTER_ASSERT_MESSAGE(fReporter, SkIsAlign4(size),
+                                        "test data size is not 32-bit aligned");
+                return SkChecksum::Compute(reinterpret_cast<const uint32_t *>(data), size);
+            case kSkCityHash32:
+                return SkCityHash::Compute32(data, size);
+            case kSkCityHash64:
+                return SkCityHash::Compute64(data, size);
             default:
                 SkString message("fWhichAlgorithm has unknown value ");
                 message.appendf("%d", fWhichAlgorithm);
@@ -55,16 +57,22 @@
         // generates the same results if called twice over the same data.
         void TestChecksumSelfConsistency(size_t buf_size) {
             SkAutoMalloc storage(buf_size);
-            uint32_t*    ptr = (uint32_t*)storage.get();
-            char*        cptr = (char*)ptr;
+            char* ptr = reinterpret_cast<char *>(storage.get());
+
+            REPORTER_ASSERT(fReporter,
+                            GetTestDataChecksum(8, 0) ==
+                            GetTestDataChecksum(8, 0));
+            REPORTER_ASSERT(fReporter,
+                            GetTestDataChecksum(8, 0) !=
+                            GetTestDataChecksum(8, 1));
 
             sk_bzero(ptr, buf_size);
-            uint32_t prev = 0;
+            checksum_result prev = 0;
 
             // assert that as we change values (from 0 to non-zero) in
             // our buffer, we get a different value
             for (size_t i = 0; i < buf_size; ++i) {
-                cptr[i] = (i & 0x7f) + 1; // need some non-zero value here
+                ptr[i] = (i & 0x7f) + 1; // need some non-zero value here
 
                 // Try checksums of different-sized chunks, but always
                 // 32-bit aligned and big enough to contain all the
@@ -73,9 +81,9 @@
                 size_t checksum_size = (((i/4)+1)*4);
                 REPORTER_ASSERT(fReporter, checksum_size <= buf_size);
 
-                uint32_t curr = ComputeChecksum(ptr, checksum_size);
+                checksum_result curr = ComputeChecksum(ptr, checksum_size);
                 REPORTER_ASSERT(fReporter, prev != curr);
-                uint32_t again = ComputeChecksum(ptr, checksum_size);
+                checksum_result again = ComputeChecksum(ptr, checksum_size);
                 REPORTER_ASSERT(fReporter, again == curr);
                 prev = curr;
             }
@@ -84,48 +92,49 @@
         // Return the checksum of a buffer of bytes 'len' long.
         // The pattern of values within the buffer will be consistent
         // for every call, based on 'seed'.
-        uint32_t GetTestDataChecksum(size_t len, char seed=0) {
+        checksum_result GetTestDataChecksum(size_t len, char seed=0) {
             SkAutoMalloc storage(len);
-            uint32_t* start = (uint32_t *)storage.get();
-            char* ptr = (char *)start;
+            char* start = reinterpret_cast<char *>(storage.get());
+            char* ptr = start;
             for (size_t i = 0; i < len; ++i) {
                 *ptr++ = ((seed+i) & 0x7f);
             }
-            uint32_t result = ComputeChecksum(start, len);
+            checksum_result result = ComputeChecksum(start, len);
             return result;
         }
 
         void RunTest() {
             // Test self-consistency of checksum algorithms.
             fWhichAlgorithm = kSkChecksum;
-            REPORTER_ASSERT(fReporter,
-                            GetTestDataChecksum(8, 0) ==
-                            GetTestDataChecksum(8, 0));
-            REPORTER_ASSERT(fReporter,
-                            GetTestDataChecksum(8, 0) !=
-                            GetTestDataChecksum(8, 1));
             TestChecksumSelfConsistency(128);
-            fWhichAlgorithm = kSkConsistentChecksum;
-            REPORTER_ASSERT(fReporter,
-                            GetTestDataChecksum(8, 0) ==
-                            GetTestDataChecksum(8, 0));
-            REPORTER_ASSERT(fReporter,
-                            GetTestDataChecksum(8, 0) !=
-                            GetTestDataChecksum(8, 1));
+            fWhichAlgorithm = kSkCityHash32;
+            TestChecksumSelfConsistency(128);
+            fWhichAlgorithm = kSkCityHash64;
             TestChecksumSelfConsistency(128);
 
             // Test checksum results that should be consistent across
             // versions and platforms.
             fWhichAlgorithm = kSkChecksum;
             REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0);
-            fWhichAlgorithm = kSkConsistentChecksum;
-            REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0);
-            REPORTER_ASSERT(fReporter, GetTestDataChecksum(4)  == 0x03020100);
-            REPORTER_ASSERT(fReporter, GetTestDataChecksum(8)  == 0x07860485);
+            fWhichAlgorithm = kSkCityHash32;
+            REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0xdc56d17a);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(4)   == 0x616e1132);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(8)   == 0xeb0fd2d6);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(128) == 0x5321e430);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(132) == 0x924a10e4);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(256) == 0xd4de9dc9);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(260) == 0xecf0325d);
+            fWhichAlgorithm = kSkCityHash64;
+            REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0x9ae16a3b2f90404f);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(4)   == 0x82bffd898958e540);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(8)   == 0xad5a13e1e8e93b98);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(128) == 0x10b153630af1f395);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(132) == 0x7db71dc4adcc6647);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(256) == 0xeee763519b91b010);
+            REPORTER_ASSERT(fReporter, GetTestDataChecksum(260) == 0x2fe19e0b2239bc23);
 
             // TODO: note the weakness exposed by these collisions...
-            // We need to improve the SkConsistentChecksum algorithm
-            // (and maybe SkChecksum too?)
+            // We need to improve the SkChecksum algorithm.
             // We would prefer that these asserts FAIL!
             // Filed as https://code.google.com/p/skia/issues/detail?id=981
             // ('SkChecksum algorithm allows for way too many collisions')
@@ -134,11 +143,6 @@
                 GetTestDataChecksum(128) == GetTestDataChecksum(256));
             REPORTER_ASSERT(fReporter,
                 GetTestDataChecksum(132) == GetTestDataChecksum(260));
-            fWhichAlgorithm = kSkConsistentChecksum;
-            REPORTER_ASSERT(fReporter, GetTestDataChecksum(128) == 0);
-            REPORTER_ASSERT(fReporter, GetTestDataChecksum(132) == 0x03020100);
-            REPORTER_ASSERT(fReporter, GetTestDataChecksum(256) == 0);
-            REPORTER_ASSERT(fReporter, GetTestDataChecksum(260) == 0x03020100);
         }
 
         Reporter* fReporter;