Some usability ideas around SkTHash.

  - By default, use new SkGoodHash to hash keys, which is:
    * for 4 byte values, use SkChecksum::Mix,
    * for SkStrings, use SkChecksum::Murmur3 on the data,
    * for other structs, shallow hash the struct with Murmur3.
  - Expand SkChecksum::Murmur3 to support non-4-byte-aligned data.
  - Add const foreach() methods.
  - Have foreach() take a functor, which allows lambdas.

BUG=skia:

Review URL: https://codereview.chromium.org/1021033002
diff --git a/dm/DM.cpp b/dm/DM.cpp
index 109a00d..833af0e 100644
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -104,13 +104,8 @@
         this->append(src);
         this->append(name);
         this->append(md5);
-        while (this->size() % 4) {
-            this->append("!");  // Pad out if needed so we can pass this to Murmur3.
-        }
     }
-    static uint32_t Hash(const Gold& g) {
-        return SkChecksum::Murmur3((const uint32_t*)g.c_str(), g.size());
-    }
+    static uint32_t Hash(const Gold& g) { return SkGoodHash((const SkString&)g); }
 };
 static SkTHashSet<Gold, Gold::Hash> gGold;
 
diff --git a/src/core/SkChecksum.h b/src/core/SkChecksum.h
index d9065f5..8eb1766 100644
--- a/src/core/SkChecksum.h
+++ b/src/core/SkChecksum.h
@@ -8,6 +8,8 @@
 #ifndef SkChecksum_DEFINED
 #define SkChecksum_DEFINED
 
+#include "SkString.h"
+#include "SkTLogic.h"
 #include "SkTypes.h"
 
 /**
@@ -69,21 +71,20 @@
      * This should take 2-3x longer than SkChecksum::Compute, but is a considerably better hash.
      * See en.wikipedia.org/wiki/MurmurHash.
      *
-     *  @param data Memory address of the data block to be processed. Must be 32-bit aligned.
-     *  @param size Size of the data block in bytes. Must be a multiple of 4.
+     *  @param data Memory address of the data block to be processed.
+     *  @param size Size of the data block in bytes.
      *  @param seed Initial hash seed. (optional)
      *  @return hash result
      */
-    static uint32_t Murmur3(const uint32_t* data, size_t bytes, uint32_t seed=0) {
+    static uint32_t Murmur3(const void* data, size_t bytes, uint32_t seed=0) {
         // Use may_alias to remind the compiler we're intentionally violating strict aliasing,
         // and so not to apply strict-aliasing-based optimizations.
         typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t;
+        typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t;
+
+        // Handle 4 bytes at a time while possible.
         const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data;
-
-        SkASSERTF(SkIsAlign4(bytes), "Expected 4-byte multiple, got %zu", bytes);
         const size_t words = bytes/4;
-
-
         uint32_t hash = seed;
         for (size_t i = 0; i < words; i++) {
             uint32_t k = safe_data[i];
@@ -96,6 +97,20 @@
             hash *= 5;
             hash += 0xe6546b64;
         }
+
+        // Handle last 0-3 bytes.
+        const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words);
+        uint32_t k = 0;
+        switch (bytes & 3) {
+            case 3: k ^= safe_tail[2] << 16;
+            case 2: k ^= safe_tail[1] <<  8;
+            case 1: k ^= safe_tail[0] <<  0;
+                    k *= 0xcc9e2d51;
+                    k = (k << 15) | (k >> 17);
+                    k *= 0x1b873593;
+                    hash ^= k;
+        }
+
         hash ^= bytes;
         return Mix(hash);
     }
@@ -165,4 +180,19 @@
     }
 };
 
+// SkGoodHash should usually be your first choice in hashing data.
+// It should be both reasonably fast and high quality.
+
+template <typename K>
+uint32_t SkGoodHash(const K& k) {
+    if (sizeof(K) == 4) {
+        return SkChecksum::Mix(*(const uint32_t*)&k);
+    }
+    return SkChecksum::Murmur3(&k, sizeof(K));
+}
+
+inline uint32_t SkGoodHash(const SkString& k) {
+    return SkChecksum::Murmur3(k.c_str(), k.size());
+}
+
 #endif
diff --git a/src/core/SkTHash.h b/src/core/SkTHash.h
index b47f8fa..849c850 100644
--- a/src/core/SkTHash.h
+++ b/src/core/SkTHash.h
@@ -65,12 +65,21 @@
     }
 
     // Call fn on every entry in the table.  You may mutate the entries, but be very careful.
-    template <typename Arg>
-    void foreach(void(*fn)(T*, Arg), Arg arg) {
+    template <typename Fn>  // f(T*)
+    void foreach(Fn&& fn) {
         for (int i = 0; i < fCapacity; i++) {
-            Slot& s = fSlots[i];
-            if (!s.empty()) {
-                fn(&s.val, arg);
+            if (!fSlots[i].empty()) {
+                fn(&fSlots[i].val);
+            }
+        }
+    }
+
+    // Call fn on every entry in the table.  You may not mutate anything.
+    template <typename Fn>  // f(T) or f(const T&)
+    void foreach(Fn&& fn) const {
+        for (int i = 0; i < fCapacity; i++) {
+            if (!fSlots[i].empty()) {
+                fn(fSlots[i].val);
             }
         }
     }
@@ -145,7 +154,7 @@
 
 // Maps K->V.  A more user-friendly wrapper around SkTHashTable, suitable for most use cases.
 // K and V are treated as ordinary copyable C++ types, with no assumed relationship between the two.
-template <typename K, typename V, uint32_t(*HashK)(const K&)>
+template <typename K, typename V, uint32_t(*HashK)(const K&) = &SkGoodHash>
 class SkTHashMap : SkNoncopyable {
 public:
     SkTHashMap() {}
@@ -176,7 +185,16 @@
     }
 
     // Call fn on every key/value pair in the table.  You may mutate the value but not the key.
-    void foreach(void(*fn)(K, V*)) { fTable.foreach(ForEach, fn); }
+    template <typename Fn>  // f(K, V*) or f(const K&, V*)
+    void foreach(Fn&& fn) {
+        fTable.foreach([&fn](Pair* p){ fn(p->key, &p->val); });
+    }
+
+    // Call fn on every key/value pair in the table.  You may not mutate anything.
+    template <typename Fn>  // f(K, V), f(const K&, V), f(K, const V&) or f(const K&, const V&).
+    void foreach(Fn&& fn) const {
+        fTable.foreach([&fn](const Pair& p){ fn(p.key, p.val); });
+    }
 
 private:
     struct Pair {
@@ -185,13 +203,12 @@
         static const K& GetKey(const Pair& p) { return p.key; }
         static uint32_t Hash(const K& key) { return HashK(key); }
     };
-    static void ForEach(Pair* p, void (*fn)(K, V*)) { fn(p->key, &p->val); }
 
     SkTHashTable<Pair, K> fTable;
 };
 
 // A set of T.  T is treated as an ordiary copyable C++ type.
-template <typename T, uint32_t(*HashT)(const T&)>
+template <typename T, uint32_t(*HashT)(const T&) = &SkGoodHash>
 class SkTHashSet : SkNoncopyable {
 public:
     SkTHashSet() {}
diff --git a/src/gpu/gl/GrGLCaps.h b/src/gpu/gl/GrGLCaps.h
index 1b77ed0..e0d2bf6 100644
--- a/src/gpu/gl/GrGLCaps.h
+++ b/src/gpu/gl/GrGLCaps.h
@@ -403,13 +403,8 @@
                 && fType      == rhs.fType
                 && fFboFormat == rhs.fFboFormat;
         }
-        static uint32_t Hash(const ReadPixelsSupportedFormat& r) {
-            return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&r), sizeof(r));
-        }
     };
-
-    mutable SkTHashMap<ReadPixelsSupportedFormat, bool, ReadPixelsSupportedFormat::Hash>
-        fReadPixelsSupportedCache;
+    mutable SkTHashMap<ReadPixelsSupportedFormat, bool> fReadPixelsSupportedCache;
 
     typedef GrDrawTargetCaps INHERITED;
 };
diff --git a/src/svg/SkSVGDevice.cpp b/src/svg/SkSVGDevice.cpp
index 45e616b..8d2f18a 100644
--- a/src/svg/SkSVGDevice.cpp
+++ b/src/svg/SkSVGDevice.cpp
@@ -103,12 +103,6 @@
     return tstr;
 }
 
-uint32_t hash_family_string(const SkString& family) {
-    // This is a lame hash function, but we don't really expect to see more than 1-2
-    // family names under normal circumstances.
-    return SkChecksum::Mix(SkToU32(family.size()));
-}
-
 struct Resources {
     Resources(const SkPaint& paint)
         : fPaintServer(svg_color(paint.getColor())) {}
@@ -538,7 +532,7 @@
     }
 
     SkString familyName;
-    SkTHashSet<SkString, hash_family_string> familySet;
+    SkTHashSet<SkString> familySet;
     SkAutoTUnref<const SkTypeface> tface(paint.getTypeface() ?
         SkRef(paint.getTypeface()) : SkTypeface::RefDefault());
 
diff --git a/tests/ChecksumTest.cpp b/tests/ChecksumTest.cpp
index 6248678..3658bd7 100644
--- a/tests/ChecksumTest.cpp
+++ b/tests/ChecksumTest.cpp
@@ -50,3 +50,16 @@
         }
     }
 }
+
+DEF_TEST(GoodHash, r) {
+    ASSERT(SkGoodHash(( int32_t)4) ==  614249093);  // 4 bytes.  Hits SkChecksum::Mix fast path.
+    ASSERT(SkGoodHash((uint32_t)4) ==  614249093);  // (Ditto)
+
+    // None of these are 4 byte sized, so they use SkChecksum::Murmur3, not SkChecksum::Mix.
+    ASSERT(SkGoodHash((uint64_t)4) == 3491892518);
+    ASSERT(SkGoodHash((uint16_t)4) ==  899251846);
+    ASSERT(SkGoodHash( (uint8_t)4) ==  962700458);
+
+    // Tests SkString is correctly specialized.
+    ASSERT(SkGoodHash(SkString("Hi")) == 55667557);
+}
diff --git a/tests/HashTest.cpp b/tests/HashTest.cpp
index 623e597..5abf3db 100644
--- a/tests/HashTest.cpp
+++ b/tests/HashTest.cpp
@@ -3,12 +3,15 @@
 #include "SkTHash.h"
 #include "Test.h"
 
-namespace { uint32_t hash_int(const int& k) { return SkChecksum::Mix(k); } }
-
-static void set_negative_key(int key, double* d) { *d = -key; }
+// Tests use of const foreach().  map.count() is of course the better way to do this.
+static int count(const SkTHashMap<int, double>& map) {
+    int n = 0;
+    map.foreach([&n](int, double) { n++; });
+    return n;
+}
 
 DEF_TEST(HashMap, r) {
-    SkTHashMap<int, double, hash_int> map;
+    SkTHashMap<int, double> map;
 
     map.set(3, 4.0);
     REPORTER_ASSERT(r, map.count() == 1);
@@ -17,7 +20,9 @@
     REPORTER_ASSERT(r, found);
     REPORTER_ASSERT(r, *found == 4.0);
 
-    map.foreach(set_negative_key);
+    map.foreach([](int key, double* d){ *d = -key; });
+    REPORTER_ASSERT(r, count(map) == 1);
+
     found = map.find(3);
     REPORTER_ASSERT(r, found);
     REPORTER_ASSERT(r, *found == -3.0);
@@ -44,10 +49,8 @@
     REPORTER_ASSERT(r, map.count() == 0);
 }
 
-namespace { uint32_t hash_string(const SkString& s) { return SkToInt(s.size()); } }
-
 DEF_TEST(HashSet, r) {
-    SkTHashSet<SkString, hash_string> set;
+    SkTHashSet<SkString> set;
 
     set.add(SkString("Hello"));
     set.add(SkString("World"));