Use sse4.2 CRC32 instructions to hash when available.
About 9x faster than Murmur3 for long inputs.
Most of this is a mechanical change from SkChecksum::Murmur3(...) to SkOpts::hash(...).
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2208903002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot;master.client.skia.compile:Build-Ubuntu-GCC-x86_64-Release-CMake-Trybot,Build-Mac-Clang-x86_64-Release-CMake-Trybot
Review-Url: https://codereview.chromium.org/2208903002
diff --git a/src/core/SkChecksum.cpp b/src/core/SkChecksum.cpp
deleted file mode 100644
index 4457eb4..0000000
--- a/src/core/SkChecksum.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkChecksum.h"
-
-uint32_t SkChecksum::Murmur3(const void* data, size_t bytes, uint32_t seed) {
- // Use may_alias to remind the compiler we're intentionally violating strict aliasing,
- // and so not to apply strict-aliasing-based optimizations.
- typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t;
- typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t;
-
- // Handle 4 bytes at a time while possible.
- const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data;
- const size_t words = bytes/4;
- uint32_t hash = seed;
- for (size_t i = 0; i < words; i++) {
- uint32_t k = safe_data[i];
- k *= 0xcc9e2d51;
- k = (k << 15) | (k >> 17);
- k *= 0x1b873593;
-
- hash ^= k;
- hash = (hash << 13) | (hash >> 19);
- hash *= 5;
- hash += 0xe6546b64;
- }
-
- // Handle last 0-3 bytes.
- const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words);
- uint32_t k = 0;
- switch (bytes & 3) {
- case 3: k ^= safe_tail[2] << 16;
- case 2: k ^= safe_tail[1] << 8;
- case 1: k ^= safe_tail[0] << 0;
- k *= 0xcc9e2d51;
- k = (k << 15) | (k >> 17);
- k *= 0x1b873593;
- hash ^= k;
- }
-
- hash ^= bytes;
- return SkChecksum::Mix(hash);
-}
diff --git a/src/core/SkDescriptor.h b/src/core/SkDescriptor.h
index 71f7133..efa0278 100644
--- a/src/core/SkDescriptor.h
+++ b/src/core/SkDescriptor.h
@@ -9,7 +9,7 @@
#ifndef SkDescriptor_DEFINED
#define SkDescriptor_DEFINED
-#include "SkChecksum.h"
+#include "SkOpts.h"
#include "SkTypes.h"
class SkDescriptor : SkNoncopyable {
@@ -123,7 +123,7 @@
static uint32_t ComputeChecksum(const SkDescriptor* desc) {
const uint32_t* ptr = (const uint32_t*)desc + 1; // skip the checksum field
size_t len = desc->fLength - sizeof(uint32_t);
- return SkChecksum::Murmur3(ptr, len);
+ return SkOpts::hash(ptr, len);
}
// private so no one can create one except our factories
diff --git a/src/core/SkImageFilterCache.cpp b/src/core/SkImageFilterCache.cpp
index ba8a32c..c7104de 100644
--- a/src/core/SkImageFilterCache.cpp
+++ b/src/core/SkImageFilterCache.cpp
@@ -7,9 +7,9 @@
#include "SkImageFilterCache.h"
-#include "SkChecksum.h"
#include "SkMutex.h"
#include "SkOnce.h"
+#include "SkOpts.h"
#include "SkRefCnt.h"
#include "SkSpecialImage.h"
#include "SkTDynamicHash.h"
@@ -47,7 +47,7 @@
return v.fKey;
}
static uint32_t Hash(const Key& key) {
- return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&key), sizeof(Key));
+ return SkOpts::hash(reinterpret_cast<const uint32_t*>(&key), sizeof(Key));
}
SK_DECLARE_INTERNAL_LLIST_INTERFACE(Value);
};
diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 5263fe4..a4da111 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -26,6 +26,7 @@
#include "SkBlitMask_opts.h"
#include "SkBlitRow_opts.h"
#include "SkBlurImageFilter_opts.h"
+#include "SkChecksum_opts.h"
#include "SkColorCubeFilter_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkSwizzler_opts.h"
@@ -70,12 +71,14 @@
DEFINE_DEFAULT(inverted_CMYK_to_BGR1);
DEFINE_DEFAULT(srcover_srgb_srgb);
+
+ DEFINE_DEFAULT(hash_fn);
#undef DEFINE_DEFAULT
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
void Init_ssse3();
void Init_sse41();
- void Init_sse42() {}
+ void Init_sse42();
void Init_avx();
void Init_avx2() {}
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 7c6cfb0..44e337d 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -65,6 +65,12 @@
// Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA).
// If nsrc < ndst, we loop over src to create a pattern.
extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc);
+
+ // The fastest high quality 32-bit hash we can provide on this platform.
+ extern uint32_t (*hash_fn)(const void*, size_t, uint32_t seed);
+ static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) {
+ return hash_fn(data, bytes, seed);
+ }
}
#endif//SkOpts_DEFINED
diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp
index 07c10d5..44b2928 100644
--- a/src/core/SkPaint.cpp
+++ b/src/core/SkPaint.cpp
@@ -7,7 +7,6 @@
#include "SkPaint.h"
#include "SkAutoKern.h"
-#include "SkChecksum.h"
#include "SkColorFilter.h"
#include "SkData.h"
#include "SkDraw.h"
@@ -19,6 +18,7 @@
#include "SkMutex.h"
#include "SkReadBuffer.h"
#include "SkWriteBuffer.h"
+#include "SkOpts.h"
#include "SkPaintDefaults.h"
#include "SkPathEffect.h"
#include "SkRasterizer.h"
@@ -2388,6 +2388,6 @@
// so fBitfields should be 10 pointers and 6 32-bit values from the start.
static_assert(offsetof(SkPaint, fBitfields) == 9 * sizeof(void*) + 6 * sizeof(uint32_t),
"SkPaint_notPackedTightly");
- return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(this),
- offsetof(SkPaint, fBitfields) + sizeof(fBitfields));
+ return SkOpts::hash(reinterpret_cast<const uint32_t*>(this),
+ offsetof(SkPaint, fBitfields) + sizeof(fBitfields));
}
diff --git a/src/core/SkResourceCache.cpp b/src/core/SkResourceCache.cpp
index e465132..4bdc8dd 100644
--- a/src/core/SkResourceCache.cpp
+++ b/src/core/SkResourceCache.cpp
@@ -5,10 +5,10 @@
* found in the LICENSE file.
*/
-#include "SkChecksum.h"
#include "SkMessageBus.h"
#include "SkMipMap.h"
#include "SkMutex.h"
+#include "SkOpts.h"
#include "SkPixelRef.h"
#include "SkResourceCache.h"
#include "SkTraceMemoryDump.h"
@@ -46,9 +46,9 @@
fSharedID_lo = (uint32_t)sharedID;
fSharedID_hi = (uint32_t)(sharedID >> 32);
fNamespace = nameSpace;
- // skip unhashed fields when computing the murmur
- fHash = SkChecksum::Murmur3(this->as32() + kUnhashedLocal32s,
- (fCount32 - kUnhashedLocal32s) << 2);
+ // skip unhashed fields when computing the hash
+ fHash = SkOpts::hash(this->as32() + kUnhashedLocal32s,
+ (fCount32 - kUnhashedLocal32s) << 2);
}
#include "SkTDynamicHash.h"
diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h
index ec6447d..b17d146 100644
--- a/src/gpu/GrProgramDesc.h
+++ b/src/gpu/GrProgramDesc.h
@@ -10,7 +10,8 @@
#include "GrColor.h"
#include "GrTypesPriv.h"
-#include "SkChecksum.h"
+#include "SkOpts.h"
+#include "SkTArray.h"
/** This class describes a program to generate. It also serves as a program cache key. Very little
of this is GL-specific. The GL-specific parts could be factored out into a subclass. */
@@ -112,7 +113,7 @@
uint32_t* checksum = this->atOffset<uint32_t, GrProgramDesc::kChecksumOffset>();
*checksum = 0; // We'll hash through these bytes, so make sure they're initialized.
- *checksum = SkChecksum::Murmur3(fKey.begin(), keyLength);
+ *checksum = SkOpts::hash(fKey.begin(), keyLength);
}
// The key, stored in fKey, is composed of four parts:
diff --git a/src/gpu/GrResourceCache.cpp b/src/gpu/GrResourceCache.cpp
index 71e40f1..62360ed 100644
--- a/src/gpu/GrResourceCache.cpp
+++ b/src/gpu/GrResourceCache.cpp
@@ -11,9 +11,9 @@
#include "GrCaps.h"
#include "GrGpuResourceCacheAccess.h"
#include "GrTracing.h"
-#include "SkChecksum.h"
#include "SkGr.h"
#include "SkMessageBus.h"
+#include "SkOpts.h"
#include "SkTSort.h"
DECLARE_SKMESSAGEBUS_MESSAGE(GrUniqueKeyInvalidatedMessage);
@@ -43,7 +43,7 @@
}
uint32_t GrResourceKeyHash(const uint32_t* data, size_t size) {
- return SkChecksum::Murmur3(data, size);
+ return SkOpts::hash(data, size);
}
//////////////////////////////////////////////////////////////////////////////
@@ -687,7 +687,7 @@
SkASSERT(SkBudgeted::kNo == resource->resourcePriv().isBudgeted() ||
uniqueKey.isValid());
if (!uniqueKey.isValid()) {
- ++fCouldBeScratch;
+ ++fCouldBeScratch;
SkASSERT(fScratchMap->countForKey(scratchKey));
}
SkASSERT(!resource->resourcePriv().refsWrappedObjects());
diff --git a/src/gpu/batches/GrAADistanceFieldPathRenderer.h b/src/gpu/batches/GrAADistanceFieldPathRenderer.h
index db17b07..985b2f1 100755
--- a/src/gpu/batches/GrAADistanceFieldPathRenderer.h
+++ b/src/gpu/batches/GrAADistanceFieldPathRenderer.h
@@ -13,7 +13,7 @@
#include "GrRect.h"
#include "GrShape.h"
-#include "SkChecksum.h"
+#include "SkOpts.h"
#include "SkTDynamicHash.h"
class GrContext;
@@ -81,7 +81,7 @@
}
static inline uint32_t Hash(Key key) {
- return SkChecksum::Murmur3(key.data(), sizeof(uint32_t) * key.count32());
+ return SkOpts::hash(key.data(), sizeof(uint32_t) * key.count32());
}
};
diff --git a/src/gpu/effects/GrTextureStripAtlas.h b/src/gpu/effects/GrTextureStripAtlas.h
index 91ce61c..5b90a34 100644
--- a/src/gpu/effects/GrTextureStripAtlas.h
+++ b/src/gpu/effects/GrTextureStripAtlas.h
@@ -9,7 +9,7 @@
#define GrTextureStripAtlas_DEFINED
#include "SkBitmap.h"
-#include "SkChecksum.h"
+#include "SkOpts.h"
#include "SkGr.h"
#include "SkTDArray.h"
#include "SkTDynamicHash.h"
@@ -142,7 +142,7 @@
public:
// for SkTDynamicHash
static const Desc& GetKey(const AtlasEntry& entry) { return entry.fDesc; }
- static uint32_t Hash(const Desc& desc) { return SkChecksum::Murmur3(&desc, sizeof(Desc)); }
+ static uint32_t Hash(const Desc& desc) { return SkOpts::hash(&desc, sizeof(Desc)); }
// AtlasEntry proper
AtlasEntry() : fAtlas(nullptr) {}
diff --git a/src/gpu/text/GrAtlasTextBlob.h b/src/gpu/text/GrAtlasTextBlob.h
index f76d026..afc11a9 100644
--- a/src/gpu/text/GrAtlasTextBlob.h
+++ b/src/gpu/text/GrAtlasTextBlob.h
@@ -14,6 +14,7 @@
#include "GrMemoryPool.h"
#include "SkDescriptor.h"
#include "SkMaskFilter.h"
+#include "SkOpts.h"
#include "SkPathEffect.h"
#include "SkRasterizer.h"
#include "SkSurfaceProps.h"
@@ -89,7 +90,7 @@
}
static uint32_t Hash(const Key& key) {
- return SkChecksum::Murmur3(&key, sizeof(Key));
+ return SkOpts::hash(&key, sizeof(Key));
}
void operator delete(void* p) {
diff --git a/src/gpu/text/GrStencilAndCoverTextContext.h b/src/gpu/text/GrStencilAndCoverTextContext.h
index 9b29719..d4f1abd 100644
--- a/src/gpu/text/GrStencilAndCoverTextContext.h
+++ b/src/gpu/text/GrStencilAndCoverTextContext.h
@@ -11,6 +11,7 @@
#include "GrDrawContext.h"
#include "GrStyle.h"
#include "SkDrawFilter.h"
+#include "SkOpts.h"
#include "SkTextBlob.h"
#include "SkTHash.h"
#include "SkTInternalLList.h"
@@ -120,7 +121,7 @@
static uint32_t Hash(const Key& key) {
SkASSERT(key.count() > 1); // 1-length keys should be using the blob-id hash map.
- return SkChecksum::Murmur3(key.begin(), sizeof(uint32_t) * key.count());
+ return SkOpts::hash(key.begin(), sizeof(uint32_t) * key.count());
}
TextBlob(uint32_t blobId, const SkTextBlob* skBlob, const SkPaint& skPaint)
diff --git a/src/gpu/vk/GrVkPipelineStateCache.cpp b/src/gpu/vk/GrVkPipelineStateCache.cpp
index 5e4013d..d404a8d 100644
--- a/src/gpu/vk/GrVkPipelineStateCache.cpp
+++ b/src/gpu/vk/GrVkPipelineStateCache.cpp
@@ -11,6 +11,7 @@
#include "GrProcessor.h"
#include "GrVkPipelineState.h"
#include "GrVkPipelineStateBuilder.h"
+#include "SkOpts.h"
#include "glsl/GrGLSLFragmentProcessor.h"
#include "glsl/GrGLSLProgramDataManager.h"
@@ -112,8 +113,8 @@
int keyLength = desc.fStateKey.count();
SkASSERT(0 == (keyLength % 4));
// Seed the checksum with the checksum of the programDesc then add the vulkan key to it.
- desc.fChecksum = SkChecksum::Murmur3(desc.fStateKey.begin(), keyLength,
- desc.fProgramDesc.getChecksum());
+ desc.fChecksum = SkOpts::hash(desc.fStateKey.begin(), keyLength,
+ desc.fProgramDesc.getChecksum());
Entry* entry = nullptr;
if (Entry** entryptr = fHashTable.find(desc)) {
diff --git a/src/opts/SkChecksum_opts.h b/src/opts/SkChecksum_opts.h
new file mode 100644
index 0000000..346b16b
--- /dev/null
+++ b/src/opts/SkChecksum_opts.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkChecksum_opts_DEFINED
+#define SkChecksum_opts_DEFINED
+
+#include "SkChecksum.h"
+#include "SkTypes.h"
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
+ #include <immintrin.h>
+#endif
+
+// TODO: ARMv8 has optional CRC instructions similar to SSE 4.2
+// TODO: 32-bit x86 version: same sort of idea using only _mm_crc32_u32() and smaller
+
+namespace SK_OPTS_NS {
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
+ template <typename T>
+ static inline T unaligned_load(const uint8_t* src) {
+ T val;
+ memcpy(&val, src, sizeof(val));
+ return val;
+ }
+
+ static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
+ auto data = (const uint8_t*)vdata;
+
+ // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
+ uint64_t hash = seed;
+ if (bytes >= 24) {
+ // We'll create 3 independent hashes, each using _mm_crc32_u64()
+ // to hash 8 bytes per step. Both 3 and independent are important:
+ // we can execute 3 of these instructions in parallel on a single core.
+ uint64_t a = hash,
+ b = hash,
+ c = hash;
+ size_t steps = bytes/24;
+ while (steps --> 0) {
+ a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0));
+ b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8));
+ c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16));
+ data += 24;
+ }
+ bytes %= 24;
+ hash = a^b^c;
+ }
+
+ SkASSERT(bytes < 24);
+ if (bytes >= 16) {
+ hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
+ bytes -= 8;
+ data += 8;
+ }
+
+ SkASSERT(bytes < 16);
+ if (bytes & 8) {
+ hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
+ data += 8;
+ }
+
+ // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
+ // We don't lose anything here: only the bottom 32-bits were populated.
+ auto hash32 = (uint32_t)hash;
+
+ if (bytes & 4) {
+ hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data));
+ data += 4;
+ }
+ if (bytes & 2) {
+ hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data));
+ data += 2;
+ }
+ if (bytes & 1) {
+ hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data));
+ }
+ return hash32;
+ }
+
+#else
+ static uint32_t hash_fn(const void* data, size_t bytes, uint32_t seed) {
+ // This is Murmur3.
+
+ // Use may_alias to remind the compiler we're intentionally violating strict aliasing,
+ // and so not to apply strict-aliasing-based optimizations.
+ typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t;
+ typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t;
+
+ // Handle 4 bytes at a time while possible.
+ const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data;
+ const size_t words = bytes/4;
+ uint32_t hash = seed;
+ for (size_t i = 0; i < words; i++) {
+ uint32_t k = safe_data[i];
+ k *= 0xcc9e2d51;
+ k = (k << 15) | (k >> 17);
+ k *= 0x1b873593;
+
+ hash ^= k;
+ hash = (hash << 13) | (hash >> 19);
+ hash *= 5;
+ hash += 0xe6546b64;
+ }
+
+ // Handle last 0-3 bytes.
+ const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words);
+ uint32_t k = 0;
+ switch (bytes & 3) {
+ case 3: k ^= safe_tail[2] << 16;
+ case 2: k ^= safe_tail[1] << 8;
+ case 1: k ^= safe_tail[0] << 0;
+ k *= 0xcc9e2d51;
+ k = (k << 15) | (k >> 17);
+ k *= 0x1b873593;
+ hash ^= k;
+ }
+
+ hash ^= bytes;
+ return SkChecksum::Mix(hash);
+ }
+#endif
+
+} // namespace SK_OPTS_NS
+
+#endif//SkChecksum_opts_DEFINED
diff --git a/src/opts/SkOpts_sse42.cpp b/src/opts/SkOpts_sse42.cpp
new file mode 100644
index 0000000..1883182
--- /dev/null
+++ b/src/opts/SkOpts_sse42.cpp
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkOpts.h"
+
+#define SK_OPTS_NS sse42
+#include "SkChecksum_opts.h"
+
+namespace SkOpts {
+ void Init_sse42() {
+ hash_fn = sse42::hash_fn;
+ }
+}
+
diff --git a/src/pdf/SkPDFGraphicState.h b/src/pdf/SkPDFGraphicState.h
index 84491ba..49723bd 100644
--- a/src/pdf/SkPDFGraphicState.h
+++ b/src/pdf/SkPDFGraphicState.h
@@ -10,7 +10,7 @@
#define SkPDFGraphicState_DEFINED
#include "SkPDFTypes.h"
-#include "SkChecksum.h"
+#include "SkOpts.h"
class SkPaint;
class SkPDFCanon;
@@ -63,7 +63,7 @@
bool operator==(const SkPDFGraphicState& rhs) const {
return 0 == memcmp(&fStrokeWidth, &rhs.fStrokeWidth, 12);
}
- uint32_t hash() const { return SkChecksum::Murmur3(&fStrokeWidth, 12); }
+ uint32_t hash() const { return SkOpts::hash(&fStrokeWidth, 12); }
private:
const SkScalar fStrokeWidth;
diff --git a/src/utils/SkWhitelistTypefaces.cpp b/src/utils/SkWhitelistTypefaces.cpp
index ac82f7c..139e697 100644
--- a/src/utils/SkWhitelistTypefaces.cpp
+++ b/src/utils/SkWhitelistTypefaces.cpp
@@ -5,8 +5,8 @@
* found in the LICENSE file.
*/
-#include "SkChecksum.h"
#include "SkFontDescriptor.h"
+#include "SkOpts.h"
#include "SkStream.h"
#include "SkString.h"
#include "SkTypeface.h"
@@ -80,7 +80,7 @@
if (!fontStream->peek(data.begin(), length)) {
return 0;
}
- return SkChecksum::Murmur3(data.begin(), length);
+ return SkOpts::hash(data.begin(), length);
}
static void serialize_sub(const char* fontName, SkFontStyle style, SkWStream* wstream) {