/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkChecksum_opts_DEFINED
#define SkChecksum_opts_DEFINED

#include "include/core/SkTypes.h"
#include "include/private/SkChecksum.h"
#include "src/core/SkUtils.h"   // sk_unaligned_load

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    #include <immintrin.h>
#elif defined(SK_ARM_HAS_CRC32)
    #include <arm_acle.h>
#endif

namespace SK_OPTS_NS {

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
    // This is not a CRC32.  It's Just A Hash that uses those instructions because they're fast.
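    // Three interleaved CRC chains are folded together below, so the result does not match a
    // plain CRC-32C of the buffer, and it differs between the SSE4.2, ARM, and Murmur3 paths.
    // Treat it strictly as an in-process hash; don't persist it or compare it across builds.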
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
        auto data = (const uint8_t*)vdata;

        // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
        uint64_t hash = seed;
        if (bytes >= 24) {
            // We'll create 3 independent hashes, each using _mm_crc32_u64()
            // to hash 8 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
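            // (Assumption: on typical cores the CRC32 instruction has a few cycles of latency
            // but can start roughly one per cycle, so three independent chains cover that
            // latency instead of stalling on a single serial chain.)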
            uint64_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = _mm_crc32_u64(a, sk_unaligned_load<uint64_t>(data+ 0));
                b = _mm_crc32_u64(b, sk_unaligned_load<uint64_t>(data+ 8));
                c = _mm_crc32_u64(c, sk_unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;
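            // Fold the three lanes together.  The CRC32 results occupy only the low 32 bits
            // of a, b, and c, so narrowing to _mm_crc32_u32() here discards nothing.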
            hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
            bytes -= 8;
            data += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
            data += 8;
        }

        // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
        // We don't lose anything here: only the bottom 32-bits were populated.
        auto hash32 = (uint32_t)hash;

        if (bytes & 4) {
            hash32 = _mm_crc32_u32(hash32, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash32 = _mm_crc32_u16(hash32, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash32 = _mm_crc32_u8(hash32, sk_unaligned_load<uint8_t>(data));
        }
        return hash32;
    }

#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64().
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        if (bytes >= 12) {
            // We'll create 3 independent hashes, each using _mm_crc32_u32()
            // to hash 4 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/12;
            while (steps --> 0) {
                a = _mm_crc32_u32(a, sk_unaligned_load<uint32_t>(data+0));
                b = _mm_crc32_u32(b, sk_unaligned_load<uint32_t>(data+4));
                c = _mm_crc32_u32(c, sk_unaligned_load<uint32_t>(data+8));
                data += 12;
            }
            bytes %= 12;
            hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
        }

        SkASSERT(bytes < 12);
        if (bytes >= 8) {
            hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data));
            bytes -= 4;
            data += 4;
        }

        SkASSERT(bytes < 8);
        if (bytes & 4) {
            hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = _mm_crc32_u16(hash, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = _mm_crc32_u8(hash, sk_unaligned_load<uint8_t>(data));
        }
        return hash;
    }

#elif defined(SK_ARM_HAS_CRC32)
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;
        if (bytes >= 24) {
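            // Same idea as the SSE4.2 path above: three independent CRC chains, each
            // consuming 8 bytes per step via __crc32d(), folded together at the end.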
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = __crc32d(a, sk_unaligned_load<uint64_t>(data+ 0));
                b = __crc32d(b, sk_unaligned_load<uint64_t>(data+ 8));
                c = __crc32d(c, sk_unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;
            hash = __crc32w(a, __crc32w(b, c));
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data));
            bytes -= 8;
            data += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data));
            data += 8;
        }
        if (bytes & 4) {
            hash = __crc32w(hash, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = __crc32h(hash, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = __crc32b(hash, sk_unaligned_load<uint8_t>(data));
        }
        return hash;
    }

#else
    // This is Murmur3.
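    // More precisely, the MurmurHash3 x86_32 body, with the usual 0xcc9e2d51 / 0x1b873593
    // constants; the final mix is delegated to SkChecksum::Mix() below.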
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        size_t original_bytes = bytes;

        // Handle 4 bytes at a time while possible.
        while (bytes >= 4) {
            uint32_t k = sk_unaligned_load<uint32_t>(data);
            k *= 0xcc9e2d51;
            k = (k << 15) | (k >> 17);
            k *= 0x1b873593;

            hash ^= k;
            hash = (hash << 13) | (hash >> 19);
            hash *= 5;
            hash += 0xe6546b64;

            bytes -= 4;
            data += 4;
        }

        // Handle last 0-3 bytes.
        uint32_t k = 0;
        switch (bytes & 3) {
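            // Each case intentionally falls through to the next: byte 2, then byte 1,
            // then byte 0 are folded into k before the final round below.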
            case 3: k ^= data[2] << 16;
            case 2: k ^= data[1] <<  8;
            case 1: k ^= data[0] <<  0;
                    k *= 0xcc9e2d51;
                    k = (k << 15) | (k >> 17);
                    k *= 0x1b873593;
                    hash ^= k;
        }

        hash ^= original_bytes;
        return SkChecksum::Mix(hash);
    }
#endif

}  // namespace SK_OPTS_NS
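// A minimal usage sketch (hypothetical caller; in Skia the appropriate hash_fn variant is
// normally selected through the SkOpts runtime-dispatch machinery rather than called directly):
//
//     uint32_t h = SK_OPTS_NS::hash_fn(buffer, length, /*seed=*/0);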

#endif  // SkChecksum_opts_DEFINED