/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

8#ifndef SkChecksum_opts_DEFINED
9#define SkChecksum_opts_DEFINED
10
11#include "SkChecksum.h"
12#include "SkTypes.h"
13
14#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
15 #include <immintrin.h>
Amaury Le Leyzour4c296332017-05-04 14:32:22 -070016#elif defined(SK_ARM_HAS_CRC32)
mtklein78559a72016-08-22 08:53:45 -070017 #include <arm_acle.h>
mtklein4e976072016-08-08 09:06:27 -070018#endif
19
mtklein4e976072016-08-08 09:06:27 -070020namespace SK_OPTS_NS {
21
// Read a value of type T from (possibly unaligned) memory at p.
// memcpy is the portable way to express an unaligned load; compilers
// lower it to a single plain load on architectures that allow it.
template <typename T, typename P>
static inline T unaligned_load(const P* p) {
    T val;
    memcpy(&val, p, sizeof(val));
    return val;
}
mtklein4e976072016-08-08 09:06:27 -070028
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
    // This is not a CRC32.  It's Just A Hash that uses those instructions because they're fast.
    //
    // Hashes 'bytes' bytes at 'vdata', seeded with 'seed'; returns a 32-bit hash.
    // x86-64 path: _mm_crc32_u64 folds 8 bytes per instruction.
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
        auto data = (const uint8_t*)vdata;

        // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
        uint64_t hash = seed;
        if (bytes >= 24) {
            // We'll create 3 independent hashes, each using _mm_crc32_u64()
            // to hash 8 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint64_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0));
                b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8));
                c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;            // 0..23 bytes remain.
            hash = a^b^c;           // Merge the three lanes.
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            // 16..23 bytes left: fold one 8-byte chunk so at most 15 remain.
            hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
            bytes -= 8;
            data += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data));
            data += 8;
        }

        // The remainder of these _mm_crc32_u*() operate on a 32-bit register.
        // We don't lose anything here: only the bottom 32-bits were populated.
        auto hash32 = (uint32_t)hash;

        if (bytes & 4) {
            hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data));
        }
        return hash32;
    }
84
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64().
    //
    // Hashes 'bytes' bytes at 'vdata', seeded with 'hash'; returns a 32-bit hash.
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        if (bytes >= 12) {
            // We'll create 3 independent hashes, each using _mm_crc32_u32()
            // to hash 4 bytes per step.  Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/12;
            while (steps --> 0) {
                a = _mm_crc32_u32(a, unaligned_load<uint32_t>(data+0));
                b = _mm_crc32_u32(b, unaligned_load<uint32_t>(data+4));
                c = _mm_crc32_u32(c, unaligned_load<uint32_t>(data+8));
                data += 12;
            }
            bytes %= 12;            // 0..11 bytes remain.
            hash = a^b^c;           // Merge the three lanes.
        }

        SkASSERT(bytes < 12);
        if (bytes >= 8) {
            // 8..11 bytes left: fold one 4-byte chunk so at most 7 remain.
            hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
            bytes -= 4;
            data += 4;
        }

        SkASSERT(bytes < 8);
        if (bytes & 4) {
            hash = _mm_crc32_u32(hash, unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = _mm_crc32_u16(hash, unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = _mm_crc32_u8(hash, unaligned_load<uint8_t>(data));
        }
        return hash;
    }
129
#elif defined(SK_ARM_HAS_CRC32)
    // ARM CRC32 path, structured like the x86-64 path above: __crc32d folds
    // 8 bytes per step, with 3 independent lanes for instruction-level
    // parallelism.  (Like the x86 paths, this is Just A Hash, not a real
    // CRC32 of the whole buffer.)
    //
    // Hashes 'bytes' bytes at 'vdata', seeded with 'hash'; returns a 32-bit hash.
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;
        if (bytes >= 24) {
            // 3 independent lanes, 8 bytes per lane per step, merged by xor below.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = __crc32d(a, unaligned_load<uint64_t>(data+ 0));
                b = __crc32d(b, unaligned_load<uint64_t>(data+ 8));
                c = __crc32d(c, unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;            // 0..23 bytes remain.
            hash = a^b^c;
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            // 16..23 bytes left: fold one 8-byte chunk so at most 15 remain.
            hash = __crc32d(hash, unaligned_load<uint64_t>(data));
            bytes -= 8;
            data += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = __crc32d(hash, unaligned_load<uint64_t>(data));
            data += 8;
        }
        // Tail: word, halfword, then byte variants for the last 0..7 bytes.
        if (bytes & 4) {
            hash = __crc32w(hash, unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = __crc32h(hash, unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = __crc32b(hash, unaligned_load<uint8_t>(data));
        }
        return hash;
    }
173
mtklein2f4114a2016-08-16 09:29:57 -0700174#else
175 // This is Murmur3.
Mike Kleincd71f112017-08-23 11:11:55 -0400176 /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
mtklein2f4114a2016-08-16 09:29:57 -0700177 auto data = (const uint8_t*)vdata;
178
179 size_t original_bytes = bytes;
mtklein4e976072016-08-08 09:06:27 -0700180
181 // Handle 4 bytes at a time while possible.
mtklein2f4114a2016-08-16 09:29:57 -0700182 while (bytes >= 4) {
183 uint32_t k = unaligned_load<uint32_t>(data);
mtklein4e976072016-08-08 09:06:27 -0700184 k *= 0xcc9e2d51;
185 k = (k << 15) | (k >> 17);
186 k *= 0x1b873593;
187
188 hash ^= k;
189 hash = (hash << 13) | (hash >> 19);
190 hash *= 5;
191 hash += 0xe6546b64;
mtklein2f4114a2016-08-16 09:29:57 -0700192
193 bytes -= 4;
194 data += 4;
mtklein4e976072016-08-08 09:06:27 -0700195 }
196
197 // Handle last 0-3 bytes.
mtklein4e976072016-08-08 09:06:27 -0700198 uint32_t k = 0;
199 switch (bytes & 3) {
mtklein2f4114a2016-08-16 09:29:57 -0700200 case 3: k ^= data[2] << 16;
201 case 2: k ^= data[1] << 8;
202 case 1: k ^= data[0] << 0;
mtklein4e976072016-08-08 09:06:27 -0700203 k *= 0xcc9e2d51;
204 k = (k << 15) | (k >> 17);
205 k *= 0x1b873593;
206 hash ^= k;
207 }
208
mtklein2f4114a2016-08-16 09:29:57 -0700209 hash ^= original_bytes;
mtklein4e976072016-08-08 09:06:27 -0700210 return SkChecksum::Mix(hash);
211 }
212#endif
213
214} // namespace SK_OPTS_NS
215
216#endif//SkChecksum_opts_DEFINED