mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2016 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkChecksum_opts_DEFINED |
| 9 | #define SkChecksum_opts_DEFINED |
| 10 | |
Mike Klein | c0bd9f9 | 2019-04-23 12:05:21 -0500 | [diff] [blame] | 11 | #include "include/core/SkTypes.h" |
| 12 | #include "include/private/SkChecksum.h" |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 13 | #include "src/core/SkUtils.h" // sk_unaligned_load |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 14 | |
| 15 | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
| 16 | #include <immintrin.h> |
Amaury Le Leyzour | 4c29633 | 2017-05-04 14:32:22 -0700 | [diff] [blame] | 17 | #elif defined(SK_ARM_HAS_CRC32) |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 18 | #include <arm_acle.h> |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 19 | #endif |
| 20 | |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 21 | namespace SK_OPTS_NS { |
| 22 | |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 23 | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64)) |
| 24 | // This is not a CRC32. It's Just A Hash that uses those instructions because they're fast. |
Mike Klein | cd71f11 | 2017-08-23 11:11:55 -0400 | [diff] [blame] | 25 | /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) { |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 26 | auto data = (const uint8_t*)vdata; |
| 27 | |
| 28 | // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while. |
| 29 | uint64_t hash = seed; |
| 30 | if (bytes >= 24) { |
| 31 | // We'll create 3 independent hashes, each using _mm_crc32_u64() |
| 32 | // to hash 8 bytes per step. Both 3 and independent are important: |
| 33 | // we can execute 3 of these instructions in parallel on a single core. |
| 34 | uint64_t a = hash, |
| 35 | b = hash, |
| 36 | c = hash; |
| 37 | size_t steps = bytes/24; |
| 38 | while (steps --> 0) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 39 | a = _mm_crc32_u64(a, sk_unaligned_load<uint64_t>(data+ 0)); |
| 40 | b = _mm_crc32_u64(b, sk_unaligned_load<uint64_t>(data+ 8)); |
| 41 | c = _mm_crc32_u64(c, sk_unaligned_load<uint64_t>(data+16)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 42 | data += 24; |
| 43 | } |
| 44 | bytes %= 24; |
Mike Klein | fd69b6d | 2018-10-04 13:58:31 -0400 | [diff] [blame] | 45 | hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 46 | } |
| 47 | |
| 48 | SkASSERT(bytes < 24); |
| 49 | if (bytes >= 16) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 50 | hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 51 | bytes -= 8; |
| 52 | data += 8; |
| 53 | } |
| 54 | |
| 55 | SkASSERT(bytes < 16); |
| 56 | if (bytes & 8) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 57 | hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 58 | data += 8; |
| 59 | } |
| 60 | |
| 61 | // The remainder of these _mm_crc32_u*() operate on a 32-bit register. |
| 62 | // We don't lose anything here: only the bottom 32-bits were populated. |
| 63 | auto hash32 = (uint32_t)hash; |
| 64 | |
| 65 | if (bytes & 4) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 66 | hash32 = _mm_crc32_u32(hash32, sk_unaligned_load<uint32_t>(data)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 67 | data += 4; |
| 68 | } |
| 69 | if (bytes & 2) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 70 | hash32 = _mm_crc32_u16(hash32, sk_unaligned_load<uint16_t>(data)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 71 | data += 2; |
| 72 | } |
| 73 | if (bytes & 1) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 74 | hash32 = _mm_crc32_u8(hash32, sk_unaligned_load<uint8_t>(data)); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 75 | } |
| 76 | return hash32; |
| 77 | } |
| 78 | |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 79 | #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
| 80 | // 32-bit version of above, using _mm_crc32_u32() but not _mm_crc32_u64(). |
Mike Klein | cd71f11 | 2017-08-23 11:11:55 -0400 | [diff] [blame] | 81 | /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 82 | auto data = (const uint8_t*)vdata; |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 83 | |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 84 | if (bytes >= 12) { |
| 85 | // We'll create 3 independent hashes, each using _mm_crc32_u32() |
| 86 | // to hash 4 bytes per step. Both 3 and independent are important: |
| 87 | // we can execute 3 of these instructions in parallel on a single core. |
| 88 | uint32_t a = hash, |
| 89 | b = hash, |
| 90 | c = hash; |
| 91 | size_t steps = bytes/12; |
| 92 | while (steps --> 0) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 93 | a = _mm_crc32_u32(a, sk_unaligned_load<uint32_t>(data+0)); |
| 94 | b = _mm_crc32_u32(b, sk_unaligned_load<uint32_t>(data+4)); |
| 95 | c = _mm_crc32_u32(c, sk_unaligned_load<uint32_t>(data+8)); |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 96 | data += 12; |
| 97 | } |
| 98 | bytes %= 12; |
Mike Klein | fd69b6d | 2018-10-04 13:58:31 -0400 | [diff] [blame] | 99 | hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c)); |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | SkASSERT(bytes < 12); |
| 103 | if (bytes >= 8) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 104 | hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data)); |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 105 | bytes -= 4; |
| 106 | data += 4; |
| 107 | } |
| 108 | |
| 109 | SkASSERT(bytes < 8); |
| 110 | if (bytes & 4) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 111 | hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data)); |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 112 | data += 4; |
| 113 | } |
| 114 | if (bytes & 2) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 115 | hash = _mm_crc32_u16(hash, sk_unaligned_load<uint16_t>(data)); |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 116 | data += 2; |
| 117 | } |
| 118 | if (bytes & 1) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 119 | hash = _mm_crc32_u8(hash, sk_unaligned_load<uint8_t>(data)); |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 120 | } |
| 121 | return hash; |
| 122 | } |
| 123 | |
Amaury Le Leyzour | 4c29633 | 2017-05-04 14:32:22 -0700 | [diff] [blame] | 124 | #elif defined(SK_ARM_HAS_CRC32) |
Mike Klein | cd71f11 | 2017-08-23 11:11:55 -0400 | [diff] [blame] | 125 | /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 126 | auto data = (const uint8_t*)vdata; |
| 127 | if (bytes >= 24) { |
| 128 | uint32_t a = hash, |
| 129 | b = hash, |
| 130 | c = hash; |
| 131 | size_t steps = bytes/24; |
| 132 | while (steps --> 0) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 133 | a = __crc32d(a, sk_unaligned_load<uint64_t>(data+ 0)); |
| 134 | b = __crc32d(b, sk_unaligned_load<uint64_t>(data+ 8)); |
| 135 | c = __crc32d(c, sk_unaligned_load<uint64_t>(data+16)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 136 | data += 24; |
| 137 | } |
| 138 | bytes %= 24; |
Mike Klein | fd69b6d | 2018-10-04 13:58:31 -0400 | [diff] [blame] | 139 | hash = __crc32w(a, __crc32w(b, c)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 140 | } |
| 141 | |
| 142 | SkASSERT(bytes < 24); |
| 143 | if (bytes >= 16) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 144 | hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 145 | bytes -= 8; |
| 146 | data += 8; |
| 147 | } |
| 148 | |
| 149 | SkASSERT(bytes < 16); |
| 150 | if (bytes & 8) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 151 | hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 152 | data += 8; |
| 153 | } |
| 154 | if (bytes & 4) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 155 | hash = __crc32w(hash, sk_unaligned_load<uint32_t>(data)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 156 | data += 4; |
| 157 | } |
| 158 | if (bytes & 2) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 159 | hash = __crc32h(hash, sk_unaligned_load<uint16_t>(data)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 160 | data += 2; |
| 161 | } |
| 162 | if (bytes & 1) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 163 | hash = __crc32b(hash, sk_unaligned_load<uint8_t>(data)); |
mtklein | 78559a7 | 2016-08-22 08:53:45 -0700 | [diff] [blame] | 164 | } |
| 165 | return hash; |
| 166 | } |
| 167 | |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 168 | #else |
| 169 | // This is Murmur3. |
Mike Klein | cd71f11 | 2017-08-23 11:11:55 -0400 | [diff] [blame] | 170 | /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) { |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 171 | auto data = (const uint8_t*)vdata; |
| 172 | |
| 173 | size_t original_bytes = bytes; |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 174 | |
| 175 | // Handle 4 bytes at a time while possible. |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 176 | while (bytes >= 4) { |
Mike Klein | 7a177b4 | 2019-06-17 17:17:47 -0500 | [diff] [blame] | 177 | uint32_t k = sk_unaligned_load<uint32_t>(data); |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 178 | k *= 0xcc9e2d51; |
| 179 | k = (k << 15) | (k >> 17); |
| 180 | k *= 0x1b873593; |
| 181 | |
| 182 | hash ^= k; |
| 183 | hash = (hash << 13) | (hash >> 19); |
| 184 | hash *= 5; |
| 185 | hash += 0xe6546b64; |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 186 | |
| 187 | bytes -= 4; |
| 188 | data += 4; |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 189 | } |
| 190 | |
| 191 | // Handle last 0-3 bytes. |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 192 | uint32_t k = 0; |
| 193 | switch (bytes & 3) { |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 194 | case 3: k ^= data[2] << 16; |
| 195 | case 2: k ^= data[1] << 8; |
| 196 | case 1: k ^= data[0] << 0; |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 197 | k *= 0xcc9e2d51; |
| 198 | k = (k << 15) | (k >> 17); |
| 199 | k *= 0x1b873593; |
| 200 | hash ^= k; |
| 201 | } |
| 202 | |
mtklein | 2f4114a | 2016-08-16 09:29:57 -0700 | [diff] [blame] | 203 | hash ^= original_bytes; |
mtklein | 4e97607 | 2016-08-08 09:06:27 -0700 | [diff] [blame] | 204 | return SkChecksum::Mix(hash); |
| 205 | } |
| 206 | #endif |
| 207 | |
| 208 | } // namespace SK_OPTS_NS |
| 209 | |
| 210 | #endif//SkChecksum_opts_DEFINED |