Fairphone ODM | 25c12f5 | 2023-12-15 17:24:06 +0800 | [diff] [blame] | 1 | /* slide_hash_simd.h |
| 2 | * |
| 3 | * Copyright 2022 The Chromium Authors |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the Chromium source repository LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SLIDE_HASH_SIMD_H |
| 9 | #define SLIDE_HASH_SIMD_H |
| 10 | |
| 11 | #include "deflate.h" |
| 12 | |
/*
 * Select the inline keyword, unless the includer has already defined INLINE:
 * `__inline` for MSVC proper (clang-cl excluded), `inline` everywhere else.
 */
#if !defined(INLINE)
#  if !defined(_MSC_VER) || defined(__clang__)
#    define INLINE inline
#  else
#    define INLINE __inline
#  endif
#endif
| 20 | |
| 21 | #if defined(CPU_NO_SIMD) |
| 22 | |
| 23 | #error SIMD has been disabled for your build target |
| 24 | |
| 25 | #elif defined(DEFLATE_SLIDE_HASH_SSE2) |
| 26 | |
| 27 | #include <emmintrin.h> /* SSE2 */ |
| 28 | |
/*
 * SSE2 flavour of the table slider.
 *
 * Z_SLIDE_INIT_SIMD(wsize) broadcasts (ush)wsize into all eight 16-bit lanes
 * of a 128-bit vector.
 */
#define Z_SLIDE_INIT_SIMD(wsize) _mm_set1_epi16((ush)(wsize))

/*
 * Z_SLIDE_HASH_SIMD(table, size, vector_wsize) subtracts vector_wsize, with
 * unsigned saturation, from each of the `size` 16-bit entries starting at
 * `table`, eight entries (one unaligned 128-bit load/store) per iteration.
 *
 * - `table` must be a modifiable Posf pointer: it is advanced in place and
 *   equals its original value plus `size` once the loop finishes.
 * - `size` must be a multiple of 8: the loop terminates on pointer equality,
 *   so any other count steps past the end of the table.
 * - Every argument is parenthesized in the expansion so that compound
 *   expressions (e.g. `n << 1`) are evaluated the way the caller wrote them.
 */
#define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \
  for (const Posf* const end = (table) + (size); (table) != end;) { \
    __m128i v0 = _mm_loadu_si128((__m128i *)((table) + 0)); \
    v0 = _mm_subs_epu16(v0, (vector_wsize)); \
    _mm_storeu_si128((__m128i *)((table) + 0), v0); \
    (table) += 8; \
  }

/* 128-bit vector treated as eight unsigned 16-bit lanes. */
typedef __m128i z_vec128i_u16x8_t;
| 40 | |
| 41 | #elif defined(DEFLATE_SLIDE_HASH_NEON) |
| 42 | |
| 43 | #include <arm_neon.h> /* NEON */ |
| 44 | |
/*
 * NEON flavour of the table slider.
 *
 * Z_SLIDE_INIT_SIMD(wsize) broadcasts (ush)wsize into all eight 16-bit lanes
 * of a 128-bit vector.
 */
#define Z_SLIDE_INIT_SIMD(wsize) vdupq_n_u16((ush)(wsize))

/*
 * Z_SLIDE_HASH_SIMD(table, size, vector_wsize) subtracts vector_wsize, with
 * unsigned saturation, from each of the `size` 16-bit entries starting at
 * `table`. The loop body is unrolled once: two 128-bit vectors (sixteen
 * entries) are loaded, adjusted and stored per iteration.
 *
 * - `table` must be a modifiable Posf pointer: it is advanced in place and
 *   equals its original value plus `size` once the loop finishes.
 * - `size` must be a multiple of 16: the loop terminates on pointer equality,
 *   so any other count steps past the end of the table.
 * - Every argument is parenthesized in the expansion so that compound
 *   expressions (e.g. `n << 1`) are evaluated the way the caller wrote them.
 */
#define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \
  for (const Posf* const end = (table) + (size); (table) != end;) { \
    uint16x8_t v0 = vld1q_u16((table) + 0); \
    uint16x8_t v8 = vld1q_u16((table) + 8); \
    v0 = vqsubq_u16(v0, (vector_wsize)); \
    v8 = vqsubq_u16(v8, (vector_wsize)); \
    vst1q_u16((table) + 0, v0); \
    vst1q_u16((table) + 8, v8); \
    (table) += 8 + 8; \
  }

/* 128-bit vector treated as eight unsigned 16-bit lanes. */
typedef uint16x8_t z_vec128i_u16x8_t;
| 59 | |
| 60 | #else |
| 61 | |
| 62 | #error slide_hash_simd is not defined for your build target |
| 63 | |
| 64 | #endif |
| 65 | |
| 66 | /* =========================================================================== |
| 67 | * Slide the hash table when sliding the window down (could be avoided with 32 |
| 68 | * bit values at the expense of memory usage). We slide even when level == 0 to |
| 69 | * keep the hash table consistent if we switch back to level > 0 later. |
| 70 | */ |
| 71 | local INLINE void slide_hash_simd( |
| 72 | Posf *head, Posf *prev, const uInt w_size, const uInt hash_size) { |
| 73 | /* |
| 74 | * The SIMD implementation of the hash table slider assumes: |
| 75 | * |
| 76 | * 1. hash chain offset is 2 bytes. Should be true as Pos is "ush" type. |
| 77 | */ |
| 78 | Assert(sizeof(Pos) == 2, "Pos type size error: should be 2 bytes"); |
| 79 | Assert(sizeof(ush) == 2, "ush type size error: should be 2 bytes"); |
| 80 | |
| 81 | Assert(hash_size <= (1 << 16), "Hash table maximum size error"); |
| 82 | Assert(hash_size >= (1 << 8), "Hash table minimum size error"); |
| 83 | Assert(w_size == (ush)w_size, "Prev table size error"); |
| 84 | |
| 85 | /* |
| 86 | * 2. The hash & prev table sizes are a multiple of 32 bytes (256 bits), |
| 87 | * since the NEON table slider moves two 128-bit items per loop (loop is |
| 88 | * unrolled on NEON for performance, see http://crbug.com/863257). |
| 89 | */ |
| 90 | Assert(!((hash_size * sizeof(head[0])) & (32 - 1)), |
| 91 | "Hash table size error: should be a multiple of 32 bytes"); |
| 92 | Assert(!((w_size * sizeof(prev[0])) & (32 - 1)), |
| 93 | "Prev table size error: should be a multiple of 32 bytes"); |
| 94 | |
| 95 | /* |
| 96 | * Duplicate (ush)w_size in each uint16_t component of a 128-bit vector. |
| 97 | */ |
| 98 | const z_vec128i_u16x8_t vec_wsize = Z_SLIDE_INIT_SIMD(w_size); |
| 99 | |
| 100 | /* |
| 101 | * Slide {head,prev} hash chain values: subtracts (ush)w_size from every |
| 102 | * value with a saturating SIMD subtract, to clamp the result to 0(NIL), |
| 103 | * to implement slide_hash() `(m >= wsize ? m - wsize : NIL);` code. |
| 104 | */ |
| 105 | Z_SLIDE_HASH_SIMD(head, hash_size, vec_wsize); |
| 106 | #ifndef FASTEST |
| 107 | Z_SLIDE_HASH_SIMD(prev, w_size, vec_wsize); |
| 108 | #endif |
| 109 | |
| 110 | } |
| 111 | |
/*
 * Drop the helper macros so they do not leak into the including file.
 * z_vec128i_u16x8_t is a typedef rather than a macro, so #undef has no effect
 * on it; the type name stays visible to the includer.
 */
#undef Z_SLIDE_HASH_SIMD
#undef Z_SLIDE_INIT_SIMD
| 115 | |
| 116 | #endif /* SLIDE_HASH_SIMD_H */ |