Logan Chien | bedbf4f | 2020-01-06 19:35:19 -0800 | [diff] [blame] | 1 | /*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------=== |
| 2 | * |
| 3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | * See https://llvm.org/LICENSE.txt for license information. |
| 5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | * |
| 7 | *===-----------------------------------------------------------------------=== |
| 8 | */ |
| 9 | |
| 10 | /* Implemented from the specification included in the Intel C++ Compiler |
| 11 | User Guide and Reference, version 9.0. |
| 12 | |
| 13 | NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */ |
| 14 | |
| 15 | #ifndef NO_WARN_X86_INTRINSICS |
| 16 | /* This header is distributed to simplify porting x86_64 code that |
| 17 | makes explicit use of Intel intrinsics to powerpc64/powerpc64le. |
| 18 | |
| 19 | It is the user's responsibility to determine if the results are |
| 20 | acceptable and make additional changes as necessary. |
| 21 | |
| 22 | Note that much code that uses Intel intrinsics can be rewritten in |
| 23 | standard C or GNU C extensions, which are more portable and better |
| 24 | optimized across multiple targets. */ |
| 25 | #error \ |
| 26 | "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." |
| 27 | #endif |
| 28 | |
| 29 | #ifndef SMMINTRIN_H_ |
| 30 | #define SMMINTRIN_H_ |
| 31 | |
| 32 | #if defined(__linux__) && defined(__ppc64__) |
| 33 | |
| 34 | #include <altivec.h> |
| 35 | #include <emmintrin.h> |
| 36 | |
| 37 | extern __inline int |
| 38 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 39 | _mm_extract_epi8(__m128i __X, const int __N) { |
| 40 | return (unsigned char)((__v16qi)__X)[__N & 15]; |
| 41 | } |
| 42 | |
| 43 | extern __inline int |
| 44 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 45 | _mm_extract_epi32(__m128i __X, const int __N) { |
| 46 | return ((__v4si)__X)[__N & 3]; |
| 47 | } |
| 48 | |
| 49 | extern __inline int |
| 50 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 51 | _mm_extract_epi64(__m128i __X, const int __N) { |
| 52 | return ((__v2di)__X)[__N & 1]; |
| 53 | } |
| 54 | |
| 55 | extern __inline int |
| 56 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 57 | _mm_extract_ps(__m128 __X, const int __N) { |
| 58 | return ((__v4si)__X)[__N & 3]; |
| 59 | } |
| 60 | |
| 61 | extern __inline __m128i |
| 62 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 63 | _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) { |
| 64 | __v16qi __charmask = vec_splats((signed char)__imm8); |
| 65 | __charmask = vec_gb(__charmask); |
| 66 | __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask); |
| 67 | #ifdef __BIG_ENDIAN__ |
| 68 | __shortmask = vec_reve(__shortmask); |
| 69 | #endif |
| 70 | return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask); |
| 71 | } |
| 72 | |
| 73 | extern __inline __m128i |
| 74 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 75 | _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) { |
| 76 | const __v16qu __seven = vec_splats((unsigned char)0x07); |
| 77 | __v16qu __lmask = vec_sra((__v16qu)__mask, __seven); |
| 78 | return (__m128i)vec_sel((__v16qu)__A, (__v16qu)__B, __lmask); |
| 79 | } |
| 80 | |
Pirama Arumuga Nainar | 986b880 | 2021-06-03 16:00:34 -0700 | [diff] [blame] | 81 | extern __inline __m128i |
| 82 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 83 | _mm_insert_epi8(__m128i const __A, int const __D, int const __N) { |
| 84 | __v16qi result = (__v16qi)__A; |
| 85 | result[__N & 0xf] = __D; |
| 86 | return (__m128i)result; |
| 87 | } |
| 88 | |
| 89 | extern __inline __m128i |
| 90 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 91 | _mm_insert_epi32(__m128i const __A, int const __D, int const __N) { |
| 92 | __v4si result = (__v4si)__A; |
| 93 | result[__N & 3] = __D; |
| 94 | return (__m128i)result; |
| 95 | } |
| 96 | |
| 97 | extern __inline __m128i |
| 98 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| 99 | _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) { |
| 100 | __v2di result = (__v2di)__A; |
| 101 | result[__N & 1] = __D; |
| 102 | return (__m128i)result; |
| 103 | } |
| 104 | |
Logan Chien | bedbf4f | 2020-01-06 19:35:19 -0800 | [diff] [blame] | 105 | #else |
| 106 | #include_next <smmintrin.h> |
| 107 | #endif /* defined(__linux__) && defined(__ppc64__) */ |
| 108 | |
| 109 | #endif /* SMMINTRIN_H_ */ |