/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLBWINTRIN_H
15#define __AVX512VLBWINTRIN_H
16
/* Define the default attributes for the functions in this file.
 *
 * Both variants force inlining, suppress debug info, and enable the
 * avx512vl and avx512bw target features for the annotated function; they
 * differ only in the __min_vector_width__ they declare (128 for the
 * __m128i intrinsics, 256 for the __m256i intrinsics). */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))

/* Integer compare */

/* Compare the sixteen 8-bit elements of a and b as signed integers using
 * predicate p (an _MM_CMPINT_* value, passed to the builtin as int) and
 * produce a __mmask16 with one result bit per element. The all-ones mask
 * means every element takes part. */
#define _mm_cmp_epi8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)-1))

/* Same as _mm_cmp_epi8_mask, but m is forwarded as the builtin's mask
 * operand, so only elements whose bit is set in m can set a result bit. */
#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)(m)))

/* Unsigned counterpart of _mm_cmp_epi8_mask: the sixteen 8-bit elements are
 * compared as unsigned integers under predicate p. */
#define _mm_cmp_epu8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)-1))

/* Unsigned counterpart of _mm_mask_cmp_epi8_mask; m is forwarded as the
 * builtin's mask operand. */
#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)(m)))

/* Compare the thirty-two 8-bit elements of a and b as signed integers using
 * predicate p (an _MM_CMPINT_* value, passed to the builtin as int) and
 * produce a __mmask32 with one result bit per element. The all-ones mask
 * means every element takes part. */
#define _mm256_cmp_epi8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)-1))

/* Same as _mm256_cmp_epi8_mask, but m is forwarded as the builtin's mask
 * operand, so only elements whose bit is set in m can set a result bit. */
#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)(m)))

/* Unsigned counterpart of _mm256_cmp_epi8_mask: the thirty-two 8-bit
 * elements are compared as unsigned integers under predicate p. */
#define _mm256_cmp_epu8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)-1))

/* Unsigned counterpart of _mm256_mask_cmp_epi8_mask; m is forwarded as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)(m)))

/* Compare the eight 16-bit elements of a and b as signed integers using
 * predicate p (an _MM_CMPINT_* value, passed to the builtin as int) and
 * produce a __mmask8 with one result bit per element. The all-ones mask
 * means every element takes part. */
#define _mm_cmp_epi16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

/* Same as _mm_cmp_epi16_mask, but m is forwarded as the builtin's mask
 * operand, so only elements whose bit is set in m can set a result bit. */
#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

/* Unsigned counterpart of _mm_cmp_epi16_mask: the eight 16-bit elements are
 * compared as unsigned integers under predicate p. */
#define _mm_cmp_epu16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

/* Unsigned counterpart of _mm_mask_cmp_epi16_mask; m is forwarded as the
 * builtin's mask operand. */
#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))

/* Compare the sixteen 16-bit elements of a and b as signed integers using
 * predicate p (an _MM_CMPINT_* value, passed to the builtin as int) and
 * produce a __mmask16 with one result bit per element. The all-ones mask
 * means every element takes part. */
#define _mm256_cmp_epi16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)-1))

/* Same as _mm256_cmp_epi16_mask, but m is forwarded as the builtin's mask
 * operand, so only elements whose bit is set in m can set a result bit. */
#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)(m)))

/* Unsigned counterpart of _mm256_cmp_epi16_mask: the sixteen 16-bit elements
 * are compared as unsigned integers under predicate p. */
#define _mm256_cmp_epu16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)-1))

/* Unsigned counterpart of _mm256_mask_cmp_epi16_mask; m is forwarded as the
 * builtin's mask operand. */
#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)(m)))

/* Fixed-predicate shorthands for signed 8-bit compares on 128-bit vectors.
 * Each macro forwards to _mm_cmp_epi8_mask (or, for the _mask_ forms, to
 * _mm_mask_cmp_epi8_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm_cmpeq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for signed 8-bit compares on 256-bit vectors.
 * Each macro forwards to _mm256_cmp_epi8_mask (or, for the _mask_ forms, to
 * _mm256_mask_cmp_epi8_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm256_cmpeq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for unsigned 8-bit compares on 128-bit vectors.
 * Each macro forwards to _mm_cmp_epu8_mask (or, for the _mask_ forms, to
 * _mm_mask_cmp_epu8_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm_cmpeq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for unsigned 8-bit compares on 256-bit vectors.
 * Each macro forwards to _mm256_cmp_epu8_mask (or, for the _mask_ forms, to
 * _mm256_mask_cmp_epu8_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm256_cmpeq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for signed 16-bit compares on 128-bit vectors.
 * Each macro forwards to _mm_cmp_epi16_mask (or, for the _mask_ forms, to
 * _mm_mask_cmp_epi16_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm_cmpeq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for signed 16-bit compares on 256-bit vectors.
 * Each macro forwards to _mm256_cmp_epi16_mask (or, for the _mask_ forms, to
 * _mm256_mask_cmp_epi16_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm256_cmpeq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for unsigned 16-bit compares on 128-bit vectors.
 * Each macro forwards to _mm_cmp_epu16_mask (or, for the _mask_ forms, to
 * _mm_mask_cmp_epu16_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm_cmpeq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for unsigned 16-bit compares on 256-bit vectors.
 * Each macro forwards to _mm256_cmp_epu16_mask (or, for the _mask_ forms, to
 * _mm256_mask_cmp_epu16_mask with k as the mask operand) with the matching
 * _MM_CMPINT_* predicate: EQ, GE, GT, LE, LT or NE. */
#define _mm256_cmpeq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)

Logan Chien55afb0a2018-10-15 10:42:14 +0800303static __inline__ __m256i __DEFAULT_FN_ATTRS256
304_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
305 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
306 (__v32qi)_mm256_add_epi8(__A, __B),
307 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800308}
309
Logan Chien55afb0a2018-10-15 10:42:14 +0800310static __inline__ __m256i __DEFAULT_FN_ATTRS256
311_mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
312 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
313 (__v32qi)_mm256_add_epi8(__A, __B),
314 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800315}
316
Logan Chien55afb0a2018-10-15 10:42:14 +0800317static __inline__ __m256i __DEFAULT_FN_ATTRS256
318_mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
319 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
320 (__v16hi)_mm256_add_epi16(__A, __B),
321 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800322}
323
Logan Chien55afb0a2018-10-15 10:42:14 +0800324static __inline__ __m256i __DEFAULT_FN_ATTRS256
325_mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
326 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
327 (__v16hi)_mm256_add_epi16(__A, __B),
328 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800329}
330
Logan Chien55afb0a2018-10-15 10:42:14 +0800331static __inline__ __m256i __DEFAULT_FN_ATTRS256
332_mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
333 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
334 (__v32qi)_mm256_sub_epi8(__A, __B),
335 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800336}
337
Logan Chien55afb0a2018-10-15 10:42:14 +0800338static __inline__ __m256i __DEFAULT_FN_ATTRS256
339_mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
340 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
341 (__v32qi)_mm256_sub_epi8(__A, __B),
342 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800343}
344
Logan Chien55afb0a2018-10-15 10:42:14 +0800345static __inline__ __m256i __DEFAULT_FN_ATTRS256
346_mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
347 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
348 (__v16hi)_mm256_sub_epi16(__A, __B),
349 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800350}
351
Logan Chien55afb0a2018-10-15 10:42:14 +0800352static __inline__ __m256i __DEFAULT_FN_ATTRS256
353_mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
354 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
355 (__v16hi)_mm256_sub_epi16(__A, __B),
356 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800357}
358
Logan Chien55afb0a2018-10-15 10:42:14 +0800359static __inline__ __m128i __DEFAULT_FN_ATTRS128
360_mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
361 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
362 (__v16qi)_mm_add_epi8(__A, __B),
363 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800364}
365
Logan Chien55afb0a2018-10-15 10:42:14 +0800366static __inline__ __m128i __DEFAULT_FN_ATTRS128
367_mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
368 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
369 (__v16qi)_mm_add_epi8(__A, __B),
370 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800371}
372
Logan Chien55afb0a2018-10-15 10:42:14 +0800373static __inline__ __m128i __DEFAULT_FN_ATTRS128
374_mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
375 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
376 (__v8hi)_mm_add_epi16(__A, __B),
377 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800378}
379
Logan Chien55afb0a2018-10-15 10:42:14 +0800380static __inline__ __m128i __DEFAULT_FN_ATTRS128
381_mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
382 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
383 (__v8hi)_mm_add_epi16(__A, __B),
384 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800385}
386
Logan Chien55afb0a2018-10-15 10:42:14 +0800387static __inline__ __m128i __DEFAULT_FN_ATTRS128
388_mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
389 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
390 (__v16qi)_mm_sub_epi8(__A, __B),
391 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800392}
393
Logan Chien55afb0a2018-10-15 10:42:14 +0800394static __inline__ __m128i __DEFAULT_FN_ATTRS128
395_mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
396 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
397 (__v16qi)_mm_sub_epi8(__A, __B),
398 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800399}
400
Logan Chien55afb0a2018-10-15 10:42:14 +0800401static __inline__ __m128i __DEFAULT_FN_ATTRS128
402_mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
403 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
404 (__v8hi)_mm_sub_epi16(__A, __B),
405 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800406}
407
Logan Chien55afb0a2018-10-15 10:42:14 +0800408static __inline__ __m128i __DEFAULT_FN_ATTRS128
409_mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
410 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
411 (__v8hi)_mm_sub_epi16(__A, __B),
412 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800413}
414
Logan Chien55afb0a2018-10-15 10:42:14 +0800415static __inline__ __m256i __DEFAULT_FN_ATTRS256
416_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
417 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
418 (__v16hi)_mm256_mullo_epi16(__A, __B),
419 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800420}
421
Logan Chien55afb0a2018-10-15 10:42:14 +0800422static __inline__ __m256i __DEFAULT_FN_ATTRS256
423_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
424 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
425 (__v16hi)_mm256_mullo_epi16(__A, __B),
426 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800427}
428
Logan Chien55afb0a2018-10-15 10:42:14 +0800429static __inline__ __m128i __DEFAULT_FN_ATTRS128
430_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
431 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
432 (__v8hi)_mm_mullo_epi16(__A, __B),
433 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800434}
435
Logan Chien55afb0a2018-10-15 10:42:14 +0800436static __inline__ __m128i __DEFAULT_FN_ATTRS128
437_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
438 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
439 (__v8hi)_mm_mullo_epi16(__A, __B),
440 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800441}
442
Logan Chien55afb0a2018-10-15 10:42:14 +0800443static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800444_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
445{
446 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
447 (__v16qi) __W,
448 (__v16qi) __A);
449}
450
Logan Chien55afb0a2018-10-15 10:42:14 +0800451static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800452_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
453{
454 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
455 (__v32qi) __W,
456 (__v32qi) __A);
457}
458
Logan Chien55afb0a2018-10-15 10:42:14 +0800459static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800460_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
461{
462 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
463 (__v8hi) __W,
464 (__v8hi) __A);
465}
466
Logan Chien55afb0a2018-10-15 10:42:14 +0800467static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800468_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
469{
470 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
471 (__v16hi) __W,
472 (__v16hi) __A);
473}
474
Logan Chien55afb0a2018-10-15 10:42:14 +0800475static __inline__ __m128i __DEFAULT_FN_ATTRS128
476_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800477{
Logan Chien55afb0a2018-10-15 10:42:14 +0800478 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
479 (__v16qi)_mm_abs_epi8(__A),
480 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800481}
482
Logan Chien55afb0a2018-10-15 10:42:14 +0800483static __inline__ __m128i __DEFAULT_FN_ATTRS128
484_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800485{
Logan Chien55afb0a2018-10-15 10:42:14 +0800486 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
487 (__v16qi)_mm_abs_epi8(__A),
488 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800489}
490
Logan Chien55afb0a2018-10-15 10:42:14 +0800491static __inline__ __m256i __DEFAULT_FN_ATTRS256
492_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800493{
Logan Chien55afb0a2018-10-15 10:42:14 +0800494 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
495 (__v32qi)_mm256_abs_epi8(__A),
496 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800497}
498
Logan Chien55afb0a2018-10-15 10:42:14 +0800499static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800500_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
501{
Logan Chien55afb0a2018-10-15 10:42:14 +0800502 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
503 (__v32qi)_mm256_abs_epi8(__A),
504 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800505}
506
Logan Chien55afb0a2018-10-15 10:42:14 +0800507static __inline__ __m128i __DEFAULT_FN_ATTRS128
508_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800509{
Logan Chien55afb0a2018-10-15 10:42:14 +0800510 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
511 (__v8hi)_mm_abs_epi16(__A),
512 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800513}
514
Logan Chien55afb0a2018-10-15 10:42:14 +0800515static __inline__ __m128i __DEFAULT_FN_ATTRS128
516_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800517{
Logan Chien55afb0a2018-10-15 10:42:14 +0800518 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
519 (__v8hi)_mm_abs_epi16(__A),
520 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800521}
522
Logan Chien55afb0a2018-10-15 10:42:14 +0800523static __inline__ __m256i __DEFAULT_FN_ATTRS256
524_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800525{
Logan Chien55afb0a2018-10-15 10:42:14 +0800526 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
527 (__v16hi)_mm256_abs_epi16(__A),
528 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800529}
530
Logan Chien55afb0a2018-10-15 10:42:14 +0800531static __inline__ __m256i __DEFAULT_FN_ATTRS256
532_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +0800533{
Logan Chien55afb0a2018-10-15 10:42:14 +0800534 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
535 (__v16hi)_mm256_abs_epi16(__A),
536 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800537}
538
Logan Chien55afb0a2018-10-15 10:42:14 +0800539static __inline__ __m128i __DEFAULT_FN_ATTRS128
540_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
541 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
542 (__v8hi)_mm_packs_epi32(__A, __B),
543 (__v8hi)_mm_setzero_si128());
544}
545
546static __inline__ __m128i __DEFAULT_FN_ATTRS128
547_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800548{
Logan Chien55afb0a2018-10-15 10:42:14 +0800549 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
550 (__v8hi)_mm_packs_epi32(__A, __B),
551 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800552}
553
Logan Chien55afb0a2018-10-15 10:42:14 +0800554static __inline__ __m256i __DEFAULT_FN_ATTRS256
555_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800556{
Logan Chien55afb0a2018-10-15 10:42:14 +0800557 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
558 (__v16hi)_mm256_packs_epi32(__A, __B),
559 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800560}
561
Logan Chien55afb0a2018-10-15 10:42:14 +0800562static __inline__ __m256i __DEFAULT_FN_ATTRS256
563_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800564{
Logan Chien55afb0a2018-10-15 10:42:14 +0800565 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
566 (__v16hi)_mm256_packs_epi32(__A, __B),
567 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800568}
569
Logan Chien55afb0a2018-10-15 10:42:14 +0800570static __inline__ __m128i __DEFAULT_FN_ATTRS128
571_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800572{
Logan Chien55afb0a2018-10-15 10:42:14 +0800573 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
574 (__v16qi)_mm_packs_epi16(__A, __B),
575 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800576}
577
Logan Chien55afb0a2018-10-15 10:42:14 +0800578static __inline__ __m128i __DEFAULT_FN_ATTRS128
579_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800580{
Logan Chien55afb0a2018-10-15 10:42:14 +0800581 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
582 (__v16qi)_mm_packs_epi16(__A, __B),
583 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800584}
585
Logan Chien55afb0a2018-10-15 10:42:14 +0800586static __inline__ __m256i __DEFAULT_FN_ATTRS256
587_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800588{
Logan Chien55afb0a2018-10-15 10:42:14 +0800589 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
590 (__v32qi)_mm256_packs_epi16(__A, __B),
591 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800592}
593
Logan Chien55afb0a2018-10-15 10:42:14 +0800594static __inline__ __m256i __DEFAULT_FN_ATTRS256
595_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800596{
Logan Chien55afb0a2018-10-15 10:42:14 +0800597 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
598 (__v32qi)_mm256_packs_epi16(__A, __B),
599 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800600}
601
Logan Chien55afb0a2018-10-15 10:42:14 +0800602static __inline__ __m128i __DEFAULT_FN_ATTRS128
603_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800604{
Logan Chien55afb0a2018-10-15 10:42:14 +0800605 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
606 (__v8hi)_mm_packus_epi32(__A, __B),
607 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800608}
609
Logan Chien55afb0a2018-10-15 10:42:14 +0800610static __inline__ __m128i __DEFAULT_FN_ATTRS128
611_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800612{
Logan Chien55afb0a2018-10-15 10:42:14 +0800613 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
614 (__v8hi)_mm_packus_epi32(__A, __B),
615 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800616}
617
Logan Chien55afb0a2018-10-15 10:42:14 +0800618static __inline__ __m256i __DEFAULT_FN_ATTRS256
619_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800620{
Logan Chien55afb0a2018-10-15 10:42:14 +0800621 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
622 (__v16hi)_mm256_packus_epi32(__A, __B),
623 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800624}
625
Logan Chien55afb0a2018-10-15 10:42:14 +0800626static __inline__ __m256i __DEFAULT_FN_ATTRS256
627_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800628{
Logan Chien55afb0a2018-10-15 10:42:14 +0800629 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
630 (__v16hi)_mm256_packus_epi32(__A, __B),
631 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800632}
633
Logan Chien55afb0a2018-10-15 10:42:14 +0800634static __inline__ __m128i __DEFAULT_FN_ATTRS128
635_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800636{
Logan Chien55afb0a2018-10-15 10:42:14 +0800637 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
638 (__v16qi)_mm_packus_epi16(__A, __B),
639 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800640}
641
Logan Chien55afb0a2018-10-15 10:42:14 +0800642static __inline__ __m128i __DEFAULT_FN_ATTRS128
643_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800644{
Logan Chien55afb0a2018-10-15 10:42:14 +0800645 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
646 (__v16qi)_mm_packus_epi16(__A, __B),
647 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800648}
649
Logan Chien55afb0a2018-10-15 10:42:14 +0800650static __inline__ __m256i __DEFAULT_FN_ATTRS256
651_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800652{
Logan Chien55afb0a2018-10-15 10:42:14 +0800653 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
654 (__v32qi)_mm256_packus_epi16(__A, __B),
655 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800656}
657
Logan Chien55afb0a2018-10-15 10:42:14 +0800658static __inline__ __m256i __DEFAULT_FN_ATTRS256
659_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800660{
Logan Chien55afb0a2018-10-15 10:42:14 +0800661 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
662 (__v32qi)_mm256_packus_epi16(__A, __B),
663 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800664}
665
Logan Chien55afb0a2018-10-15 10:42:14 +0800666static __inline__ __m128i __DEFAULT_FN_ATTRS128
667_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800668{
Logan Chien55afb0a2018-10-15 10:42:14 +0800669 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
670 (__v16qi)_mm_adds_epi8(__A, __B),
671 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800672}
673
Logan Chien55afb0a2018-10-15 10:42:14 +0800674static __inline__ __m128i __DEFAULT_FN_ATTRS128
675_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800676{
Logan Chien55afb0a2018-10-15 10:42:14 +0800677 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
678 (__v16qi)_mm_adds_epi8(__A, __B),
679 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800680}
681
Logan Chien55afb0a2018-10-15 10:42:14 +0800682static __inline__ __m256i __DEFAULT_FN_ATTRS256
683_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800684{
Logan Chien55afb0a2018-10-15 10:42:14 +0800685 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
686 (__v32qi)_mm256_adds_epi8(__A, __B),
687 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800688}
689
Logan Chien55afb0a2018-10-15 10:42:14 +0800690static __inline__ __m256i __DEFAULT_FN_ATTRS256
691_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800692{
Logan Chien55afb0a2018-10-15 10:42:14 +0800693 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
694 (__v32qi)_mm256_adds_epi8(__A, __B),
695 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800696}
697
Logan Chien55afb0a2018-10-15 10:42:14 +0800698static __inline__ __m128i __DEFAULT_FN_ATTRS128
699_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800700{
Logan Chien55afb0a2018-10-15 10:42:14 +0800701 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
702 (__v8hi)_mm_adds_epi16(__A, __B),
703 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800704}
705
Logan Chien55afb0a2018-10-15 10:42:14 +0800706static __inline__ __m128i __DEFAULT_FN_ATTRS128
707_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800708{
Logan Chien55afb0a2018-10-15 10:42:14 +0800709 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
710 (__v8hi)_mm_adds_epi16(__A, __B),
711 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800712}
713
Logan Chien55afb0a2018-10-15 10:42:14 +0800714static __inline__ __m256i __DEFAULT_FN_ATTRS256
715_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800716{
Logan Chien55afb0a2018-10-15 10:42:14 +0800717 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
718 (__v16hi)_mm256_adds_epi16(__A, __B),
719 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800720}
721
Logan Chien55afb0a2018-10-15 10:42:14 +0800722static __inline__ __m256i __DEFAULT_FN_ATTRS256
723_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800724{
Logan Chien55afb0a2018-10-15 10:42:14 +0800725 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
726 (__v16hi)_mm256_adds_epi16(__A, __B),
727 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800728}
729
Logan Chien55afb0a2018-10-15 10:42:14 +0800730static __inline__ __m128i __DEFAULT_FN_ATTRS128
731_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800732{
Logan Chien55afb0a2018-10-15 10:42:14 +0800733 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
734 (__v16qi)_mm_adds_epu8(__A, __B),
735 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800736}
737
Logan Chien55afb0a2018-10-15 10:42:14 +0800738static __inline__ __m128i __DEFAULT_FN_ATTRS128
739_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800740{
Logan Chien55afb0a2018-10-15 10:42:14 +0800741 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
742 (__v16qi)_mm_adds_epu8(__A, __B),
743 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800744}
745
Logan Chien55afb0a2018-10-15 10:42:14 +0800746static __inline__ __m256i __DEFAULT_FN_ATTRS256
747_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800748{
Logan Chien55afb0a2018-10-15 10:42:14 +0800749 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
750 (__v32qi)_mm256_adds_epu8(__A, __B),
751 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800752}
753
Logan Chien55afb0a2018-10-15 10:42:14 +0800754static __inline__ __m256i __DEFAULT_FN_ATTRS256
755_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800756{
Logan Chien55afb0a2018-10-15 10:42:14 +0800757 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
758 (__v32qi)_mm256_adds_epu8(__A, __B),
759 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800760}
761
Logan Chien55afb0a2018-10-15 10:42:14 +0800762static __inline__ __m128i __DEFAULT_FN_ATTRS128
763_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800764{
Logan Chien55afb0a2018-10-15 10:42:14 +0800765 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
766 (__v8hi)_mm_adds_epu16(__A, __B),
767 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800768}
769
Logan Chien55afb0a2018-10-15 10:42:14 +0800770static __inline__ __m128i __DEFAULT_FN_ATTRS128
771_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800772{
Logan Chien55afb0a2018-10-15 10:42:14 +0800773 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
774 (__v8hi)_mm_adds_epu16(__A, __B),
775 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800776}
777
Logan Chien55afb0a2018-10-15 10:42:14 +0800778static __inline__ __m256i __DEFAULT_FN_ATTRS256
779_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800780{
Logan Chien55afb0a2018-10-15 10:42:14 +0800781 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
782 (__v16hi)_mm256_adds_epu16(__A, __B),
783 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800784}
785
Logan Chien55afb0a2018-10-15 10:42:14 +0800786static __inline__ __m256i __DEFAULT_FN_ATTRS256
787_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800788{
Logan Chien55afb0a2018-10-15 10:42:14 +0800789 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
790 (__v16hi)_mm256_adds_epu16(__A, __B),
791 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800792}
793
Logan Chien55afb0a2018-10-15 10:42:14 +0800794static __inline__ __m128i __DEFAULT_FN_ATTRS128
795_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800796{
Logan Chien55afb0a2018-10-15 10:42:14 +0800797 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
798 (__v16qi)_mm_avg_epu8(__A, __B),
799 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800800}
801
Logan Chien55afb0a2018-10-15 10:42:14 +0800802static __inline__ __m128i __DEFAULT_FN_ATTRS128
803_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800804{
Logan Chien55afb0a2018-10-15 10:42:14 +0800805 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
806 (__v16qi)_mm_avg_epu8(__A, __B),
807 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800808}
809
Logan Chien55afb0a2018-10-15 10:42:14 +0800810static __inline__ __m256i __DEFAULT_FN_ATTRS256
811_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800812{
Logan Chien55afb0a2018-10-15 10:42:14 +0800813 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
814 (__v32qi)_mm256_avg_epu8(__A, __B),
815 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800816}
817
Logan Chien55afb0a2018-10-15 10:42:14 +0800818static __inline__ __m256i __DEFAULT_FN_ATTRS256
819_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800820{
Logan Chien55afb0a2018-10-15 10:42:14 +0800821 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
822 (__v32qi)_mm256_avg_epu8(__A, __B),
823 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800824}
825
Logan Chien55afb0a2018-10-15 10:42:14 +0800826static __inline__ __m128i __DEFAULT_FN_ATTRS128
827_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800828{
Logan Chien55afb0a2018-10-15 10:42:14 +0800829 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
830 (__v8hi)_mm_avg_epu16(__A, __B),
831 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800832}
833
Logan Chien55afb0a2018-10-15 10:42:14 +0800834static __inline__ __m128i __DEFAULT_FN_ATTRS128
835_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800836{
Logan Chien55afb0a2018-10-15 10:42:14 +0800837 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
838 (__v8hi)_mm_avg_epu16(__A, __B),
839 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800840}
841
Logan Chien55afb0a2018-10-15 10:42:14 +0800842static __inline__ __m256i __DEFAULT_FN_ATTRS256
843_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800844{
Logan Chien55afb0a2018-10-15 10:42:14 +0800845 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
846 (__v16hi)_mm256_avg_epu16(__A, __B),
847 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800848}
849
Logan Chien55afb0a2018-10-15 10:42:14 +0800850static __inline__ __m256i __DEFAULT_FN_ATTRS256
851_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800852{
Logan Chien55afb0a2018-10-15 10:42:14 +0800853 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
854 (__v16hi)_mm256_avg_epu16(__A, __B),
855 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800856}
857
Logan Chien55afb0a2018-10-15 10:42:14 +0800858static __inline__ __m128i __DEFAULT_FN_ATTRS128
859_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800860{
Logan Chien55afb0a2018-10-15 10:42:14 +0800861 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
862 (__v16qi)_mm_max_epi8(__A, __B),
863 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800864}
865
Logan Chien55afb0a2018-10-15 10:42:14 +0800866static __inline__ __m128i __DEFAULT_FN_ATTRS128
867_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800868{
Logan Chien55afb0a2018-10-15 10:42:14 +0800869 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
870 (__v16qi)_mm_max_epi8(__A, __B),
871 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800872}
873
Logan Chien55afb0a2018-10-15 10:42:14 +0800874static __inline__ __m256i __DEFAULT_FN_ATTRS256
875_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800876{
Logan Chien55afb0a2018-10-15 10:42:14 +0800877 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
878 (__v32qi)_mm256_max_epi8(__A, __B),
879 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800880}
881
Logan Chien55afb0a2018-10-15 10:42:14 +0800882static __inline__ __m256i __DEFAULT_FN_ATTRS256
883_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800884{
Logan Chien55afb0a2018-10-15 10:42:14 +0800885 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
886 (__v32qi)_mm256_max_epi8(__A, __B),
887 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800888}
889
Logan Chien55afb0a2018-10-15 10:42:14 +0800890static __inline__ __m128i __DEFAULT_FN_ATTRS128
891_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800892{
Logan Chien55afb0a2018-10-15 10:42:14 +0800893 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
894 (__v8hi)_mm_max_epi16(__A, __B),
895 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800896}
897
Logan Chien55afb0a2018-10-15 10:42:14 +0800898static __inline__ __m128i __DEFAULT_FN_ATTRS128
899_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800900{
Logan Chien55afb0a2018-10-15 10:42:14 +0800901 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
902 (__v8hi)_mm_max_epi16(__A, __B),
903 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800904}
905
Logan Chien55afb0a2018-10-15 10:42:14 +0800906static __inline__ __m256i __DEFAULT_FN_ATTRS256
907_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800908{
Logan Chien55afb0a2018-10-15 10:42:14 +0800909 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
910 (__v16hi)_mm256_max_epi16(__A, __B),
911 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800912}
913
Logan Chien55afb0a2018-10-15 10:42:14 +0800914static __inline__ __m256i __DEFAULT_FN_ATTRS256
915_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800916{
Logan Chien55afb0a2018-10-15 10:42:14 +0800917 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
918 (__v16hi)_mm256_max_epi16(__A, __B),
919 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800920}
921
Logan Chien55afb0a2018-10-15 10:42:14 +0800922static __inline__ __m128i __DEFAULT_FN_ATTRS128
923_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800924{
Logan Chien55afb0a2018-10-15 10:42:14 +0800925 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
926 (__v16qi)_mm_max_epu8(__A, __B),
927 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800928}
929
Logan Chien55afb0a2018-10-15 10:42:14 +0800930static __inline__ __m128i __DEFAULT_FN_ATTRS128
931_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800932{
Logan Chien55afb0a2018-10-15 10:42:14 +0800933 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
934 (__v16qi)_mm_max_epu8(__A, __B),
935 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800936}
937
Logan Chien55afb0a2018-10-15 10:42:14 +0800938static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800939_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
940{
Logan Chien55afb0a2018-10-15 10:42:14 +0800941 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
942 (__v32qi)_mm256_max_epu8(__A, __B),
943 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800944}
945
Logan Chien55afb0a2018-10-15 10:42:14 +0800946static __inline__ __m256i __DEFAULT_FN_ATTRS256
947_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800948{
Logan Chien55afb0a2018-10-15 10:42:14 +0800949 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
950 (__v32qi)_mm256_max_epu8(__A, __B),
951 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800952}
953
Logan Chien55afb0a2018-10-15 10:42:14 +0800954static __inline__ __m128i __DEFAULT_FN_ATTRS128
955_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800956{
Logan Chien55afb0a2018-10-15 10:42:14 +0800957 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
958 (__v8hi)_mm_max_epu16(__A, __B),
959 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800960}
961
Logan Chien55afb0a2018-10-15 10:42:14 +0800962static __inline__ __m128i __DEFAULT_FN_ATTRS128
963_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800964{
Logan Chien55afb0a2018-10-15 10:42:14 +0800965 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
966 (__v8hi)_mm_max_epu16(__A, __B),
967 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800968}
969
Logan Chien55afb0a2018-10-15 10:42:14 +0800970static __inline__ __m256i __DEFAULT_FN_ATTRS256
971_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800972{
Logan Chien55afb0a2018-10-15 10:42:14 +0800973 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
974 (__v16hi)_mm256_max_epu16(__A, __B),
975 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +0800976}
977
Logan Chien55afb0a2018-10-15 10:42:14 +0800978static __inline__ __m256i __DEFAULT_FN_ATTRS256
979_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800980{
Logan Chien55afb0a2018-10-15 10:42:14 +0800981 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
982 (__v16hi)_mm256_max_epu16(__A, __B),
983 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +0800984}
985
Logan Chien55afb0a2018-10-15 10:42:14 +0800986static __inline__ __m128i __DEFAULT_FN_ATTRS128
987_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800988{
Logan Chien55afb0a2018-10-15 10:42:14 +0800989 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
990 (__v16qi)_mm_min_epi8(__A, __B),
991 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +0800992}
993
Logan Chien55afb0a2018-10-15 10:42:14 +0800994static __inline__ __m128i __DEFAULT_FN_ATTRS128
995_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +0800996{
Logan Chien55afb0a2018-10-15 10:42:14 +0800997 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
998 (__v16qi)_mm_min_epi8(__A, __B),
999 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001000}
1001
Logan Chien55afb0a2018-10-15 10:42:14 +08001002static __inline__ __m256i __DEFAULT_FN_ATTRS256
1003_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001004{
Logan Chien55afb0a2018-10-15 10:42:14 +08001005 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1006 (__v32qi)_mm256_min_epi8(__A, __B),
1007 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001008}
1009
Logan Chien55afb0a2018-10-15 10:42:14 +08001010static __inline__ __m256i __DEFAULT_FN_ATTRS256
1011_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001012{
Logan Chien55afb0a2018-10-15 10:42:14 +08001013 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1014 (__v32qi)_mm256_min_epi8(__A, __B),
1015 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001016}
1017
Logan Chien55afb0a2018-10-15 10:42:14 +08001018static __inline__ __m128i __DEFAULT_FN_ATTRS128
1019_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001020{
Logan Chien55afb0a2018-10-15 10:42:14 +08001021 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1022 (__v8hi)_mm_min_epi16(__A, __B),
1023 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001024}
1025
Logan Chien55afb0a2018-10-15 10:42:14 +08001026static __inline__ __m128i __DEFAULT_FN_ATTRS128
1027_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001028{
Logan Chien55afb0a2018-10-15 10:42:14 +08001029 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1030 (__v8hi)_mm_min_epi16(__A, __B),
1031 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001032}
1033
Logan Chien55afb0a2018-10-15 10:42:14 +08001034static __inline__ __m256i __DEFAULT_FN_ATTRS256
1035_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001036{
Logan Chien55afb0a2018-10-15 10:42:14 +08001037 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1038 (__v16hi)_mm256_min_epi16(__A, __B),
1039 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001040}
1041
Logan Chien55afb0a2018-10-15 10:42:14 +08001042static __inline__ __m256i __DEFAULT_FN_ATTRS256
1043_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001044{
Logan Chien55afb0a2018-10-15 10:42:14 +08001045 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1046 (__v16hi)_mm256_min_epi16(__A, __B),
1047 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001048}
1049
Logan Chien55afb0a2018-10-15 10:42:14 +08001050static __inline__ __m128i __DEFAULT_FN_ATTRS128
1051_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001052{
Logan Chien55afb0a2018-10-15 10:42:14 +08001053 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1054 (__v16qi)_mm_min_epu8(__A, __B),
1055 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001056}
1057
Logan Chien55afb0a2018-10-15 10:42:14 +08001058static __inline__ __m128i __DEFAULT_FN_ATTRS128
1059_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001060{
Logan Chien55afb0a2018-10-15 10:42:14 +08001061 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1062 (__v16qi)_mm_min_epu8(__A, __B),
1063 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001064}
1065
Logan Chien55afb0a2018-10-15 10:42:14 +08001066static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001067_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1068{
Logan Chien55afb0a2018-10-15 10:42:14 +08001069 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1070 (__v32qi)_mm256_min_epu8(__A, __B),
1071 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001072}
1073
Logan Chien55afb0a2018-10-15 10:42:14 +08001074static __inline__ __m256i __DEFAULT_FN_ATTRS256
1075_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001076{
Logan Chien55afb0a2018-10-15 10:42:14 +08001077 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1078 (__v32qi)_mm256_min_epu8(__A, __B),
1079 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001080}
1081
Logan Chien55afb0a2018-10-15 10:42:14 +08001082static __inline__ __m128i __DEFAULT_FN_ATTRS128
1083_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001084{
Logan Chien55afb0a2018-10-15 10:42:14 +08001085 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1086 (__v8hi)_mm_min_epu16(__A, __B),
1087 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001088}
1089
Logan Chien55afb0a2018-10-15 10:42:14 +08001090static __inline__ __m128i __DEFAULT_FN_ATTRS128
1091_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001092{
Logan Chien55afb0a2018-10-15 10:42:14 +08001093 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
1094 (__v8hi)_mm_min_epu16(__A, __B),
1095 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001096}
1097
Logan Chien55afb0a2018-10-15 10:42:14 +08001098static __inline__ __m256i __DEFAULT_FN_ATTRS256
1099_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001100{
Logan Chien55afb0a2018-10-15 10:42:14 +08001101 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1102 (__v16hi)_mm256_min_epu16(__A, __B),
1103 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001104}
1105
Logan Chien55afb0a2018-10-15 10:42:14 +08001106static __inline__ __m256i __DEFAULT_FN_ATTRS256
1107_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001108{
Logan Chien55afb0a2018-10-15 10:42:14 +08001109 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
1110 (__v16hi)_mm256_min_epu16(__A, __B),
1111 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001112}
1113
Logan Chien55afb0a2018-10-15 10:42:14 +08001114static __inline__ __m128i __DEFAULT_FN_ATTRS128
1115_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001116{
Logan Chien55afb0a2018-10-15 10:42:14 +08001117 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1118 (__v16qi)_mm_shuffle_epi8(__A, __B),
1119 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001120}
1121
Logan Chien55afb0a2018-10-15 10:42:14 +08001122static __inline__ __m128i __DEFAULT_FN_ATTRS128
1123_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001124{
Logan Chien55afb0a2018-10-15 10:42:14 +08001125 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1126 (__v16qi)_mm_shuffle_epi8(__A, __B),
1127 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001128}
1129
Logan Chien55afb0a2018-10-15 10:42:14 +08001130static __inline__ __m256i __DEFAULT_FN_ATTRS256
1131_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001132{
Logan Chien55afb0a2018-10-15 10:42:14 +08001133 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1134 (__v32qi)_mm256_shuffle_epi8(__A, __B),
1135 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001136}
1137
Logan Chien55afb0a2018-10-15 10:42:14 +08001138static __inline__ __m256i __DEFAULT_FN_ATTRS256
1139_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001140{
Logan Chien55afb0a2018-10-15 10:42:14 +08001141 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1142 (__v32qi)_mm256_shuffle_epi8(__A, __B),
1143 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001144}
1145
Logan Chien55afb0a2018-10-15 10:42:14 +08001146static __inline__ __m128i __DEFAULT_FN_ATTRS128
1147_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001148{
Logan Chien55afb0a2018-10-15 10:42:14 +08001149 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1150 (__v16qi)_mm_subs_epi8(__A, __B),
1151 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001152}
1153
Logan Chien55afb0a2018-10-15 10:42:14 +08001154static __inline__ __m128i __DEFAULT_FN_ATTRS128
1155_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001156{
Logan Chien55afb0a2018-10-15 10:42:14 +08001157 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1158 (__v16qi)_mm_subs_epi8(__A, __B),
1159 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001160}
1161
Logan Chien55afb0a2018-10-15 10:42:14 +08001162static __inline__ __m256i __DEFAULT_FN_ATTRS256
1163_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001164{
Logan Chien55afb0a2018-10-15 10:42:14 +08001165 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1166 (__v32qi)_mm256_subs_epi8(__A, __B),
1167 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001168}
1169
Logan Chien55afb0a2018-10-15 10:42:14 +08001170static __inline__ __m256i __DEFAULT_FN_ATTRS256
1171_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001172{
Logan Chien55afb0a2018-10-15 10:42:14 +08001173 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1174 (__v32qi)_mm256_subs_epi8(__A, __B),
1175 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001176}
1177
Logan Chien55afb0a2018-10-15 10:42:14 +08001178static __inline__ __m128i __DEFAULT_FN_ATTRS128
1179_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001180{
Logan Chien55afb0a2018-10-15 10:42:14 +08001181 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1182 (__v8hi)_mm_subs_epi16(__A, __B),
1183 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001184}
1185
Logan Chien55afb0a2018-10-15 10:42:14 +08001186static __inline__ __m128i __DEFAULT_FN_ATTRS128
1187_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001188{
Logan Chien55afb0a2018-10-15 10:42:14 +08001189 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1190 (__v8hi)_mm_subs_epi16(__A, __B),
1191 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001192}
1193
Logan Chien55afb0a2018-10-15 10:42:14 +08001194static __inline__ __m256i __DEFAULT_FN_ATTRS256
1195_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001196{
Logan Chien55afb0a2018-10-15 10:42:14 +08001197 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1198 (__v16hi)_mm256_subs_epi16(__A, __B),
1199 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001200}
1201
Logan Chien55afb0a2018-10-15 10:42:14 +08001202static __inline__ __m256i __DEFAULT_FN_ATTRS256
1203_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001204{
Logan Chien55afb0a2018-10-15 10:42:14 +08001205 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1206 (__v16hi)_mm256_subs_epi16(__A, __B),
1207 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001208}
1209
Logan Chien55afb0a2018-10-15 10:42:14 +08001210static __inline__ __m128i __DEFAULT_FN_ATTRS128
1211_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001212{
Logan Chien55afb0a2018-10-15 10:42:14 +08001213 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1214 (__v16qi)_mm_subs_epu8(__A, __B),
1215 (__v16qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001216}
1217
Logan Chien55afb0a2018-10-15 10:42:14 +08001218static __inline__ __m128i __DEFAULT_FN_ATTRS128
1219_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001220{
Logan Chien55afb0a2018-10-15 10:42:14 +08001221 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1222 (__v16qi)_mm_subs_epu8(__A, __B),
1223 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001224}
1225
Logan Chien55afb0a2018-10-15 10:42:14 +08001226static __inline__ __m256i __DEFAULT_FN_ATTRS256
1227_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001228{
Logan Chien55afb0a2018-10-15 10:42:14 +08001229 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1230 (__v32qi)_mm256_subs_epu8(__A, __B),
1231 (__v32qi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001232}
1233
Logan Chien55afb0a2018-10-15 10:42:14 +08001234static __inline__ __m256i __DEFAULT_FN_ATTRS256
1235_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001236{
Logan Chien55afb0a2018-10-15 10:42:14 +08001237 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1238 (__v32qi)_mm256_subs_epu8(__A, __B),
1239 (__v32qi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001240}
1241
Logan Chien55afb0a2018-10-15 10:42:14 +08001242static __inline__ __m128i __DEFAULT_FN_ATTRS128
1243_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001244{
Logan Chien55afb0a2018-10-15 10:42:14 +08001245 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1246 (__v8hi)_mm_subs_epu16(__A, __B),
1247 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001248}
1249
Logan Chien55afb0a2018-10-15 10:42:14 +08001250static __inline__ __m128i __DEFAULT_FN_ATTRS128
1251_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001252{
Logan Chien55afb0a2018-10-15 10:42:14 +08001253 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1254 (__v8hi)_mm_subs_epu16(__A, __B),
1255 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001256}
1257
Logan Chien55afb0a2018-10-15 10:42:14 +08001258static __inline__ __m256i __DEFAULT_FN_ATTRS256
1259_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
1260 __m256i __B) {
1261 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1262 (__v16hi)_mm256_subs_epu16(__A, __B),
1263 (__v16hi)__W);
1264}
1265
1266static __inline__ __m256i __DEFAULT_FN_ATTRS256
1267_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001268{
Logan Chien55afb0a2018-10-15 10:42:14 +08001269 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1270 (__v16hi)_mm256_subs_epu16(__A, __B),
1271 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001272}
1273
Logan Chien55afb0a2018-10-15 10:42:14 +08001274static __inline__ __m128i __DEFAULT_FN_ATTRS128
1275_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001276{
Logan Chien55afb0a2018-10-15 10:42:14 +08001277 return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1278 (__v8hi) __B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001279}
1280
Logan Chien55afb0a2018-10-15 10:42:14 +08001281static __inline__ __m128i __DEFAULT_FN_ATTRS128
1282_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
1283 __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001284{
Logan Chien55afb0a2018-10-15 10:42:14 +08001285 return (__m128i)__builtin_ia32_selectw_128(__U,
1286 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1287 (__v8hi)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001288}
1289
Logan Chien55afb0a2018-10-15 10:42:14 +08001290static __inline__ __m128i __DEFAULT_FN_ATTRS128
1291_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
1292 __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001293{
Logan Chien55afb0a2018-10-15 10:42:14 +08001294 return (__m128i)__builtin_ia32_selectw_128(__U,
1295 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1296 (__v8hi)__I);
Logan Chien2833ffb2018-10-09 10:03:24 +08001297}
1298
Logan Chien55afb0a2018-10-15 10:42:14 +08001299static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001300_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1301 __m128i __B)
1302{
Logan Chien55afb0a2018-10-15 10:42:14 +08001303 return (__m128i)__builtin_ia32_selectw_128(__U,
1304 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
1305 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001306}
1307
Logan Chien55afb0a2018-10-15 10:42:14 +08001308static __inline__ __m256i __DEFAULT_FN_ATTRS256
1309_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001310{
Logan Chien55afb0a2018-10-15 10:42:14 +08001311 return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1312 (__v16hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001313}
1314
Logan Chien55afb0a2018-10-15 10:42:14 +08001315static __inline__ __m256i __DEFAULT_FN_ATTRS256
1316_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
1317 __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001318{
Logan Chien55afb0a2018-10-15 10:42:14 +08001319 return (__m256i)__builtin_ia32_selectw_256(__U,
1320 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1321 (__v16hi)__A);
Logan Chien2833ffb2018-10-09 10:03:24 +08001322}
1323
Logan Chien55afb0a2018-10-15 10:42:14 +08001324static __inline__ __m256i __DEFAULT_FN_ATTRS256
1325_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
1326 __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001327{
Logan Chien55afb0a2018-10-15 10:42:14 +08001328 return (__m256i)__builtin_ia32_selectw_256(__U,
1329 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1330 (__v16hi)__I);
Logan Chien2833ffb2018-10-09 10:03:24 +08001331}
1332
Logan Chien55afb0a2018-10-15 10:42:14 +08001333static __inline__ __m256i __DEFAULT_FN_ATTRS256
1334_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
1335 __m256i __B)
1336{
1337 return (__m256i)__builtin_ia32_selectw_256(__U,
1338 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
1339 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001340}
1341
Logan Chien55afb0a2018-10-15 10:42:14 +08001342static __inline__ __m128i __DEFAULT_FN_ATTRS128
1343_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1344 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1345 (__v8hi)_mm_maddubs_epi16(__X, __Y),
1346 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001347}
1348
Logan Chien55afb0a2018-10-15 10:42:14 +08001349static __inline__ __m128i __DEFAULT_FN_ATTRS128
1350_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1351 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1352 (__v8hi)_mm_maddubs_epi16(__X, __Y),
1353 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001354}
1355
Logan Chien55afb0a2018-10-15 10:42:14 +08001356static __inline__ __m256i __DEFAULT_FN_ATTRS256
1357_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
1358 __m256i __Y) {
1359 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1360 (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1361 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001362}
1363
Logan Chien55afb0a2018-10-15 10:42:14 +08001364static __inline__ __m256i __DEFAULT_FN_ATTRS256
1365_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1366 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1367 (__v16hi)_mm256_maddubs_epi16(__X, __Y),
1368 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001369}
1370
Logan Chien55afb0a2018-10-15 10:42:14 +08001371static __inline__ __m128i __DEFAULT_FN_ATTRS128
1372_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1373 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1374 (__v4si)_mm_madd_epi16(__A, __B),
1375 (__v4si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001376}
1377
Logan Chien55afb0a2018-10-15 10:42:14 +08001378static __inline__ __m128i __DEFAULT_FN_ATTRS128
1379_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1380 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1381 (__v4si)_mm_madd_epi16(__A, __B),
1382 (__v4si)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001383}
1384
Logan Chien55afb0a2018-10-15 10:42:14 +08001385static __inline__ __m256i __DEFAULT_FN_ATTRS256
1386_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
1387 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1388 (__v8si)_mm256_madd_epi16(__A, __B),
1389 (__v8si)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001390}
1391
Logan Chien55afb0a2018-10-15 10:42:14 +08001392static __inline__ __m256i __DEFAULT_FN_ATTRS256
1393_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
1394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1395 (__v8si)_mm256_madd_epi16(__A, __B),
1396 (__v8si)_mm256_setzero_si256());
1397}
1398
1399static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001400_mm_cvtsepi16_epi8 (__m128i __A) {
1401 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1402 (__v16qi) _mm_setzero_si128(),
1403 (__mmask8) -1);
1404}
1405
Logan Chien55afb0a2018-10-15 10:42:14 +08001406static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001407_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1408 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1409 (__v16qi) __O,
1410 __M);
1411}
1412
Logan Chien55afb0a2018-10-15 10:42:14 +08001413static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001414_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) {
1415 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
1416 (__v16qi) _mm_setzero_si128(),
1417 __M);
1418}
1419
Logan Chien55afb0a2018-10-15 10:42:14 +08001420static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001421_mm256_cvtsepi16_epi8 (__m256i __A) {
1422 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1423 (__v16qi) _mm_setzero_si128(),
1424 (__mmask16) -1);
1425}
1426
Logan Chien55afb0a2018-10-15 10:42:14 +08001427static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001428_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1429 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1430 (__v16qi) __O,
1431 __M);
1432}
1433
Logan Chien55afb0a2018-10-15 10:42:14 +08001434static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001435_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) {
1436 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1437 (__v16qi) _mm_setzero_si128(),
1438 __M);
1439}
1440
Logan Chien55afb0a2018-10-15 10:42:14 +08001441static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001442_mm_cvtusepi16_epi8 (__m128i __A) {
1443 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1444 (__v16qi) _mm_setzero_si128(),
1445 (__mmask8) -1);
1446}
1447
Logan Chien55afb0a2018-10-15 10:42:14 +08001448static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001449_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1450 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1451 (__v16qi) __O,
1452 __M);
1453}
1454
Logan Chien55afb0a2018-10-15 10:42:14 +08001455static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001456_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) {
1457 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
1458 (__v16qi) _mm_setzero_si128(),
1459 __M);
1460}
1461
Logan Chien55afb0a2018-10-15 10:42:14 +08001462static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001463_mm256_cvtusepi16_epi8 (__m256i __A) {
1464 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1465 (__v16qi) _mm_setzero_si128(),
1466 (__mmask16) -1);
1467}
1468
Logan Chien55afb0a2018-10-15 10:42:14 +08001469static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001470_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
1471 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1472 (__v16qi) __O,
1473 __M);
1474}
1475
Logan Chien55afb0a2018-10-15 10:42:14 +08001476static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001477_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
1478 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1479 (__v16qi) _mm_setzero_si128(),
1480 __M);
1481}
1482
Logan Chien55afb0a2018-10-15 10:42:14 +08001483static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001484_mm_cvtepi16_epi8 (__m128i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001485 return (__m128i)__builtin_shufflevector(
1486 __builtin_convertvector((__v8hi)__A, __v8qi),
1487 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1488 12, 13, 14, 15);
Logan Chien2833ffb2018-10-09 10:03:24 +08001489}
1490
Logan Chien55afb0a2018-10-15 10:42:14 +08001491static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001492_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
1493 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1494 (__v16qi) __O,
1495 __M);
1496}
1497
Logan Chien55afb0a2018-10-15 10:42:14 +08001498static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001499_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
1500 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
1501 (__v16qi) _mm_setzero_si128(),
1502 __M);
1503}
1504
Logan Chien55afb0a2018-10-15 10:42:14 +08001505static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001506_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1507{
1508 __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1509}
1510
1511
Logan Chien55afb0a2018-10-15 10:42:14 +08001512static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001513_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1514{
1515 __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1516}
1517
Logan Chien55afb0a2018-10-15 10:42:14 +08001518static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001519_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1520{
1521 __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
1522}
1523
Logan Chien55afb0a2018-10-15 10:42:14 +08001524static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001525_mm256_cvtepi16_epi8 (__m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001526 return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
Logan Chien2833ffb2018-10-09 10:03:24 +08001527}
1528
Logan Chien55afb0a2018-10-15 10:42:14 +08001529static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001530_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001531 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1532 (__v16qi)_mm256_cvtepi16_epi8(__A),
1533 (__v16qi)__O);
Logan Chien2833ffb2018-10-09 10:03:24 +08001534}
1535
Logan Chien55afb0a2018-10-15 10:42:14 +08001536static __inline__ __m128i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001537_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
Logan Chien55afb0a2018-10-15 10:42:14 +08001538 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1539 (__v16qi)_mm256_cvtepi16_epi8(__A),
1540 (__v16qi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001541}
1542
Logan Chien55afb0a2018-10-15 10:42:14 +08001543static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001544_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1545{
1546 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1547}
1548
Logan Chien55afb0a2018-10-15 10:42:14 +08001549static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001550_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
1551{
1552 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
1553}
1554
Logan Chien55afb0a2018-10-15 10:42:14 +08001555static __inline__ void __DEFAULT_FN_ATTRS256
1556_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001557{
1558 __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
1559}
Logan Chien55afb0a2018-10-15 10:42:14 +08001560
1561static __inline__ __m128i __DEFAULT_FN_ATTRS128
1562_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
1563 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1564 (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1565 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001566}
1567
Logan Chien55afb0a2018-10-15 10:42:14 +08001568static __inline__ __m128i __DEFAULT_FN_ATTRS128
1569_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
1570 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1571 (__v8hi)_mm_mulhrs_epi16(__X, __Y),
1572 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001573}
1574
Logan Chien55afb0a2018-10-15 10:42:14 +08001575static __inline__ __m256i __DEFAULT_FN_ATTRS256
1576_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
1577 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1578 (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1579 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001580}
1581
Logan Chien55afb0a2018-10-15 10:42:14 +08001582static __inline__ __m256i __DEFAULT_FN_ATTRS256
1583_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
1584 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1585 (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
1586 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001587}
1588
Logan Chien55afb0a2018-10-15 10:42:14 +08001589static __inline__ __m128i __DEFAULT_FN_ATTRS128
1590_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1591 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1592 (__v8hi)_mm_mulhi_epu16(__A, __B),
1593 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001594}
1595
Logan Chien55afb0a2018-10-15 10:42:14 +08001596static __inline__ __m128i __DEFAULT_FN_ATTRS128
1597_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
1598 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1599 (__v8hi)_mm_mulhi_epu16(__A, __B),
1600 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001601}
1602
Logan Chien55afb0a2018-10-15 10:42:14 +08001603static __inline__ __m256i __DEFAULT_FN_ATTRS256
1604_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1605 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1606 (__v16hi)_mm256_mulhi_epu16(__A, __B),
1607 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001608}
1609
Logan Chien55afb0a2018-10-15 10:42:14 +08001610static __inline__ __m256i __DEFAULT_FN_ATTRS256
1611_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
1612 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1613 (__v16hi)_mm256_mulhi_epu16(__A, __B),
1614 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001615}
1616
Logan Chien55afb0a2018-10-15 10:42:14 +08001617static __inline__ __m128i __DEFAULT_FN_ATTRS128
1618_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1619 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1620 (__v8hi)_mm_mulhi_epi16(__A, __B),
1621 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001622}
1623
Logan Chien55afb0a2018-10-15 10:42:14 +08001624static __inline__ __m128i __DEFAULT_FN_ATTRS128
1625_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1626 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1627 (__v8hi)_mm_mulhi_epi16(__A, __B),
1628 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001629}
1630
Logan Chien55afb0a2018-10-15 10:42:14 +08001631static __inline__ __m256i __DEFAULT_FN_ATTRS256
1632_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1633 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1634 (__v16hi)_mm256_mulhi_epi16(__A, __B),
1635 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001636}
1637
Logan Chien55afb0a2018-10-15 10:42:14 +08001638static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1640 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1641 (__v16hi)_mm256_mulhi_epi16(__A, __B),
1642 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001643}
1644
Logan Chien55afb0a2018-10-15 10:42:14 +08001645static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001646_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1647 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1648 (__v16qi)_mm_unpackhi_epi8(__A, __B),
1649 (__v16qi)__W);
1650}
1651
Logan Chien55afb0a2018-10-15 10:42:14 +08001652static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001653_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1654 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1655 (__v16qi)_mm_unpackhi_epi8(__A, __B),
1656 (__v16qi)_mm_setzero_si128());
1657}
1658
Logan Chien55afb0a2018-10-15 10:42:14 +08001659static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001660_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1661 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1662 (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1663 (__v32qi)__W);
1664}
1665
Logan Chien55afb0a2018-10-15 10:42:14 +08001666static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001667_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1668 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1669 (__v32qi)_mm256_unpackhi_epi8(__A, __B),
1670 (__v32qi)_mm256_setzero_si256());
1671}
1672
Logan Chien55afb0a2018-10-15 10:42:14 +08001673static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001674_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1675 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1676 (__v8hi)_mm_unpackhi_epi16(__A, __B),
1677 (__v8hi)__W);
1678}
1679
Logan Chien55afb0a2018-10-15 10:42:14 +08001680static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001681_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1682 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1683 (__v8hi)_mm_unpackhi_epi16(__A, __B),
1684 (__v8hi) _mm_setzero_si128());
1685}
1686
Logan Chien55afb0a2018-10-15 10:42:14 +08001687static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001688_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1689 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1690 (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1691 (__v16hi)__W);
1692}
1693
Logan Chien55afb0a2018-10-15 10:42:14 +08001694static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001695_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1696 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1697 (__v16hi)_mm256_unpackhi_epi16(__A, __B),
1698 (__v16hi)_mm256_setzero_si256());
1699}
1700
Logan Chien55afb0a2018-10-15 10:42:14 +08001701static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001702_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
1703 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1704 (__v16qi)_mm_unpacklo_epi8(__A, __B),
1705 (__v16qi)__W);
1706}
1707
Logan Chien55afb0a2018-10-15 10:42:14 +08001708static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001709_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
1710 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
1711 (__v16qi)_mm_unpacklo_epi8(__A, __B),
1712 (__v16qi)_mm_setzero_si128());
1713}
1714
Logan Chien55afb0a2018-10-15 10:42:14 +08001715static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001716_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
1717 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1718 (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1719 (__v32qi)__W);
1720}
1721
Logan Chien55afb0a2018-10-15 10:42:14 +08001722static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001723_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
1724 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
1725 (__v32qi)_mm256_unpacklo_epi8(__A, __B),
1726 (__v32qi)_mm256_setzero_si256());
1727}
1728
Logan Chien55afb0a2018-10-15 10:42:14 +08001729static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001730_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
1731 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1732 (__v8hi)_mm_unpacklo_epi16(__A, __B),
1733 (__v8hi)__W);
1734}
1735
Logan Chien55afb0a2018-10-15 10:42:14 +08001736static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001737_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
1738 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1739 (__v8hi)_mm_unpacklo_epi16(__A, __B),
1740 (__v8hi) _mm_setzero_si128());
1741}
1742
Logan Chien55afb0a2018-10-15 10:42:14 +08001743static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001744_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
1745 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1746 (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1747 (__v16hi)__W);
1748}
1749
Logan Chien55afb0a2018-10-15 10:42:14 +08001750static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001751_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
1752 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1753 (__v16hi)_mm256_unpacklo_epi16(__A, __B),
1754 (__v16hi)_mm256_setzero_si256());
1755}
1756
Logan Chien55afb0a2018-10-15 10:42:14 +08001757static __inline__ __m128i __DEFAULT_FN_ATTRS128
1758_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001759{
Logan Chien55afb0a2018-10-15 10:42:14 +08001760 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1761 (__v8hi)_mm_cvtepi8_epi16(__A),
1762 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001763}
1764
Logan Chien55afb0a2018-10-15 10:42:14 +08001765static __inline__ __m128i __DEFAULT_FN_ATTRS128
1766_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001767{
Logan Chien55afb0a2018-10-15 10:42:14 +08001768 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1769 (__v8hi)_mm_cvtepi8_epi16(__A),
1770 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001771}
1772
Logan Chien55afb0a2018-10-15 10:42:14 +08001773static __inline__ __m256i __DEFAULT_FN_ATTRS256
1774_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001775{
Logan Chien55afb0a2018-10-15 10:42:14 +08001776 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1777 (__v16hi)_mm256_cvtepi8_epi16(__A),
1778 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001779}
1780
Logan Chien55afb0a2018-10-15 10:42:14 +08001781static __inline__ __m256i __DEFAULT_FN_ATTRS256
1782_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001783{
Logan Chien55afb0a2018-10-15 10:42:14 +08001784 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1785 (__v16hi)_mm256_cvtepi8_epi16(__A),
1786 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001787}
1788
1789
Logan Chien55afb0a2018-10-15 10:42:14 +08001790static __inline__ __m128i __DEFAULT_FN_ATTRS128
1791_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001792{
Logan Chien55afb0a2018-10-15 10:42:14 +08001793 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1794 (__v8hi)_mm_cvtepu8_epi16(__A),
1795 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001796}
1797
Logan Chien55afb0a2018-10-15 10:42:14 +08001798static __inline__ __m128i __DEFAULT_FN_ATTRS128
1799_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001800{
Logan Chien55afb0a2018-10-15 10:42:14 +08001801 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1802 (__v8hi)_mm_cvtepu8_epi16(__A),
1803 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001804}
1805
Logan Chien55afb0a2018-10-15 10:42:14 +08001806static __inline__ __m256i __DEFAULT_FN_ATTRS256
1807_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
Logan Chien2833ffb2018-10-09 10:03:24 +08001808{
Logan Chien55afb0a2018-10-15 10:42:14 +08001809 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1810 (__v16hi)_mm256_cvtepu8_epi16(__A),
1811 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001812}
1813
Logan Chien55afb0a2018-10-15 10:42:14 +08001814static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08001815_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
1816{
Logan Chien55afb0a2018-10-15 10:42:14 +08001817 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1818 (__v16hi)_mm256_cvtepu8_epi16(__A),
1819 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001820}
1821
1822
/* Merge-masked _mm_shufflehi_epi16. Kept as a macro so that imm reaches the
   underlying shuffle unchanged; unselected 16-bit lanes come from W. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001827
/* Zero-masked _mm_shufflehi_epi16. Kept as a macro so that imm reaches the
   underlying shuffle unchanged; unselected 16-bit lanes are zero. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001832
/* Merge-masked _mm256_shufflehi_epi16. Kept as a macro so that imm reaches
   the underlying shuffle unchanged; unselected 16-bit lanes come from W. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001837
/* Zero-masked _mm256_shufflehi_epi16. Kept as a macro so that imm reaches
   the underlying shuffle unchanged; unselected 16-bit lanes are zero. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001842
/* Merge-masked _mm_shufflelo_epi16. Kept as a macro so that imm reaches the
   underlying shuffle unchanged; unselected 16-bit lanes come from W. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)(__m128i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001847
/* Zero-masked _mm_shufflelo_epi16. Kept as a macro so that imm reaches the
   underlying shuffle unchanged; unselected 16-bit lanes are zero. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001852
/* Merge-masked _mm256_shufflelo_epi16. Kept as a macro so that imm reaches
   the underlying shuffle unchanged; unselected 16-bit lanes come from W. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08001858
/* Zero-masked _mm256_shufflelo_epi16. Kept as a macro so that imm reaches
   the underlying shuffle unchanged; unselected 16-bit lanes are zero. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08001864
Logan Chien55afb0a2018-10-15 10:42:14 +08001865static __inline__ __m256i __DEFAULT_FN_ATTRS256
1866_mm256_sllv_epi16(__m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001867{
Logan Chien55afb0a2018-10-15 10:42:14 +08001868 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001869}
1870
Logan Chien55afb0a2018-10-15 10:42:14 +08001871static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001873{
Logan Chien55afb0a2018-10-15 10:42:14 +08001874 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1875 (__v16hi)_mm256_sllv_epi16(__A, __B),
1876 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001877}
1878
Logan Chien55afb0a2018-10-15 10:42:14 +08001879static __inline__ __m256i __DEFAULT_FN_ATTRS256
1880_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001881{
Logan Chien55afb0a2018-10-15 10:42:14 +08001882 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1883 (__v16hi)_mm256_sllv_epi16(__A, __B),
1884 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001885}
1886
Logan Chien55afb0a2018-10-15 10:42:14 +08001887static __inline__ __m128i __DEFAULT_FN_ATTRS128
1888_mm_sllv_epi16(__m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001889{
Logan Chien55afb0a2018-10-15 10:42:14 +08001890 return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001891}
1892
Logan Chien55afb0a2018-10-15 10:42:14 +08001893static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001895{
Logan Chien55afb0a2018-10-15 10:42:14 +08001896 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1897 (__v8hi)_mm_sllv_epi16(__A, __B),
1898 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001899}
1900
Logan Chien55afb0a2018-10-15 10:42:14 +08001901static __inline__ __m128i __DEFAULT_FN_ATTRS128
1902_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001903{
Logan Chien55afb0a2018-10-15 10:42:14 +08001904 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1905 (__v8hi)_mm_sllv_epi16(__A, __B),
1906 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001907}
1908
Logan Chien55afb0a2018-10-15 10:42:14 +08001909static __inline__ __m128i __DEFAULT_FN_ATTRS128
1910_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001911{
Logan Chien55afb0a2018-10-15 10:42:14 +08001912 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1913 (__v8hi)_mm_sll_epi16(__A, __B),
1914 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001915}
1916
Logan Chien55afb0a2018-10-15 10:42:14 +08001917static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08001918_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1919{
Logan Chien55afb0a2018-10-15 10:42:14 +08001920 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1921 (__v8hi)_mm_sll_epi16(__A, __B),
1922 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001923}
1924
Logan Chien55afb0a2018-10-15 10:42:14 +08001925static __inline__ __m256i __DEFAULT_FN_ATTRS256
1926_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001927{
Logan Chien55afb0a2018-10-15 10:42:14 +08001928 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1929 (__v16hi)_mm256_sll_epi16(__A, __B),
1930 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001931}
1932
Logan Chien55afb0a2018-10-15 10:42:14 +08001933static __inline__ __m256i __DEFAULT_FN_ATTRS256
1934_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001935{
Logan Chien55afb0a2018-10-15 10:42:14 +08001936 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1937 (__v16hi)_mm256_sll_epi16(__A, __B),
1938 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001939}
1940
Logan Chien55afb0a2018-10-15 10:42:14 +08001941static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001942_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001943{
Logan Chien55afb0a2018-10-15 10:42:14 +08001944 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1945 (__v8hi)_mm_slli_epi16(__A, __B),
1946 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001947}
1948
Logan Chien55afb0a2018-10-15 10:42:14 +08001949static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001950_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001951{
Logan Chien55afb0a2018-10-15 10:42:14 +08001952 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
1953 (__v8hi)_mm_slli_epi16(__A, __B),
1954 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08001955}
1956
Logan Chien55afb0a2018-10-15 10:42:14 +08001957static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001958_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
1959 unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001960{
Logan Chien55afb0a2018-10-15 10:42:14 +08001961 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1962 (__v16hi)_mm256_slli_epi16(__A, __B),
1963 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001964}
1965
Logan Chien55afb0a2018-10-15 10:42:14 +08001966static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07001967_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001968{
Logan Chien55afb0a2018-10-15 10:42:14 +08001969 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1970 (__v16hi)_mm256_slli_epi16(__A, __B),
1971 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001972}
1973
Logan Chien55afb0a2018-10-15 10:42:14 +08001974static __inline__ __m256i __DEFAULT_FN_ATTRS256
1975_mm256_srlv_epi16(__m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001976{
Logan Chien55afb0a2018-10-15 10:42:14 +08001977 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08001978}
1979
Logan Chien55afb0a2018-10-15 10:42:14 +08001980static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001982{
Logan Chien55afb0a2018-10-15 10:42:14 +08001983 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1984 (__v16hi)_mm256_srlv_epi16(__A, __B),
1985 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08001986}
1987
Logan Chien55afb0a2018-10-15 10:42:14 +08001988static __inline__ __m256i __DEFAULT_FN_ATTRS256
1989_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001990{
Logan Chien55afb0a2018-10-15 10:42:14 +08001991 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
1992 (__v16hi)_mm256_srlv_epi16(__A, __B),
1993 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08001994}
1995
Logan Chien55afb0a2018-10-15 10:42:14 +08001996static __inline__ __m128i __DEFAULT_FN_ATTRS128
1997_mm_srlv_epi16(__m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08001998{
Logan Chien55afb0a2018-10-15 10:42:14 +08001999 return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08002000}
2001
Logan Chien55afb0a2018-10-15 10:42:14 +08002002static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002004{
Logan Chien55afb0a2018-10-15 10:42:14 +08002005 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2006 (__v8hi)_mm_srlv_epi16(__A, __B),
2007 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002008}
2009
Logan Chien55afb0a2018-10-15 10:42:14 +08002010static __inline__ __m128i __DEFAULT_FN_ATTRS128
2011_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002012{
Logan Chien55afb0a2018-10-15 10:42:14 +08002013 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2014 (__v8hi)_mm_srlv_epi16(__A, __B),
2015 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002016}
2017
Logan Chien55afb0a2018-10-15 10:42:14 +08002018static __inline__ __m256i __DEFAULT_FN_ATTRS256
2019_mm256_srav_epi16(__m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002020{
Logan Chien55afb0a2018-10-15 10:42:14 +08002021 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08002022}
2023
Logan Chien55afb0a2018-10-15 10:42:14 +08002024static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002026{
Logan Chien55afb0a2018-10-15 10:42:14 +08002027 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2028 (__v16hi)_mm256_srav_epi16(__A, __B),
2029 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002030}
2031
Logan Chien55afb0a2018-10-15 10:42:14 +08002032static __inline__ __m256i __DEFAULT_FN_ATTRS256
2033_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002034{
Logan Chien55afb0a2018-10-15 10:42:14 +08002035 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2036 (__v16hi)_mm256_srav_epi16(__A, __B),
2037 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002038}
2039
Logan Chien55afb0a2018-10-15 10:42:14 +08002040static __inline__ __m128i __DEFAULT_FN_ATTRS128
2041_mm_srav_epi16(__m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002042{
Logan Chien55afb0a2018-10-15 10:42:14 +08002043 return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
Logan Chien2833ffb2018-10-09 10:03:24 +08002044}
2045
Logan Chien55afb0a2018-10-15 10:42:14 +08002046static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002048{
Logan Chien55afb0a2018-10-15 10:42:14 +08002049 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2050 (__v8hi)_mm_srav_epi16(__A, __B),
2051 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002052}
2053
Logan Chien55afb0a2018-10-15 10:42:14 +08002054static __inline__ __m128i __DEFAULT_FN_ATTRS128
2055_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002056{
Logan Chien55afb0a2018-10-15 10:42:14 +08002057 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2058 (__v8hi)_mm_srav_epi16(__A, __B),
2059 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002060}
2061
Logan Chien55afb0a2018-10-15 10:42:14 +08002062static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002064{
Logan Chien55afb0a2018-10-15 10:42:14 +08002065 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2066 (__v8hi)_mm_sra_epi16(__A, __B),
2067 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002068}
2069
Logan Chien55afb0a2018-10-15 10:42:14 +08002070static __inline__ __m128i __DEFAULT_FN_ATTRS128
2071_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
2072{
2073 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2074 (__v8hi)_mm_sra_epi16(__A, __B),
2075 (__v8hi)_mm_setzero_si128());
2076}
2077
2078static __inline__ __m256i __DEFAULT_FN_ATTRS256
2079_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
2080{
2081 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2082 (__v16hi)_mm256_sra_epi16(__A, __B),
2083 (__v16hi)__W);
2084}
2085
2086static __inline__ __m256i __DEFAULT_FN_ATTRS256
2087_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
2088{
2089 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2090 (__v16hi)_mm256_sra_epi16(__A, __B),
2091 (__v16hi)_mm256_setzero_si256());
2092}
2093
2094static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07002095_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08002096{
2097 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2098 (__v8hi)_mm_srai_epi16(__A, __B),
2099 (__v8hi)__W);
2100}
2101
2102static __inline__ __m128i __DEFAULT_FN_ATTRS128
Sasha Smundak0fc590b2020-10-07 08:11:59 -07002103_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08002104{
2105 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2106 (__v8hi)_mm_srai_epi16(__A, __B),
2107 (__v8hi)_mm_setzero_si128());
2108}
2109
2110static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07002111_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
2112 unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08002113{
2114 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2115 (__v16hi)_mm256_srai_epi16(__A, __B),
2116 (__v16hi)__W);
2117}
2118
2119static __inline__ __m256i __DEFAULT_FN_ATTRS256
Sasha Smundak0fc590b2020-10-07 08:11:59 -07002120_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
Logan Chien55afb0a2018-10-15 10:42:14 +08002121{
2122 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2123 (__v16hi)_mm256_srai_epi16(__A, __B),
2124 (__v16hi)_mm256_setzero_si256());
2125}
2126
2127static __inline__ __m128i __DEFAULT_FN_ATTRS128
2128_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
2129{
2130 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2131 (__v8hi)_mm_srl_epi16(__A, __B),
2132 (__v8hi)__W);
2133}
2134
2135static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002136_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2137{
Logan Chien55afb0a2018-10-15 10:42:14 +08002138 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2139 (__v8hi)_mm_srl_epi16(__A, __B),
2140 (__v8hi)_mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002141}
2142
Logan Chien55afb0a2018-10-15 10:42:14 +08002143static __inline__ __m256i __DEFAULT_FN_ATTRS256
2144_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002145{
Logan Chien55afb0a2018-10-15 10:42:14 +08002146 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2147 (__v16hi)_mm256_srl_epi16(__A, __B),
2148 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002149}
2150
Logan Chien55afb0a2018-10-15 10:42:14 +08002151static __inline__ __m256i __DEFAULT_FN_ATTRS256
2152_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
Logan Chien2833ffb2018-10-09 10:03:24 +08002153{
Logan Chien55afb0a2018-10-15 10:42:14 +08002154 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2155 (__v16hi)_mm256_srl_epi16(__A, __B),
2156 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002157}
2158
Logan Chien55afb0a2018-10-15 10:42:14 +08002159static __inline__ __m128i __DEFAULT_FN_ATTRS128
2160_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
2161{
2162 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2163 (__v8hi)_mm_srli_epi16(__A, __B),
2164 (__v8hi)__W);
2165}
Logan Chien2833ffb2018-10-09 10:03:24 +08002166
Logan Chien55afb0a2018-10-15 10:42:14 +08002167static __inline__ __m128i __DEFAULT_FN_ATTRS128
2168_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
2169{
2170 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
2171 (__v8hi)_mm_srli_epi16(__A, __B),
2172 (__v8hi)_mm_setzero_si128());
2173}
Logan Chien2833ffb2018-10-09 10:03:24 +08002174
Logan Chien55afb0a2018-10-15 10:42:14 +08002175static __inline__ __m256i __DEFAULT_FN_ATTRS256
2176_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
2177{
2178 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2179 (__v16hi)_mm256_srli_epi16(__A, __B),
2180 (__v16hi)__W);
2181}
Logan Chien2833ffb2018-10-09 10:03:24 +08002182
Logan Chien55afb0a2018-10-15 10:42:14 +08002183static __inline__ __m256i __DEFAULT_FN_ATTRS256
2184_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
2185{
2186 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
2187 (__v16hi)_mm256_srli_epi16(__A, __B),
2188 (__v16hi)_mm256_setzero_si256());
2189}
Logan Chien2833ffb2018-10-09 10:03:24 +08002190
Logan Chien55afb0a2018-10-15 10:42:14 +08002191static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002192_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2193{
2194 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2195 (__v8hi) __A,
2196 (__v8hi) __W);
2197}
2198
Logan Chien55afb0a2018-10-15 10:42:14 +08002199static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002200_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
2201{
2202 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
2203 (__v8hi) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08002204 (__v8hi) _mm_setzero_si128 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08002205}
2206
Logan Chien55afb0a2018-10-15 10:42:14 +08002207static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002208_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
2209{
2210 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2211 (__v16hi) __A,
2212 (__v16hi) __W);
2213}
2214
Logan Chien55afb0a2018-10-15 10:42:14 +08002215static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002216_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
2217{
2218 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
2219 (__v16hi) __A,
2220 (__v16hi) _mm256_setzero_si256 ());
2221}
2222
Logan Chien55afb0a2018-10-15 10:42:14 +08002223static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002224_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
2225{
2226 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2227 (__v16qi) __A,
2228 (__v16qi) __W);
2229}
2230
Logan Chien55afb0a2018-10-15 10:42:14 +08002231static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002232_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
2233{
2234 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
2235 (__v16qi) __A,
Logan Chien55afb0a2018-10-15 10:42:14 +08002236 (__v16qi) _mm_setzero_si128 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08002237}
2238
Logan Chien55afb0a2018-10-15 10:42:14 +08002239static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002240_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
2241{
2242 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2243 (__v32qi) __A,
2244 (__v32qi) __W);
2245}
2246
Logan Chien55afb0a2018-10-15 10:42:14 +08002247static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002248_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
2249{
2250 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
2251 (__v32qi) __A,
2252 (__v32qi) _mm256_setzero_si256 ());
2253}
2254
2255
Logan Chien55afb0a2018-10-15 10:42:14 +08002256static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002257_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
2258{
Logan Chien55afb0a2018-10-15 10:42:14 +08002259 return (__m128i) __builtin_ia32_selectb_128(__M,
2260 (__v16qi) _mm_set1_epi8(__A),
2261 (__v16qi) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08002262}
2263
Logan Chien55afb0a2018-10-15 10:42:14 +08002264static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002265_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
2266{
Logan Chien55afb0a2018-10-15 10:42:14 +08002267 return (__m128i) __builtin_ia32_selectb_128(__M,
2268 (__v16qi) _mm_set1_epi8(__A),
2269 (__v16qi) _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002270}
2271
Logan Chien55afb0a2018-10-15 10:42:14 +08002272static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002273_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
2274{
Logan Chien55afb0a2018-10-15 10:42:14 +08002275 return (__m256i) __builtin_ia32_selectb_256(__M,
2276 (__v32qi) _mm256_set1_epi8(__A),
2277 (__v32qi) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08002278}
2279
Logan Chien55afb0a2018-10-15 10:42:14 +08002280static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002281_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
2282{
Logan Chien55afb0a2018-10-15 10:42:14 +08002283 return (__m256i) __builtin_ia32_selectb_256(__M,
2284 (__v32qi) _mm256_set1_epi8(__A),
2285 (__v32qi) _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002286}
2287
Logan Chien969aea62018-12-05 18:40:57 +08002288static __inline __m128i __DEFAULT_FN_ATTRS128
2289_mm_loadu_epi16 (void const *__P)
2290{
2291 struct __loadu_epi16 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002292 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002293 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002294 return ((const struct __loadu_epi16*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08002295}
2296
Logan Chien55afb0a2018-10-15 10:42:14 +08002297static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002298_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
2299{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002300 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002301 (__v8hi) __W,
2302 (__mmask8) __U);
2303}
2304
Logan Chien55afb0a2018-10-15 10:42:14 +08002305static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002306_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
2307{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002308 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002309 (__v8hi)
Logan Chien55afb0a2018-10-15 10:42:14 +08002310 _mm_setzero_si128 (),
Logan Chien2833ffb2018-10-09 10:03:24 +08002311 (__mmask8) __U);
2312}
2313
Logan Chien969aea62018-12-05 18:40:57 +08002314static __inline __m256i __DEFAULT_FN_ATTRS256
2315_mm256_loadu_epi16 (void const *__P)
2316{
2317 struct __loadu_epi16 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002318 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002319 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002320 return ((const struct __loadu_epi16*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08002321}
2322
Logan Chien55afb0a2018-10-15 10:42:14 +08002323static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002324_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
2325{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002326 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002327 (__v16hi) __W,
2328 (__mmask16) __U);
2329}
2330
Logan Chien55afb0a2018-10-15 10:42:14 +08002331static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002332_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
2333{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002334 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002335 (__v16hi)
2336 _mm256_setzero_si256 (),
2337 (__mmask16) __U);
2338}
2339
Logan Chien969aea62018-12-05 18:40:57 +08002340static __inline __m128i __DEFAULT_FN_ATTRS128
2341_mm_loadu_epi8 (void const *__P)
2342{
2343 struct __loadu_epi8 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002344 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002345 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002346 return ((const struct __loadu_epi8*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08002347}
2348
Logan Chien55afb0a2018-10-15 10:42:14 +08002349static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002350_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
2351{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002352 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002353 (__v16qi) __W,
2354 (__mmask16) __U);
2355}
2356
Logan Chien55afb0a2018-10-15 10:42:14 +08002357static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002358_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
2359{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002360 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002361 (__v16qi)
2362 _mm_setzero_si128 (),
2363 (__mmask16) __U);
2364}
2365
Logan Chien969aea62018-12-05 18:40:57 +08002366static __inline __m256i __DEFAULT_FN_ATTRS256
2367_mm256_loadu_epi8 (void const *__P)
2368{
2369 struct __loadu_epi8 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002370 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002371 } __attribute__((__packed__, __may_alias__));
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002372 return ((const struct __loadu_epi8*)__P)->__v;
Logan Chien969aea62018-12-05 18:40:57 +08002373}
2374
Logan Chien55afb0a2018-10-15 10:42:14 +08002375static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002376_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
2377{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002378 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002379 (__v32qi) __W,
2380 (__mmask32) __U);
2381}
2382
Logan Chien55afb0a2018-10-15 10:42:14 +08002383static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002384_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
2385{
Sasha Smundak33d5ddd2020-05-04 13:37:26 -07002386 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P,
Logan Chien2833ffb2018-10-09 10:03:24 +08002387 (__v32qi)
2388 _mm256_setzero_si256 (),
2389 (__mmask32) __U);
2390}
Logan Chien969aea62018-12-05 18:40:57 +08002391
2392static __inline void __DEFAULT_FN_ATTRS128
2393_mm_storeu_epi16 (void *__P, __m128i __A)
2394{
2395 struct __storeu_epi16 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002396 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002397 } __attribute__((__packed__, __may_alias__));
2398 ((struct __storeu_epi16*)__P)->__v = __A;
2399}
2400
2401static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002402_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
2403{
2404 __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
2405 (__v8hi) __A,
2406 (__mmask8) __U);
2407}
2408
Logan Chien969aea62018-12-05 18:40:57 +08002409static __inline void __DEFAULT_FN_ATTRS256
2410_mm256_storeu_epi16 (void *__P, __m256i __A)
2411{
2412 struct __storeu_epi16 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002413 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002414 } __attribute__((__packed__, __may_alias__));
2415 ((struct __storeu_epi16*)__P)->__v = __A;
2416}
2417
Logan Chien55afb0a2018-10-15 10:42:14 +08002418static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002419_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
2420{
2421 __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2422 (__v16hi) __A,
2423 (__mmask16) __U);
2424}
2425
Logan Chien969aea62018-12-05 18:40:57 +08002426static __inline void __DEFAULT_FN_ATTRS128
2427_mm_storeu_epi8 (void *__P, __m128i __A)
2428{
2429 struct __storeu_epi8 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002430 __m128i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002431 } __attribute__((__packed__, __may_alias__));
2432 ((struct __storeu_epi8*)__P)->__v = __A;
2433}
2434
Logan Chien55afb0a2018-10-15 10:42:14 +08002435static __inline__ void __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002436_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
2437{
2438 __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
2439 (__v16qi) __A,
2440 (__mmask16) __U);
2441}
2442
Logan Chien969aea62018-12-05 18:40:57 +08002443static __inline void __DEFAULT_FN_ATTRS256
2444_mm256_storeu_epi8 (void *__P, __m256i __A)
2445{
2446 struct __storeu_epi8 {
Logan Chiendbcf4122019-03-21 10:50:25 +08002447 __m256i_u __v;
Logan Chien969aea62018-12-05 18:40:57 +08002448 } __attribute__((__packed__, __may_alias__));
2449 ((struct __storeu_epi8*)__P)->__v = __A;
2450}
2451
Logan Chien55afb0a2018-10-15 10:42:14 +08002452static __inline__ void __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002453_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
2454{
2455 __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2456 (__v32qi) __A,
2457 (__mmask32) __U);
2458}
2459
Logan Chien55afb0a2018-10-15 10:42:14 +08002460static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002461_mm_test_epi8_mask (__m128i __A, __m128i __B)
2462{
Logan Chien55afb0a2018-10-15 10:42:14 +08002463 return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002464}
2465
Logan Chien55afb0a2018-10-15 10:42:14 +08002466static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002467_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2468{
Logan Chien55afb0a2018-10-15 10:42:14 +08002469 return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2470 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002471}
2472
Logan Chien55afb0a2018-10-15 10:42:14 +08002473static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002474_mm256_test_epi8_mask (__m256i __A, __m256i __B)
2475{
Logan Chien55afb0a2018-10-15 10:42:14 +08002476 return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
2477 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002478}
2479
Logan Chien55afb0a2018-10-15 10:42:14 +08002480static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002481_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2482{
Logan Chien55afb0a2018-10-15 10:42:14 +08002483 return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
2484 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002485}
2486
Logan Chien55afb0a2018-10-15 10:42:14 +08002487static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002488_mm_test_epi16_mask (__m128i __A, __m128i __B)
2489{
Logan Chien55afb0a2018-10-15 10:42:14 +08002490 return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002491}
2492
Logan Chien55afb0a2018-10-15 10:42:14 +08002493static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002494_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2495{
Logan Chien55afb0a2018-10-15 10:42:14 +08002496 return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
2497 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002498}
2499
Logan Chien55afb0a2018-10-15 10:42:14 +08002500static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002501_mm256_test_epi16_mask (__m256i __A, __m256i __B)
2502{
Logan Chien55afb0a2018-10-15 10:42:14 +08002503 return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
2504 _mm256_setzero_si256 ());
Logan Chien2833ffb2018-10-09 10:03:24 +08002505}
2506
Logan Chien55afb0a2018-10-15 10:42:14 +08002507static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002508_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2509{
Logan Chien55afb0a2018-10-15 10:42:14 +08002510 return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
2511 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002512}
2513
Logan Chien55afb0a2018-10-15 10:42:14 +08002514static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002515_mm_testn_epi8_mask (__m128i __A, __m128i __B)
2516{
Logan Chien55afb0a2018-10-15 10:42:14 +08002517 return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002518}
2519
Logan Chien55afb0a2018-10-15 10:42:14 +08002520static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002521_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
2522{
Logan Chien55afb0a2018-10-15 10:42:14 +08002523 return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
2524 _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002525}
2526
Logan Chien55afb0a2018-10-15 10:42:14 +08002527static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002528_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
2529{
Logan Chien55afb0a2018-10-15 10:42:14 +08002530 return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
2531 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002532}
2533
Logan Chien55afb0a2018-10-15 10:42:14 +08002534static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002535_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
2536{
Logan Chien55afb0a2018-10-15 10:42:14 +08002537 return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
2538 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002539}
2540
Logan Chien55afb0a2018-10-15 10:42:14 +08002541static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002542_mm_testn_epi16_mask (__m128i __A, __m128i __B)
2543{
Logan Chien55afb0a2018-10-15 10:42:14 +08002544 return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002545}
2546
Logan Chien55afb0a2018-10-15 10:42:14 +08002547static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002548_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
2549{
Logan Chien55afb0a2018-10-15 10:42:14 +08002550 return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002551}
2552
Logan Chien55afb0a2018-10-15 10:42:14 +08002553static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002554_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
2555{
Logan Chien55afb0a2018-10-15 10:42:14 +08002556 return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
2557 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002558}
2559
Logan Chien55afb0a2018-10-15 10:42:14 +08002560static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002561_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
2562{
Logan Chien55afb0a2018-10-15 10:42:14 +08002563 return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
2564 _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002565}
2566
Logan Chien55afb0a2018-10-15 10:42:14 +08002567static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002568_mm_movepi8_mask (__m128i __A)
2569{
2570 return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
2571}
2572
Logan Chien55afb0a2018-10-15 10:42:14 +08002573static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002574_mm256_movepi8_mask (__m256i __A)
2575{
2576 return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2577}
2578
Logan Chien55afb0a2018-10-15 10:42:14 +08002579static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002580_mm_movepi16_mask (__m128i __A)
2581{
2582 return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
2583}
2584
Logan Chien55afb0a2018-10-15 10:42:14 +08002585static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002586_mm256_movepi16_mask (__m256i __A)
2587{
2588 return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2589}
2590
Logan Chien55afb0a2018-10-15 10:42:14 +08002591static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002592_mm_movm_epi8 (__mmask16 __A)
2593{
2594 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2595}
2596
Logan Chien55afb0a2018-10-15 10:42:14 +08002597static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002598_mm256_movm_epi8 (__mmask32 __A)
2599{
2600 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2601}
2602
Logan Chien55afb0a2018-10-15 10:42:14 +08002603static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002604_mm_movm_epi16 (__mmask8 __A)
2605{
2606 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2607}
2608
Logan Chien55afb0a2018-10-15 10:42:14 +08002609static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002610_mm256_movm_epi16 (__mmask16 __A)
2611{
2612 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2613}
2614
Logan Chien55afb0a2018-10-15 10:42:14 +08002615static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002616_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
2617{
2618 return (__m128i)__builtin_ia32_selectb_128(__M,
2619 (__v16qi) _mm_broadcastb_epi8(__A),
2620 (__v16qi) __O);
2621}
2622
Logan Chien55afb0a2018-10-15 10:42:14 +08002623static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002624_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
2625{
2626 return (__m128i)__builtin_ia32_selectb_128(__M,
2627 (__v16qi) _mm_broadcastb_epi8(__A),
2628 (__v16qi) _mm_setzero_si128());
2629}
2630
Logan Chien55afb0a2018-10-15 10:42:14 +08002631static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002632_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
2633{
2634 return (__m256i)__builtin_ia32_selectb_256(__M,
2635 (__v32qi) _mm256_broadcastb_epi8(__A),
2636 (__v32qi) __O);
2637}
2638
Logan Chien55afb0a2018-10-15 10:42:14 +08002639static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002640_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
2641{
2642 return (__m256i)__builtin_ia32_selectb_256(__M,
2643 (__v32qi) _mm256_broadcastb_epi8(__A),
2644 (__v32qi) _mm256_setzero_si256());
2645}
2646
Logan Chien55afb0a2018-10-15 10:42:14 +08002647static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002648_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2649{
2650 return (__m128i)__builtin_ia32_selectw_128(__M,
2651 (__v8hi) _mm_broadcastw_epi16(__A),
2652 (__v8hi) __O);
2653}
2654
Logan Chien55afb0a2018-10-15 10:42:14 +08002655static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002656_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
2657{
2658 return (__m128i)__builtin_ia32_selectw_128(__M,
2659 (__v8hi) _mm_broadcastw_epi16(__A),
2660 (__v8hi) _mm_setzero_si128());
2661}
2662
Logan Chien55afb0a2018-10-15 10:42:14 +08002663static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002664_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
2665{
2666 return (__m256i)__builtin_ia32_selectw_256(__M,
2667 (__v16hi) _mm256_broadcastw_epi16(__A),
2668 (__v16hi) __O);
2669}
2670
Logan Chien55afb0a2018-10-15 10:42:14 +08002671static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002672_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
2673{
2674 return (__m256i)__builtin_ia32_selectw_256(__M,
2675 (__v16hi) _mm256_broadcastw_epi16(__A),
2676 (__v16hi) _mm256_setzero_si256());
2677}
2678
Logan Chien55afb0a2018-10-15 10:42:14 +08002679static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002680_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
2681{
Logan Chien55afb0a2018-10-15 10:42:14 +08002682 return (__m256i) __builtin_ia32_selectw_256 (__M,
2683 (__v16hi) _mm256_set1_epi16(__A),
2684 (__v16hi) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08002685}
2686
Logan Chien55afb0a2018-10-15 10:42:14 +08002687static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002688_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
2689{
Logan Chien55afb0a2018-10-15 10:42:14 +08002690 return (__m256i) __builtin_ia32_selectw_256(__M,
2691 (__v16hi)_mm256_set1_epi16(__A),
2692 (__v16hi) _mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002693}
2694
Logan Chien55afb0a2018-10-15 10:42:14 +08002695static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002696_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
2697{
Logan Chien55afb0a2018-10-15 10:42:14 +08002698 return (__m128i) __builtin_ia32_selectw_128(__M,
2699 (__v8hi) _mm_set1_epi16(__A),
2700 (__v8hi) __O);
Logan Chien2833ffb2018-10-09 10:03:24 +08002701}
2702
Logan Chien55afb0a2018-10-15 10:42:14 +08002703static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002704_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
2705{
Logan Chien55afb0a2018-10-15 10:42:14 +08002706 return (__m128i) __builtin_ia32_selectw_128(__M,
2707 (__v8hi) _mm_set1_epi16(__A),
2708 (__v8hi) _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002709}
2710
Logan Chien55afb0a2018-10-15 10:42:14 +08002711static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002712_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
2713{
Logan Chien55afb0a2018-10-15 10:42:14 +08002714 return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08002715}
2716
Logan Chien55afb0a2018-10-15 10:42:14 +08002717static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002718_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
2719{
Logan Chien55afb0a2018-10-15 10:42:14 +08002720 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2721 (__v8hi)_mm_permutexvar_epi16(__A, __B),
2722 (__v8hi) _mm_setzero_si128());
Logan Chien2833ffb2018-10-09 10:03:24 +08002723}
2724
Logan Chien55afb0a2018-10-15 10:42:14 +08002725static __inline__ __m128i __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +08002726_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
2727 __m128i __B)
2728{
Logan Chien55afb0a2018-10-15 10:42:14 +08002729 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
2730 (__v8hi)_mm_permutexvar_epi16(__A, __B),
2731 (__v8hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002732}
2733
Logan Chien55afb0a2018-10-15 10:42:14 +08002734static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002735_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
2736{
Logan Chien55afb0a2018-10-15 10:42:14 +08002737 return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
Logan Chien2833ffb2018-10-09 10:03:24 +08002738}
2739
Logan Chien55afb0a2018-10-15 10:42:14 +08002740static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002741_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
2742 __m256i __B)
2743{
Logan Chien55afb0a2018-10-15 10:42:14 +08002744 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2745 (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2746 (__v16hi)_mm256_setzero_si256());
Logan Chien2833ffb2018-10-09 10:03:24 +08002747}
2748
Logan Chien55afb0a2018-10-15 10:42:14 +08002749static __inline__ __m256i __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002750_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
2751 __m256i __B)
2752{
Logan Chien55afb0a2018-10-15 10:42:14 +08002753 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
2754 (__v16hi)_mm256_permutexvar_epi16(__A, __B),
2755 (__v16hi)__W);
Logan Chien2833ffb2018-10-09 10:03:24 +08002756}
2757
/* Merge-masked form of _mm_alignr_epi8(A, B, N). The alignr result is handed,
 * together with the mask U (cast to __mmask16) and the vector W, to
 * __builtin_ia32_selectb_128; N is cast to int before being passed on.
 * Presumably bytes whose bit in U is clear are copied from W. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)(__m128i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002762
/* Zero-masked form of _mm_alignr_epi8(A, B, N). The alignr result is handed,
 * together with the mask U (cast to __mmask16) and an all-zero vector, to
 * __builtin_ia32_selectb_128; N is cast to int before being passed on.
 * Presumably bytes whose bit in U is clear are zero in the result. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
                                 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
                                 (__v16qi)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002767
/* Merge-masked form of _mm256_alignr_epi8(A, B, N). The alignr result is
 * handed, together with the mask U (cast to __mmask32) and the vector W, to
 * __builtin_ia32_selectb_256; N is cast to int before being passed on.
 * Presumably bytes whose bit in U is clear are copied from W. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002772
/* Zero-masked form of _mm256_alignr_epi8(A, B, N). The alignr result is
 * handed, together with the mask U (cast to __mmask32) and an all-zero vector,
 * to __builtin_ia32_selectb_256; N is cast to int before being passed on.
 * Presumably bytes whose bit in U is clear are zero in the result. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
                              (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
                              (__v32qi)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002777
/* Wrapper around __builtin_ia32_dbpsadbw128. A and B are converted to __m128i
 * and viewed as __v16qi, imm is cast to int, and the builtin's result is
 * returned as __m128i. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \
                                       (__v16qi)(__m128i)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002781
/* Merge-masked form of _mm_dbsad_epu8(A, B, imm). The result, viewed as
 * __v8hi, is handed together with the mask U (cast to __mmask8) and the vector
 * W to __builtin_ia32_selectw_128. Presumably 16-bit elements whose bit in U
 * is clear are copied from W. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                      (__v8hi)(__m128i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002786
/* Zero-masked form of _mm_dbsad_epu8(A, B, imm). The result, viewed as
 * __v8hi, is handed together with the mask U (cast to __mmask8) and an
 * all-zero vector to __builtin_ia32_selectw_128. Presumably 16-bit elements
 * whose bit in U is clear are zero in the result. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
                                      (__v8hi)_mm_setzero_si128()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002791
/* Wrapper around __builtin_ia32_dbpsadbw256. A and B are converted to __m256i
 * and viewed as __v32qi, imm is cast to int, and the builtin's result is
 * returned as __m256i. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \
                                       (__v32qi)(__m256i)(B), (int)(imm)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002795
/* Merge-masked form of _mm256_dbsad_epu8(A, B, imm). The result, viewed as
 * __v16hi, is handed together with the mask U (cast to __mmask16) and the
 * vector W to __builtin_ia32_selectw_256. Presumably 16-bit elements whose bit
 * in U is clear are copied from W. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                  (__v16hi)(__m256i)(W)))
Logan Chien2833ffb2018-10-09 10:03:24 +08002800
/* Zero-masked form of _mm256_dbsad_epu8(A, B, imm). The result, viewed as
 * __v16hi, is handed together with the mask U (cast to __mmask16) and an
 * all-zero vector to __builtin_ia32_selectw_256. Presumably 16-bit elements
 * whose bit in U is clear are zero in the result. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                  (__v16hi)_mm256_setzero_si256()))
Logan Chien2833ffb2018-10-09 10:03:24 +08002805
Logan Chien55afb0a2018-10-15 10:42:14 +08002806#undef __DEFAULT_FN_ATTRS128
2807#undef __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +08002808
2809#endif /* __AVX512VLBWINTRIN_H */