/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* Public 16-byte vector types: one of doubles, one of long longs. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Type defines. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
41
42static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070043_mm_add_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080044{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070045 __a[0] += __b[0];
46 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080047}
48
49static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070050_mm_add_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080051{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070052 return __a + __b;
Ying Wanga6720142011-12-20 14:43:20 -080053}
54
55static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070056_mm_sub_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080057{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070058 __a[0] -= __b[0];
59 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080060}
61
62static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070063_mm_sub_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080064{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070065 return __a - __b;
Ying Wanga6720142011-12-20 14:43:20 -080066}
67
68static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070069_mm_mul_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080070{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070071 __a[0] *= __b[0];
72 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080073}
74
75static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070076_mm_mul_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080077{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070078 return __a * __b;
Ying Wanga6720142011-12-20 14:43:20 -080079}
80
81static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070082_mm_div_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080083{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070084 __a[0] /= __b[0];
85 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080086}
87
88static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070089_mm_div_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080090{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070091 return __a / __b;
Ying Wanga6720142011-12-20 14:43:20 -080092}
93
94static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070095_mm_sqrt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080096{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070097 __m128d __c = __builtin_ia32_sqrtsd(__b);
98 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -080099}
100
101static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700102_mm_sqrt_pd(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800103{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700104 return __builtin_ia32_sqrtpd(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800105}
106
107static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700108_mm_min_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800109{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700110 return __builtin_ia32_minsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800111}
112
113static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700114_mm_min_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800115{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700116 return __builtin_ia32_minpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800117}
118
119static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700120_mm_max_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800121{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700122 return __builtin_ia32_maxsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800123}
124
125static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700126_mm_max_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800127{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700128 return __builtin_ia32_maxpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800129}
130
131static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700132_mm_and_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800133{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700134 return (__m128d)((__v4si)__a & (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800135}
136
137static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700138_mm_andnot_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800139{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700140 return (__m128d)(~(__v4si)__a & (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800141}
142
143static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700144_mm_or_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800145{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700146 return (__m128d)((__v4si)__a | (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800147}
148
149static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700150_mm_xor_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800151{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700152 return (__m128d)((__v4si)__a ^ (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800153}
154
155static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700156_mm_cmpeq_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800157{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700158 return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800159}
160
161static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700162_mm_cmplt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800163{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700164 return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800165}
166
167static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700168_mm_cmple_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800169{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700170 return (__m128d)__builtin_ia32_cmplepd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800171}
172
173static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700174_mm_cmpgt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800175{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700176 return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800177}
178
179static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700180_mm_cmpge_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800181{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700182 return (__m128d)__builtin_ia32_cmplepd(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800183}
184
185static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700186_mm_cmpord_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800187{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700188 return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800189}
190
191static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700192_mm_cmpunord_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800193{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700194 return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800195}
196
197static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700198_mm_cmpneq_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800199{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700200 return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800201}
202
203static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700204_mm_cmpnlt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800205{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700206 return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800207}
208
209static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700210_mm_cmpnle_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800211{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700212 return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800213}
214
215static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700216_mm_cmpngt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800217{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700218 return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800219}
220
221static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700222_mm_cmpnge_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800223{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700224 return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800225}
226
227static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700228_mm_cmpeq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800229{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700230 return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800231}
232
233static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700234_mm_cmplt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800235{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700236 return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800237}
238
239static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700240_mm_cmple_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800241{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700242 return (__m128d)__builtin_ia32_cmplesd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800243}
244
245static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700246_mm_cmpgt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800247{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700248 __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
Stephen Hines996e4dc2013-08-13 01:04:14 -0700249 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800250}
251
252static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700253_mm_cmpge_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800254{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700255 __m128d __c = __builtin_ia32_cmplesd(__b, __a);
Stephen Hines996e4dc2013-08-13 01:04:14 -0700256 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800257}
258
259static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700260_mm_cmpord_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800261{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700262 return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800263}
264
265static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700266_mm_cmpunord_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800267{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700268 return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800269}
270
271static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700272_mm_cmpneq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800273{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700274 return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800275}
276
277static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700278_mm_cmpnlt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800279{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700280 return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800281}
282
283static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700284_mm_cmpnle_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800285{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700286 return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800287}
288
289static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700290_mm_cmpngt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800291{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700292 __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
Stephen Hines996e4dc2013-08-13 01:04:14 -0700293 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800294}
295
296static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700297_mm_cmpnge_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800298{
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700299 __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
Stephen Hines996e4dc2013-08-13 01:04:14 -0700300 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800301}
302
303static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700304_mm_comieq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800305{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700306 return __builtin_ia32_comisdeq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800307}
308
309static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700310_mm_comilt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800311{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700312 return __builtin_ia32_comisdlt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800313}
314
315static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700316_mm_comile_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800317{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700318 return __builtin_ia32_comisdle(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800319}
320
321static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700322_mm_comigt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800323{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700324 return __builtin_ia32_comisdgt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800325}
326
327static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700328_mm_comige_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800329{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700330 return __builtin_ia32_comisdge(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800331}
332
333static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700334_mm_comineq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800335{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700336 return __builtin_ia32_comisdneq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800337}
338
339static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700340_mm_ucomieq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800341{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700342 return __builtin_ia32_ucomisdeq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800343}
344
345static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700346_mm_ucomilt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800347{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700348 return __builtin_ia32_ucomisdlt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800349}
350
351static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700352_mm_ucomile_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800353{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700354 return __builtin_ia32_ucomisdle(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800355}
356
357static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700358_mm_ucomigt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800359{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700360 return __builtin_ia32_ucomisdgt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800361}
362
363static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700364_mm_ucomige_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800365{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700366 return __builtin_ia32_ucomisdge(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800367}
368
369static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700370_mm_ucomineq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800371{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700372 return __builtin_ia32_ucomisdneq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800373}
374
375static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700376_mm_cvtpd_ps(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800377{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700378 return __builtin_ia32_cvtpd2ps(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800379}
380
381static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700382_mm_cvtps_pd(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -0800383{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700384 return __builtin_ia32_cvtps2pd(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800385}
386
387static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700388_mm_cvtepi32_pd(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -0800389{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700390 return __builtin_ia32_cvtdq2pd((__v4si)__a);
Ying Wanga6720142011-12-20 14:43:20 -0800391}
392
393static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700394_mm_cvtpd_epi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800395{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700396 return __builtin_ia32_cvtpd2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800397}
398
399static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700400_mm_cvtsd_si32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800401{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700402 return __builtin_ia32_cvtsd2si(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800403}
404
405static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700406_mm_cvtsd_ss(__m128 __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800407{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700408 __a[0] = __b[0];
409 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800410}
411
412static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700413_mm_cvtsi32_sd(__m128d __a, int __b)
Ying Wanga6720142011-12-20 14:43:20 -0800414{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700415 __a[0] = __b;
416 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800417}
418
419static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700420_mm_cvtss_sd(__m128d __a, __m128 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800421{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700422 __a[0] = __b[0];
423 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800424}
425
426static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700427_mm_cvttpd_epi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800428{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700429 return (__m128i)__builtin_ia32_cvttpd2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800430}
431
432static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700433_mm_cvttsd_si32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800434{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700435 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800436}
437
438static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700439_mm_cvtpd_pi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800440{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700441 return (__m64)__builtin_ia32_cvtpd2pi(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800442}
443
444static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700445_mm_cvttpd_pi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800446{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700447 return (__m64)__builtin_ia32_cvttpd2pi(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800448}
449
450static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700451_mm_cvtpi32_pd(__m64 __a)
Ying Wanga6720142011-12-20 14:43:20 -0800452{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700453 return __builtin_ia32_cvtpi2pd((__v2si)__a);
Ying Wanga6720142011-12-20 14:43:20 -0800454}
455
456static __inline__ double __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700457_mm_cvtsd_f64(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800458{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700459 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800460}
461
462static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700463_mm_load_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800464{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700465 return *(__m128d*)__dp;
Ying Wanga6720142011-12-20 14:43:20 -0800466}
467
468static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700469_mm_load1_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800470{
471 struct __mm_load1_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700472 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800473 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700474 double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
475 return (__m128d){ __u, __u };
Ying Wanga6720142011-12-20 14:43:20 -0800476}
477
478#define _mm_load_pd1(dp) _mm_load1_pd(dp)
479
480static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700481_mm_loadr_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800482{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700483 __m128d __u = *(__m128d*)__dp;
484 return __builtin_shufflevector(__u, __u, 1, 0);
Ying Wanga6720142011-12-20 14:43:20 -0800485}
486
487static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700488_mm_loadu_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800489{
490 struct __loadu_pd {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700491 __m128d __v;
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -0700492 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700493 return ((struct __loadu_pd*)__dp)->__v;
Ying Wanga6720142011-12-20 14:43:20 -0800494}
495
496static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700497_mm_load_sd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800498{
499 struct __mm_load_sd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700500 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800501 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700502 double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
503 return (__m128d){ __u, 0 };
Ying Wanga6720142011-12-20 14:43:20 -0800504}
505
506static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700507_mm_loadh_pd(__m128d __a, double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800508{
509 struct __mm_loadh_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700510 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800511 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700512 double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
513 return (__m128d){ __a[0], __u };
Ying Wanga6720142011-12-20 14:43:20 -0800514}
515
516static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700517_mm_loadl_pd(__m128d __a, double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800518{
519 struct __mm_loadl_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700520 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800521 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700522 double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
523 return (__m128d){ __u, __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800524}
525
526static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700527_mm_set_sd(double __w)
Ying Wanga6720142011-12-20 14:43:20 -0800528{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700529 return (__m128d){ __w, 0 };
Ying Wanga6720142011-12-20 14:43:20 -0800530}
531
532static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700533_mm_set1_pd(double __w)
Ying Wanga6720142011-12-20 14:43:20 -0800534{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700535 return (__m128d){ __w, __w };
Ying Wanga6720142011-12-20 14:43:20 -0800536}
537
538static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700539_mm_set_pd(double __w, double __x)
Ying Wanga6720142011-12-20 14:43:20 -0800540{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700541 return (__m128d){ __x, __w };
Ying Wanga6720142011-12-20 14:43:20 -0800542}
543
544static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700545_mm_setr_pd(double __w, double __x)
Ying Wanga6720142011-12-20 14:43:20 -0800546{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700547 return (__m128d){ __w, __x };
Ying Wanga6720142011-12-20 14:43:20 -0800548}
549
550static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
551_mm_setzero_pd(void)
552{
553 return (__m128d){ 0, 0 };
554}
555
556static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700557_mm_move_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800558{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700559 return (__m128d){ __b[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800560}
561
562static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700563_mm_store_sd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800564{
565 struct __mm_store_sd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700566 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800567 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700568 ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800569}
570
571static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700572_mm_store1_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800573{
574 struct __mm_store1_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700575 double __u[2];
Ying Wanga6720142011-12-20 14:43:20 -0800576 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700577 ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
578 ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800579}
580
581static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700582_mm_store_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800583{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700584 *(__m128d *)__dp = __a;
Ying Wanga6720142011-12-20 14:43:20 -0800585}
586
587static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700588_mm_storeu_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800589{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700590 __builtin_ia32_storeupd(__dp, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800591}
592
593static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700594_mm_storer_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800595{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700596 __a = __builtin_shufflevector(__a, __a, 1, 0);
597 *(__m128d *)__dp = __a;
Ying Wanga6720142011-12-20 14:43:20 -0800598}
599
600static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700601_mm_storeh_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800602{
603 struct __mm_storeh_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700604 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800605 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700606 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
Ying Wanga6720142011-12-20 14:43:20 -0800607}
608
609static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700610_mm_storel_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800611{
612 struct __mm_storeh_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700613 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800614 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700615 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800616}
617
618static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700619_mm_add_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800620{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700621 return (__m128i)((__v16qi)__a + (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800622}
623
624static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700625_mm_add_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800626{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700627 return (__m128i)((__v8hi)__a + (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800628}
629
630static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700631_mm_add_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800632{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700633 return (__m128i)((__v4si)__a + (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800634}
635
636static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700637_mm_add_si64(__m64 __a, __m64 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800638{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700639 return __a + __b;
Ying Wanga6720142011-12-20 14:43:20 -0800640}
641
642static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700643_mm_add_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800644{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700645 return __a + __b;
Ying Wanga6720142011-12-20 14:43:20 -0800646}
647
648static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700649_mm_adds_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800650{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700651 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800652}
653
654static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700655_mm_adds_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800656{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700657 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800658}
659
660static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700661_mm_adds_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800662{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700663 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800664}
665
666static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700667_mm_adds_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800668{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700669 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800670}
671
672static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700673_mm_avg_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800674{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700675 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800676}
677
678static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700679_mm_avg_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800680{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700681 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800682}
683
684static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700685_mm_madd_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800686{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700687 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800688}
689
690static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700691_mm_max_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800692{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700693 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800694}
695
696static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700697_mm_max_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800698{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700699 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800700}
701
702static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700703_mm_min_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800704{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700705 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800706}
707
708static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700709_mm_min_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800710{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700711 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800712}
713
714static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700715_mm_mulhi_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800716{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700717 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800718}
719
720static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700721_mm_mulhi_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800722{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700723 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800724}
725
726static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700727_mm_mullo_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800728{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700729 return (__m128i)((__v8hi)__a * (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800730}
731
732static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700733_mm_mul_su32(__m64 __a, __m64 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800734{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700735 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800736}
737
738static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700739_mm_mul_epu32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800740{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700741 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800742}
743
744static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700745_mm_sad_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800746{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700747 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800748}
749
750static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700751_mm_sub_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800752{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700753 return (__m128i)((__v16qi)__a - (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800754}
755
756static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700757_mm_sub_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800758{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700759 return (__m128i)((__v8hi)__a - (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800760}
761
762static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700763_mm_sub_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800764{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700765 return (__m128i)((__v4si)__a - (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800766}
767
768static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700769_mm_sub_si64(__m64 __a, __m64 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800770{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700771 return __a - __b;
Ying Wanga6720142011-12-20 14:43:20 -0800772}
773
774static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700775_mm_sub_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800776{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700777 return __a - __b;
Ying Wanga6720142011-12-20 14:43:20 -0800778}
779
780static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700781_mm_subs_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800782{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700783 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800784}
785
786static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700787_mm_subs_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800788{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700789 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800790}
791
792static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700793_mm_subs_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800794{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700795 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800796}
797
798static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700799_mm_subs_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800800{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700801 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800802}
803
804static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700805_mm_and_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800806{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700807 return __a & __b;
Ying Wanga6720142011-12-20 14:43:20 -0800808}
809
810static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700811_mm_andnot_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800812{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700813 return ~__a & __b;
Ying Wanga6720142011-12-20 14:43:20 -0800814}
815
816static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700817_mm_or_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800818{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700819 return __a | __b;
Ying Wanga6720142011-12-20 14:43:20 -0800820}
821
822static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700823_mm_xor_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800824{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700825 return __a ^ __b;
Ying Wanga6720142011-12-20 14:43:20 -0800826}
827
/* Shifts the 128-bit value `a` left by `imm` bytes, filling with zero bytes.
   Implemented as a shuffle over { zero vector (indices 0-15), a (indices
   16-31) }: result byte k uses index 16 + k - (imm & 0xF), so the lowest
   bytes come from the zero vector.  If any of bits 4-7 of imm is set, every
   index is 0 and the result is all zero.  imm feeds the shuffle indices and
   therefore has to be a constant expression. */
#define _mm_slli_si128(a, imm) __extension__ ({                         \
  (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(),        \
                                   (__v16qi)(__m128i)(a),               \
                                   ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
                                   ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); })
847
/* Alternative spelling of _mm_slli_si128; expands to it unchanged. */
#define _mm_bslli_si128(a, imm) _mm_slli_si128((a), (imm))
Ying Wanga6720142011-12-20 14:43:20 -0800850
851static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700852_mm_slli_epi16(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800853{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700854 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800855}
856
857static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700858_mm_sll_epi16(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800859{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700860 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800861}
862
863static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700864_mm_slli_epi32(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800865{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700866 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800867}
868
869static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700870_mm_sll_epi32(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800871{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700872 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800873}
874
875static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700876_mm_slli_epi64(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800877{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700878 return __builtin_ia32_psllqi128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800879}
880
881static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700882_mm_sll_epi64(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800883{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700884 return __builtin_ia32_psllq128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800885}
886
887static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700888_mm_srai_epi16(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800889{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700890 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800891}
892
893static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700894_mm_sra_epi16(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800895{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700896 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800897}
898
899static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700900_mm_srai_epi32(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800901{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700902 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800903}
904
905static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700906_mm_sra_epi32(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800907{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700908 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800909}
910
/* Shifts the 128-bit value `a` right by `imm` bytes, filling with zero bytes.
   Implemented as a shuffle over { a (indices 0-15), zero vector (indices
   16-31) }: result byte k uses index (imm & 0xF) + k, so indices that run
   past 15 pick up zero bytes.  If any of bits 4-7 of imm is set, every index
   is 16 and the result is all zero.  imm feeds the shuffle indices and
   therefore has to be a constant expression. */
#define _mm_srli_si128(a, imm) __extension__ ({                          \
  (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a),                \
                                   (__v16qi)_mm_setzero_si128(),         \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9,  \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); })
Ying Wanga6720142011-12-20 14:43:20 -0800930
/* Alternative spelling of _mm_srli_si128; expands to it unchanged. */
#define _mm_bsrli_si128(a, imm) _mm_srli_si128((a), (imm))
Ying Wanga6720142011-12-20 14:43:20 -0800933
934static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700935_mm_srli_epi16(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800936{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700937 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800938}
939
940static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700941_mm_srl_epi16(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800942{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700943 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800944}
945
946static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700947_mm_srli_epi32(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800948{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700949 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800950}
951
952static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700953_mm_srl_epi32(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800954{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700955 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800956}
957
958static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700959_mm_srli_epi64(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800960{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700961 return __builtin_ia32_psrlqi128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800962}
963
964static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700965_mm_srl_epi64(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800966{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700967 return __builtin_ia32_psrlq128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800968}
969
970static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700971_mm_cmpeq_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800972{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700973 return (__m128i)((__v16qi)__a == (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800974}
975
976static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700977_mm_cmpeq_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800978{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700979 return (__m128i)((__v8hi)__a == (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800980}
981
982static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700983_mm_cmpeq_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800984{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700985 return (__m128i)((__v4si)__a == (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800986}
987
988static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700989_mm_cmpgt_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800990{
Ying Wang60999142013-01-07 13:59:36 -0800991 /* This function always performs a signed comparison, but __v16qi is a char
992 which may be signed or unsigned. */
993 typedef signed char __v16qs __attribute__((__vector_size__(16)));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700994 return (__m128i)((__v16qs)__a > (__v16qs)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800995}
996
997static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700998_mm_cmpgt_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800999{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001000 return (__m128i)((__v8hi)__a > (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001001}
1002
1003static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001004_mm_cmpgt_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001005{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001006 return (__m128i)((__v4si)__a > (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001007}
1008
1009static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001010_mm_cmplt_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001011{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001012 return _mm_cmpgt_epi8(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001013}
1014
1015static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001016_mm_cmplt_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001017{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001018 return _mm_cmpgt_epi16(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001019}
1020
1021static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001022_mm_cmplt_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001023{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001024 return _mm_cmpgt_epi32(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001025}
1026
1027#ifdef __x86_64__
1028static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001029_mm_cvtsi64_sd(__m128d __a, long long __b)
Ying Wanga6720142011-12-20 14:43:20 -08001030{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001031 __a[0] = __b;
1032 return __a;
Ying Wanga6720142011-12-20 14:43:20 -08001033}
1034
1035static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001036_mm_cvtsd_si64(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001037{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001038 return __builtin_ia32_cvtsd2si64(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001039}
1040
1041static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001042_mm_cvttsd_si64(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001043{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001044 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001045}
1046#endif
1047
1048static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001049_mm_cvtepi32_ps(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001050{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001051 return __builtin_ia32_cvtdq2ps((__v4si)__a);
Ying Wanga6720142011-12-20 14:43:20 -08001052}
1053
1054static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001055_mm_cvtps_epi32(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001056{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001057 return (__m128i)__builtin_ia32_cvtps2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001058}
1059
1060static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001061_mm_cvttps_epi32(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001062{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001063 return (__m128i)__builtin_ia32_cvttps2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001064}
1065
1066static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001067_mm_cvtsi32_si128(int __a)
Ying Wanga6720142011-12-20 14:43:20 -08001068{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001069 return (__m128i)(__v4si){ __a, 0, 0, 0 };
Ying Wanga6720142011-12-20 14:43:20 -08001070}
1071
1072#ifdef __x86_64__
1073static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001074_mm_cvtsi64_si128(long long __a)
Ying Wanga6720142011-12-20 14:43:20 -08001075{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001076 return (__m128i){ __a, 0 };
Ying Wanga6720142011-12-20 14:43:20 -08001077}
1078#endif
1079
1080static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001081_mm_cvtsi128_si32(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001082{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001083 __v4si __b = (__v4si)__a;
1084 return __b[0];
Ying Wanga6720142011-12-20 14:43:20 -08001085}
1086
1087#ifdef __x86_64__
1088static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001089_mm_cvtsi128_si64(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001090{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001091 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001092}
1093#endif
1094
1095static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001096_mm_load_si128(__m128i const *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001097{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001098 return *__p;
Ying Wanga6720142011-12-20 14:43:20 -08001099}
1100
1101static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001102_mm_loadu_si128(__m128i const *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001103{
1104 struct __loadu_si128 {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001105 __m128i __v;
Stephen Hinesb4d9c8b2015-03-30 16:04:04 -07001106 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001107 return ((struct __loadu_si128*)__p)->__v;
Ying Wanga6720142011-12-20 14:43:20 -08001108}
1109
1110static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001111_mm_loadl_epi64(__m128i const *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001112{
1113 struct __mm_loadl_epi64_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001114 long long __u;
Ying Wanga6720142011-12-20 14:43:20 -08001115 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001116 return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
Ying Wanga6720142011-12-20 14:43:20 -08001117}
1118
1119static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1120_mm_set_epi64x(long long q1, long long q0)
1121{
1122 return (__m128i){ q0, q1 };
1123}
1124
1125static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1126_mm_set_epi64(__m64 q1, __m64 q0)
1127{
1128 return (__m128i){ (long long)q0, (long long)q1 };
1129}
1130
1131static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1132_mm_set_epi32(int i3, int i2, int i1, int i0)
1133{
1134 return (__m128i)(__v4si){ i0, i1, i2, i3};
1135}
1136
1137static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1138_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1139{
1140 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1141}
1142
1143static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1144_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
1145{
1146 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1147}
1148
1149static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001150_mm_set1_epi64x(long long __q)
Ying Wanga6720142011-12-20 14:43:20 -08001151{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001152 return (__m128i){ __q, __q };
Ying Wanga6720142011-12-20 14:43:20 -08001153}
1154
1155static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001156_mm_set1_epi64(__m64 __q)
Ying Wanga6720142011-12-20 14:43:20 -08001157{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001158 return (__m128i){ (long long)__q, (long long)__q };
Ying Wanga6720142011-12-20 14:43:20 -08001159}
1160
1161static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001162_mm_set1_epi32(int __i)
Ying Wanga6720142011-12-20 14:43:20 -08001163{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001164 return (__m128i)(__v4si){ __i, __i, __i, __i };
Ying Wanga6720142011-12-20 14:43:20 -08001165}
1166
1167static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001168_mm_set1_epi16(short __w)
Ying Wanga6720142011-12-20 14:43:20 -08001169{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001170 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
Ying Wanga6720142011-12-20 14:43:20 -08001171}
1172
1173static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001174_mm_set1_epi8(char __b)
Ying Wanga6720142011-12-20 14:43:20 -08001175{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001176 return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
Ying Wanga6720142011-12-20 14:43:20 -08001177}
1178
1179static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1180_mm_setr_epi64(__m64 q0, __m64 q1)
1181{
1182 return (__m128i){ (long long)q0, (long long)q1 };
1183}
1184
1185static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1186_mm_setr_epi32(int i0, int i1, int i2, int i3)
1187{
1188 return (__m128i)(__v4si){ i0, i1, i2, i3};
1189}
1190
1191static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1192_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1193{
1194 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1195}
1196
1197static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1198_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
1199{
1200 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1201}
1202
1203static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1204_mm_setzero_si128(void)
1205{
1206 return (__m128i){ 0LL, 0LL };
1207}
1208
1209static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001210_mm_store_si128(__m128i *__p, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001211{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001212 *__p = __b;
Ying Wanga6720142011-12-20 14:43:20 -08001213}
1214
1215static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001216_mm_storeu_si128(__m128i *__p, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001217{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001218 __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001219}
1220
1221static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001222_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001223{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001224 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
Ying Wanga6720142011-12-20 14:43:20 -08001225}
1226
1227static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001228_mm_storel_epi64(__m128i *__p, __m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001229{
Ying Wang60999142013-01-07 13:59:36 -08001230 struct __mm_storel_epi64_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001231 long long __u;
Ying Wang60999142013-01-07 13:59:36 -08001232 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001233 ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001234}
1235
1236static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001237_mm_stream_pd(double *__p, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001238{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001239 __builtin_ia32_movntpd(__p, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001240}
1241
1242static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001243_mm_stream_si128(__m128i *__p, __m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001244{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001245 __builtin_ia32_movntdq(__p, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001246}
1247
/* Stores the int __a at __p by calling __builtin_ia32_movnti.
 * NOTE(review): presumably a non-temporal (MOVNTI) store -- confirm against
 * the instruction reference. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *__p, int __a)
{
  int *__dst = __p;
  __builtin_ia32_movnti(__dst, __a);
}
1253
Stephen Hines73a76582014-02-13 02:55:05 -08001254#ifdef __x86_64__
/* Stores the long long __a at __p by calling __builtin_ia32_movnti64.
 * NOTE(review): presumably a non-temporal 64-bit (MOVNTI) store -- confirm
 * against the instruction reference. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si64(long long *__p, long long __a)
{
  long long *__dst = __p;
  __builtin_ia32_movnti64(__dst, __a);
}
1260#endif
1261
/* Passes the address __p to __builtin_ia32_clflush.
 * NOTE(review): presumably flushes the cache line containing __p (CLFLUSH)
 * -- confirm against the instruction reference. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{
  void const *__line = __p;
  __builtin_ia32_clflush(__line);
}
1267
/* Emits __builtin_ia32_lfence; takes no arguments and returns nothing.
 * NOTE(review): presumably the LFENCE load-fence instruction -- confirm
 * against the instruction reference. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
}
1273
/* Emits __builtin_ia32_mfence; takes no arguments and returns nothing.
 * NOTE(review): presumably the MFENCE full memory fence instruction --
 * confirm against the instruction reference. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
}
1279
1280static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001281_mm_packs_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001282{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001283 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001284}
1285
1286static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001287_mm_packs_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001288{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001289 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001290}
1291
1292static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001293_mm_packus_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001294{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001295 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001296}
1297
1298static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001299_mm_extract_epi16(__m128i __a, int __imm)
Ying Wanga6720142011-12-20 14:43:20 -08001300{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001301 __v8hi __b = (__v8hi)__a;
Stephen Hines73a76582014-02-13 02:55:05 -08001302 return (unsigned short)__b[__imm & 7];
Ying Wanga6720142011-12-20 14:43:20 -08001303}
1304
1305static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001306_mm_insert_epi16(__m128i __a, int __b, int __imm)
Ying Wanga6720142011-12-20 14:43:20 -08001307{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001308 __v8hi __c = (__v8hi)__a;
1309 __c[__imm & 7] = __b;
1310 return (__m128i)__c;
Ying Wanga6720142011-12-20 14:43:20 -08001311}
1312
1313static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001314_mm_movemask_epi8(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001315{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001316 return __builtin_ia32_pmovmskb128((__v16qi)__a);
Ying Wanga6720142011-12-20 14:43:20 -08001317}
1318
/* Rearranges the four 32-bit elements of a. Result element k is selected by
 * the 2-bit field of imm at bits [2k+1:2k]. All indices are below 4, so the
 * second (all-zero) shuffle operand is never selected from. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
                                   (__v4si)_mm_set1_epi32(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3); })
Ying Wanga6720142011-12-20 14:43:20 -08001324
/* Rearranges the four low 16-bit elements of a; elements 4..7 are passed
 * through unchanged. Result element k (k = 0..3) is selected by the 2-bit
 * field of imm at bits [2k+1:2k]. All indices are below 8, so the second
 * (all-zero) shuffle operand is never selected from. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi)_mm_set1_epi16(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3, \
                                   4, 5, 6, 7); })
Ying Wanga6720142011-12-20 14:43:20 -08001331
/* Rearranges the four high 16-bit elements of a; elements 0..3 are passed
 * through unchanged. Result element 4+k (k = 0..3) is element 4+f of a,
 * where f is the 2-bit field of imm at bits [2k+1:2k]. All indices are below
 * 8, so the second (all-zero) shuffle operand is never selected from. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi)_mm_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) >> 0) & 0x3), \
                                   4 + (((imm) >> 2) & 0x3), \
                                   4 + (((imm) >> 4) & 0x3), \
                                   4 + (((imm) >> 6) & 0x3)); })
Ying Wanga6720142011-12-20 14:43:20 -08001340
1341static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001342_mm_unpackhi_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001343{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001344 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Ying Wanga6720142011-12-20 14:43:20 -08001345}
1346
1347static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001348_mm_unpackhi_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001349{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001350 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Ying Wanga6720142011-12-20 14:43:20 -08001351}
1352
1353static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001354_mm_unpackhi_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001355{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001356 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
Ying Wanga6720142011-12-20 14:43:20 -08001357}
1358
1359static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001360_mm_unpackhi_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001361{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001362 return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
Ying Wanga6720142011-12-20 14:43:20 -08001363}
1364
1365static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001366_mm_unpacklo_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001367{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001368 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Ying Wanga6720142011-12-20 14:43:20 -08001369}
1370
1371static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001372_mm_unpacklo_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001373{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001374 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Ying Wanga6720142011-12-20 14:43:20 -08001375}
1376
1377static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001378_mm_unpacklo_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001379{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001380 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
Ying Wanga6720142011-12-20 14:43:20 -08001381}
1382
1383static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001384_mm_unpacklo_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001385{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001386 return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
Ying Wanga6720142011-12-20 14:43:20 -08001387}
1388
1389static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001390_mm_movepi64_pi64(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001391{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001392 return (__m64)__a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001393}
1394
1395static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hines73a76582014-02-13 02:55:05 -08001396_mm_movpi64_epi64(__m64 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001397{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001398 return (__m128i){ (long long)__a, 0 };
Ying Wanga6720142011-12-20 14:43:20 -08001399}
1400
1401static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001402_mm_move_epi64(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001403{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001404 return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
Ying Wanga6720142011-12-20 14:43:20 -08001405}
1406
1407static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001408_mm_unpackhi_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -08001409{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001410 return __builtin_shufflevector(__a, __b, 1, 2+1);
Ying Wanga6720142011-12-20 14:43:20 -08001411}
1412
1413static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001414_mm_unpacklo_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -08001415{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001416 return __builtin_shufflevector(__a, __b, 0, 2+0);
Ying Wanga6720142011-12-20 14:43:20 -08001417}
1418
1419static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001420_mm_movemask_pd(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001421{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001422 return __builtin_ia32_movmskpd(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001423}
1424
/* Builds a __m128d from one element of a and one element of b. Bit 0 of i
 * picks the element of a used as result element 0; bit 1 of i picks the
 * element of b used as result element 1 (shuffle index 2 or 3). */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
                          ((i) >> 0) & 1, \
                          2 + (((i) >> 1) & 1)); })
Ying Wanga6720142011-12-20 14:43:20 -08001428
1429static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001430_mm_castpd_ps(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001431{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001432 return (__m128)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001433}
1434
1435static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001436_mm_castpd_si128(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001437{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001438 return (__m128i)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001439}
1440
1441static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001442_mm_castps_pd(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001443{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001444 return (__m128d)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001445}
1446
1447static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001448_mm_castps_si128(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001449{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001450 return (__m128i)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001451}
1452
1453static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001454_mm_castsi128_ps(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001455{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001456 return (__m128)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001457}
1458
1459static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001460_mm_castsi128_pd(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001461{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001462 return (__m128d)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001463}
1464
/* Emits a single "pause" instruction through volatile inline assembly; takes
 * no arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1470
/* Combines two selectors into one value: x is placed in bit 1, y in bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1472
1473#endif /* __SSE2__ */
1474
1475#endif /* __EMMINTRIN_H */