/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* Public 16-byte vector types of double and long long elements. */
typedef double    __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Element-typed 16-byte vector aliases. */
typedef double    __v2df  __attribute__((__vector_size__(16)));
typedef long long __v2di  __attribute__((__vector_size__(16)));
typedef short     __v8hi  __attribute__((__vector_size__(16)));
typedef char      __v16qi __attribute__((__vector_size__(16)));
41
42static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070043_mm_add_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080044{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070045 __a[0] += __b[0];
46 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080047}
48
49static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070050_mm_add_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080051{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070052 return __a + __b;
Ying Wanga6720142011-12-20 14:43:20 -080053}
54
55static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070056_mm_sub_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080057{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070058 __a[0] -= __b[0];
59 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080060}
61
62static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070063_mm_sub_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080064{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070065 return __a - __b;
Ying Wanga6720142011-12-20 14:43:20 -080066}
67
68static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070069_mm_mul_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080070{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070071 __a[0] *= __b[0];
72 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080073}
74
75static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070076_mm_mul_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080077{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070078 return __a * __b;
Ying Wanga6720142011-12-20 14:43:20 -080079}
80
81static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070082_mm_div_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080083{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070084 __a[0] /= __b[0];
85 return __a;
Ying Wanga6720142011-12-20 14:43:20 -080086}
87
88static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070089_mm_div_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080090{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070091 return __a / __b;
Ying Wanga6720142011-12-20 14:43:20 -080092}
93
94static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070095_mm_sqrt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -080096{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -070097 __m128d __c = __builtin_ia32_sqrtsd(__b);
98 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -080099}
100
101static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700102_mm_sqrt_pd(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800103{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700104 return __builtin_ia32_sqrtpd(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800105}
106
107static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700108_mm_min_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800109{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700110 return __builtin_ia32_minsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800111}
112
113static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700114_mm_min_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800115{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700116 return __builtin_ia32_minpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800117}
118
119static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700120_mm_max_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800121{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700122 return __builtin_ia32_maxsd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800123}
124
125static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700126_mm_max_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800127{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700128 return __builtin_ia32_maxpd(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800129}
130
131static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700132_mm_and_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800133{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700134 return (__m128d)((__v4si)__a & (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800135}
136
137static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700138_mm_andnot_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800139{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700140 return (__m128d)(~(__v4si)__a & (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800141}
142
143static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700144_mm_or_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800145{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700146 return (__m128d)((__v4si)__a | (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800147}
148
149static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700150_mm_xor_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800151{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700152 return (__m128d)((__v4si)__a ^ (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800153}
154
155static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700156_mm_cmpeq_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800157{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700158 return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
Ying Wanga6720142011-12-20 14:43:20 -0800159}
160
161static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700162_mm_cmplt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800163{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700164 return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
Ying Wanga6720142011-12-20 14:43:20 -0800165}
166
167static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700168_mm_cmple_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800169{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700170 return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
Ying Wanga6720142011-12-20 14:43:20 -0800171}
172
173static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700174_mm_cmpgt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800175{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700176 return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
Ying Wanga6720142011-12-20 14:43:20 -0800177}
178
179static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700180_mm_cmpge_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800181{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700182 return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
Ying Wanga6720142011-12-20 14:43:20 -0800183}
184
185static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700186_mm_cmpord_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800187{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700188 return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
Ying Wanga6720142011-12-20 14:43:20 -0800189}
190
191static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700192_mm_cmpunord_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800193{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700194 return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
Ying Wanga6720142011-12-20 14:43:20 -0800195}
196
197static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700198_mm_cmpneq_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800199{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700200 return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
Ying Wanga6720142011-12-20 14:43:20 -0800201}
202
203static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700204_mm_cmpnlt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800205{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700206 return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
Ying Wanga6720142011-12-20 14:43:20 -0800207}
208
209static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700210_mm_cmpnle_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800211{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700212 return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
Ying Wanga6720142011-12-20 14:43:20 -0800213}
214
215static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700216_mm_cmpngt_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800217{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700218 return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
Ying Wanga6720142011-12-20 14:43:20 -0800219}
220
221static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700222_mm_cmpnge_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800223{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700224 return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
Ying Wanga6720142011-12-20 14:43:20 -0800225}
226
227static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700228_mm_cmpeq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800229{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700230 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
Ying Wanga6720142011-12-20 14:43:20 -0800231}
232
233static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700234_mm_cmplt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800235{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700236 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
Ying Wanga6720142011-12-20 14:43:20 -0800237}
238
239static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700240_mm_cmple_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800241{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700242 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
Ying Wanga6720142011-12-20 14:43:20 -0800243}
244
245static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700246_mm_cmpgt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800247{
Stephen Hines996e4dc2013-08-13 01:04:14 -0700248 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
249 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800250}
251
252static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700253_mm_cmpge_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800254{
Stephen Hines996e4dc2013-08-13 01:04:14 -0700255 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
256 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800257}
258
259static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700260_mm_cmpord_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800261{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700262 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
Ying Wanga6720142011-12-20 14:43:20 -0800263}
264
265static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700266_mm_cmpunord_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800267{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700268 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
Ying Wanga6720142011-12-20 14:43:20 -0800269}
270
271static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700272_mm_cmpneq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800273{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700274 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
Ying Wanga6720142011-12-20 14:43:20 -0800275}
276
277static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700278_mm_cmpnlt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800279{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700280 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
Ying Wanga6720142011-12-20 14:43:20 -0800281}
282
283static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700284_mm_cmpnle_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800285{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700286 return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
Ying Wanga6720142011-12-20 14:43:20 -0800287}
288
289static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700290_mm_cmpngt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800291{
Stephen Hines996e4dc2013-08-13 01:04:14 -0700292 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
293 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800294}
295
296static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700297_mm_cmpnge_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800298{
Stephen Hines996e4dc2013-08-13 01:04:14 -0700299 __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
300 return (__m128d) { __c[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800301}
302
303static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700304_mm_comieq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800305{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700306 return __builtin_ia32_comisdeq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800307}
308
309static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700310_mm_comilt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800311{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700312 return __builtin_ia32_comisdlt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800313}
314
315static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700316_mm_comile_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800317{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700318 return __builtin_ia32_comisdle(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800319}
320
321static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700322_mm_comigt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800323{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700324 return __builtin_ia32_comisdgt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800325}
326
327static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700328_mm_comige_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800329{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700330 return __builtin_ia32_comisdge(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800331}
332
333static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700334_mm_comineq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800335{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700336 return __builtin_ia32_comisdneq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800337}
338
339static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700340_mm_ucomieq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800341{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700342 return __builtin_ia32_ucomisdeq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800343}
344
345static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700346_mm_ucomilt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800347{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700348 return __builtin_ia32_ucomisdlt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800349}
350
351static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700352_mm_ucomile_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800353{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700354 return __builtin_ia32_ucomisdle(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800355}
356
357static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700358_mm_ucomigt_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800359{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700360 return __builtin_ia32_ucomisdgt(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800361}
362
363static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700364_mm_ucomige_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800365{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700366 return __builtin_ia32_ucomisdge(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800367}
368
369static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700370_mm_ucomineq_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800371{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700372 return __builtin_ia32_ucomisdneq(__a, __b);
Ying Wanga6720142011-12-20 14:43:20 -0800373}
374
375static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700376_mm_cvtpd_ps(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800377{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700378 return __builtin_ia32_cvtpd2ps(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800379}
380
381static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700382_mm_cvtps_pd(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -0800383{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700384 return __builtin_ia32_cvtps2pd(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800385}
386
387static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700388_mm_cvtepi32_pd(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -0800389{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700390 return __builtin_ia32_cvtdq2pd((__v4si)__a);
Ying Wanga6720142011-12-20 14:43:20 -0800391}
392
393static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700394_mm_cvtpd_epi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800395{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700396 return __builtin_ia32_cvtpd2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800397}
398
399static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700400_mm_cvtsd_si32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800401{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700402 return __builtin_ia32_cvtsd2si(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800403}
404
405static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700406_mm_cvtsd_ss(__m128 __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800407{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700408 __a[0] = __b[0];
409 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800410}
411
412static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700413_mm_cvtsi32_sd(__m128d __a, int __b)
Ying Wanga6720142011-12-20 14:43:20 -0800414{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700415 __a[0] = __b;
416 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800417}
418
419static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700420_mm_cvtss_sd(__m128d __a, __m128 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800421{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700422 __a[0] = __b[0];
423 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800424}
425
426static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700427_mm_cvttpd_epi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800428{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700429 return (__m128i)__builtin_ia32_cvttpd2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800430}
431
432static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700433_mm_cvttsd_si32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800434{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700435 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800436}
437
438static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700439_mm_cvtpd_pi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800440{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700441 return (__m64)__builtin_ia32_cvtpd2pi(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800442}
443
444static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700445_mm_cvttpd_pi32(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800446{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700447 return (__m64)__builtin_ia32_cvttpd2pi(__a);
Ying Wanga6720142011-12-20 14:43:20 -0800448}
449
450static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700451_mm_cvtpi32_pd(__m64 __a)
Ying Wanga6720142011-12-20 14:43:20 -0800452{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700453 return __builtin_ia32_cvtpi2pd((__v2si)__a);
Ying Wanga6720142011-12-20 14:43:20 -0800454}
455
456static __inline__ double __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700457_mm_cvtsd_f64(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800458{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700459 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800460}
461
462static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700463_mm_load_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800464{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700465 return *(__m128d*)__dp;
Ying Wanga6720142011-12-20 14:43:20 -0800466}
467
468static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700469_mm_load1_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800470{
471 struct __mm_load1_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700472 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800473 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700474 double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
475 return (__m128d){ __u, __u };
Ying Wanga6720142011-12-20 14:43:20 -0800476}
477
/* Alternate spelling that expands to _mm_load1_pd. */
#define _mm_load_pd1(__p) _mm_load1_pd(__p)
479
480static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700481_mm_loadr_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800482{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700483 __m128d __u = *(__m128d*)__dp;
484 return __builtin_shufflevector(__u, __u, 1, 0);
Ying Wanga6720142011-12-20 14:43:20 -0800485}
486
487static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700488_mm_loadu_pd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800489{
490 struct __loadu_pd {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700491 __m128d __v;
Ying Wanga6720142011-12-20 14:43:20 -0800492 } __attribute__((packed, may_alias));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700493 return ((struct __loadu_pd*)__dp)->__v;
Ying Wanga6720142011-12-20 14:43:20 -0800494}
495
496static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700497_mm_load_sd(double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800498{
499 struct __mm_load_sd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700500 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800501 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700502 double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
503 return (__m128d){ __u, 0 };
Ying Wanga6720142011-12-20 14:43:20 -0800504}
505
506static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700507_mm_loadh_pd(__m128d __a, double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800508{
509 struct __mm_loadh_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700510 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800511 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700512 double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
513 return (__m128d){ __a[0], __u };
Ying Wanga6720142011-12-20 14:43:20 -0800514}
515
516static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700517_mm_loadl_pd(__m128d __a, double const *__dp)
Ying Wanga6720142011-12-20 14:43:20 -0800518{
519 struct __mm_loadl_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700520 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800521 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700522 double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
523 return (__m128d){ __u, __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800524}
525
526static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700527_mm_set_sd(double __w)
Ying Wanga6720142011-12-20 14:43:20 -0800528{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700529 return (__m128d){ __w, 0 };
Ying Wanga6720142011-12-20 14:43:20 -0800530}
531
532static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700533_mm_set1_pd(double __w)
Ying Wanga6720142011-12-20 14:43:20 -0800534{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700535 return (__m128d){ __w, __w };
Ying Wanga6720142011-12-20 14:43:20 -0800536}
537
538static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700539_mm_set_pd(double __w, double __x)
Ying Wanga6720142011-12-20 14:43:20 -0800540{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700541 return (__m128d){ __x, __w };
Ying Wanga6720142011-12-20 14:43:20 -0800542}
543
544static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700545_mm_setr_pd(double __w, double __x)
Ying Wanga6720142011-12-20 14:43:20 -0800546{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700547 return (__m128d){ __w, __x };
Ying Wanga6720142011-12-20 14:43:20 -0800548}
549
550static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
551_mm_setzero_pd(void)
552{
553 return (__m128d){ 0, 0 };
554}
555
556static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700557_mm_move_sd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -0800558{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700559 return (__m128d){ __b[0], __a[1] };
Ying Wanga6720142011-12-20 14:43:20 -0800560}
561
562static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700563_mm_store_sd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800564{
565 struct __mm_store_sd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700566 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800567 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700568 ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800569}
570
571static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700572_mm_store1_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800573{
574 struct __mm_store1_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700575 double __u[2];
Ying Wanga6720142011-12-20 14:43:20 -0800576 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700577 ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
578 ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800579}
580
581static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700582_mm_store_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800583{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700584 *(__m128d *)__dp = __a;
Ying Wanga6720142011-12-20 14:43:20 -0800585}
586
587static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700588_mm_storeu_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800589{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700590 __builtin_ia32_storeupd(__dp, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800591}
592
593static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700594_mm_storer_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800595{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700596 __a = __builtin_shufflevector(__a, __a, 1, 0);
597 *(__m128d *)__dp = __a;
Ying Wanga6720142011-12-20 14:43:20 -0800598}
599
600static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700601_mm_storeh_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800602{
603 struct __mm_storeh_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700604 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800605 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700606 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
Ying Wanga6720142011-12-20 14:43:20 -0800607}
608
609static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700610_mm_storel_pd(double *__dp, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -0800611{
612 struct __mm_storeh_pd_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700613 double __u;
Ying Wanga6720142011-12-20 14:43:20 -0800614 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700615 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -0800616}
617
618static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700619_mm_add_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800620{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700621 return (__m128i)((__v16qi)__a + (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800622}
623
624static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700625_mm_add_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800626{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700627 return (__m128i)((__v8hi)__a + (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800628}
629
630static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700631_mm_add_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800632{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700633 return (__m128i)((__v4si)__a + (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800634}
635
636static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700637_mm_add_si64(__m64 __a, __m64 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800638{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700639 return __a + __b;
Ying Wanga6720142011-12-20 14:43:20 -0800640}
641
642static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700643_mm_add_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800644{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700645 return __a + __b;
Ying Wanga6720142011-12-20 14:43:20 -0800646}
647
648static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700649_mm_adds_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800650{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700651 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800652}
653
654static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700655_mm_adds_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800656{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700657 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800658}
659
660static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700661_mm_adds_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800662{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700663 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800664}
665
666static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700667_mm_adds_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800668{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700669 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800670}
671
672static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700673_mm_avg_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800674{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700675 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800676}
677
678static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700679_mm_avg_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800680{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700681 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800682}
683
684static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700685_mm_madd_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800686{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700687 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800688}
689
690static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700691_mm_max_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800692{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700693 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800694}
695
696static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700697_mm_max_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800698{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700699 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800700}
701
702static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700703_mm_min_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800704{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700705 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800706}
707
708static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700709_mm_min_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800710{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700711 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800712}
713
714static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700715_mm_mulhi_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800716{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700717 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800718}
719
720static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700721_mm_mulhi_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800722{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700723 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800724}
725
726static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700727_mm_mullo_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800728{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700729 return (__m128i)((__v8hi)__a * (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800730}
731
732static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700733_mm_mul_su32(__m64 __a, __m64 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800734{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700735 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800736}
737
738static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700739_mm_mul_epu32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800740{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700741 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800742}
743
744static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700745_mm_sad_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800746{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700747 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800748}
749
750static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700751_mm_sub_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800752{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700753 return (__m128i)((__v16qi)__a - (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800754}
755
756static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700757_mm_sub_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800758{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700759 return (__m128i)((__v8hi)__a - (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800760}
761
762static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700763_mm_sub_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800764{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700765 return (__m128i)((__v4si)__a - (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800766}
767
768static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700769_mm_sub_si64(__m64 __a, __m64 __b)
Ying Wanga6720142011-12-20 14:43:20 -0800770{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700771 return __a - __b;
Ying Wanga6720142011-12-20 14:43:20 -0800772}
773
774static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700775_mm_sub_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800776{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700777 return __a - __b;
Ying Wanga6720142011-12-20 14:43:20 -0800778}
779
780static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700781_mm_subs_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800782{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700783 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800784}
785
786static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700787_mm_subs_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800788{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700789 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800790}
791
792static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700793_mm_subs_epu8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800794{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700795 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800796}
797
798static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700799_mm_subs_epu16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800800{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700801 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800802}
803
804static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700805_mm_and_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800806{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700807 return __a & __b;
Ying Wanga6720142011-12-20 14:43:20 -0800808}
809
810static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700811_mm_andnot_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800812{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700813 return ~__a & __b;
Ying Wanga6720142011-12-20 14:43:20 -0800814}
815
816static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700817_mm_or_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800818{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700819 return __a | __b;
Ying Wanga6720142011-12-20 14:43:20 -0800820}
821
822static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700823_mm_xor_si128(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800824{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700825 return __a ^ __b;
Ying Wanga6720142011-12-20 14:43:20 -0800826}
827
/* Evaluates `a` once into a local and passes it to
   __builtin_ia32_pslldqi128 together with `count` multiplied by 8
   (presumably converting a byte count into a bit count).  Written as a
   macro rather than an inline function -- presumably because the builtin
   needs a compile-time constant count; confirm. */
#define _mm_slli_si128(a, count) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_ia32_pslldqi128(__a, (count) * 8); })
Ying Wanga6720142011-12-20 14:43:20 -0800831
832static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700833_mm_slli_epi16(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800834{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700835 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800836}
837
838static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700839_mm_sll_epi16(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800840{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700841 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800842}
843
844static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700845_mm_slli_epi32(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800846{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700847 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800848}
849
850static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700851_mm_sll_epi32(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800852{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700853 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800854}
855
856static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700857_mm_slli_epi64(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800858{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700859 return __builtin_ia32_psllqi128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800860}
861
862static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700863_mm_sll_epi64(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800864{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700865 return __builtin_ia32_psllq128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800866}
867
868static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700869_mm_srai_epi16(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800870{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700871 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800872}
873
874static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700875_mm_sra_epi16(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800876{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700877 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800878}
879
880static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700881_mm_srai_epi32(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800882{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700883 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800884}
885
886static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700887_mm_sra_epi32(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800888{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700889 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800890}
891
892
/* Evaluates `a` once into a local and passes it to
   __builtin_ia32_psrldqi128 together with `count` multiplied by 8
   (presumably converting a byte count into a bit count).  Written as a
   macro rather than an inline function -- presumably because the builtin
   needs a compile-time constant count; confirm. */
#define _mm_srli_si128(a, count) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_ia32_psrldqi128(__a, (count) * 8); })
Ying Wanga6720142011-12-20 14:43:20 -0800896
897static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700898_mm_srli_epi16(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800899{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700900 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800901}
902
903static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700904_mm_srl_epi16(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800905{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700906 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800907}
908
909static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700910_mm_srli_epi32(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800911{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700912 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800913}
914
915static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700916_mm_srl_epi32(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800917{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700918 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
Ying Wanga6720142011-12-20 14:43:20 -0800919}
920
921static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700922_mm_srli_epi64(__m128i __a, int __count)
Ying Wanga6720142011-12-20 14:43:20 -0800923{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700924 return __builtin_ia32_psrlqi128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800925}
926
927static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700928_mm_srl_epi64(__m128i __a, __m128i __count)
Ying Wanga6720142011-12-20 14:43:20 -0800929{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700930 return __builtin_ia32_psrlq128(__a, __count);
Ying Wanga6720142011-12-20 14:43:20 -0800931}
932
933static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700934_mm_cmpeq_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800935{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700936 return (__m128i)((__v16qi)__a == (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800937}
938
939static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700940_mm_cmpeq_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800941{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700942 return (__m128i)((__v8hi)__a == (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800943}
944
945static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700946_mm_cmpeq_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800947{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700948 return (__m128i)((__v4si)__a == (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800949}
950
951static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700952_mm_cmpgt_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800953{
Ying Wang60999142013-01-07 13:59:36 -0800954 /* This function always performs a signed comparison, but __v16qi is a char
955 which may be signed or unsigned. */
956 typedef signed char __v16qs __attribute__((__vector_size__(16)));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700957 return (__m128i)((__v16qs)__a > (__v16qs)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800958}
959
960static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700961_mm_cmpgt_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800962{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700963 return (__m128i)((__v8hi)__a > (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800964}
965
966static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700967_mm_cmpgt_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800968{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700969 return (__m128i)((__v4si)__a > (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -0800970}
971
972static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700973_mm_cmplt_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800974{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700975 return _mm_cmpgt_epi8(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800976}
977
978static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700979_mm_cmplt_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800980{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700981 return _mm_cmpgt_epi16(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800982}
983
984static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700985_mm_cmplt_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -0800986{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700987 return _mm_cmpgt_epi32(__b, __a);
Ying Wanga6720142011-12-20 14:43:20 -0800988}
989
990#ifdef __x86_64__
991static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700992_mm_cvtsi64_sd(__m128d __a, long long __b)
Ying Wanga6720142011-12-20 14:43:20 -0800993{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700994 __a[0] = __b;
995 return __a;
Ying Wanga6720142011-12-20 14:43:20 -0800996}
997
998static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -0700999_mm_cvtsd_si64(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001000{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001001 return __builtin_ia32_cvtsd2si64(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001002}
1003
1004static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001005_mm_cvttsd_si64(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001006{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001007 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001008}
1009#endif
1010
1011static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001012_mm_cvtepi32_ps(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001013{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001014 return __builtin_ia32_cvtdq2ps((__v4si)__a);
Ying Wanga6720142011-12-20 14:43:20 -08001015}
1016
1017static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001018_mm_cvtps_epi32(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001019{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001020 return (__m128i)__builtin_ia32_cvtps2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001021}
1022
1023static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001024_mm_cvttps_epi32(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001025{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001026 return (__m128i)__builtin_ia32_cvttps2dq(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001027}
1028
1029static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001030_mm_cvtsi32_si128(int __a)
Ying Wanga6720142011-12-20 14:43:20 -08001031{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001032 return (__m128i)(__v4si){ __a, 0, 0, 0 };
Ying Wanga6720142011-12-20 14:43:20 -08001033}
1034
1035#ifdef __x86_64__
1036static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001037_mm_cvtsi64_si128(long long __a)
Ying Wanga6720142011-12-20 14:43:20 -08001038{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001039 return (__m128i){ __a, 0 };
Ying Wanga6720142011-12-20 14:43:20 -08001040}
1041#endif
1042
1043static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001044_mm_cvtsi128_si32(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001045{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001046 __v4si __b = (__v4si)__a;
1047 return __b[0];
Ying Wanga6720142011-12-20 14:43:20 -08001048}
1049
1050#ifdef __x86_64__
1051static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001052_mm_cvtsi128_si64(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001053{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001054 return __a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001055}
1056#endif
1057
1058static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001059_mm_load_si128(__m128i const *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001060{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001061 return *__p;
Ying Wanga6720142011-12-20 14:43:20 -08001062}
1063
1064static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001065_mm_loadu_si128(__m128i const *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001066{
1067 struct __loadu_si128 {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001068 __m128i __v;
Ying Wanga6720142011-12-20 14:43:20 -08001069 } __attribute__((packed, may_alias));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001070 return ((struct __loadu_si128*)__p)->__v;
Ying Wanga6720142011-12-20 14:43:20 -08001071}
1072
1073static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001074_mm_loadl_epi64(__m128i const *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001075{
1076 struct __mm_loadl_epi64_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001077 long long __u;
Ying Wanga6720142011-12-20 14:43:20 -08001078 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001079 return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
Ying Wanga6720142011-12-20 14:43:20 -08001080}
1081
1082static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1083_mm_set_epi64x(long long q1, long long q0)
1084{
1085 return (__m128i){ q0, q1 };
1086}
1087
1088static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1089_mm_set_epi64(__m64 q1, __m64 q0)
1090{
1091 return (__m128i){ (long long)q0, (long long)q1 };
1092}
1093
1094static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1095_mm_set_epi32(int i3, int i2, int i1, int i0)
1096{
1097 return (__m128i)(__v4si){ i0, i1, i2, i3};
1098}
1099
1100static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1101_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1102{
1103 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1104}
1105
1106static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1107_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
1108{
1109 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1110}
1111
1112static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001113_mm_set1_epi64x(long long __q)
Ying Wanga6720142011-12-20 14:43:20 -08001114{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001115 return (__m128i){ __q, __q };
Ying Wanga6720142011-12-20 14:43:20 -08001116}
1117
1118static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001119_mm_set1_epi64(__m64 __q)
Ying Wanga6720142011-12-20 14:43:20 -08001120{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001121 return (__m128i){ (long long)__q, (long long)__q };
Ying Wanga6720142011-12-20 14:43:20 -08001122}
1123
1124static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001125_mm_set1_epi32(int __i)
Ying Wanga6720142011-12-20 14:43:20 -08001126{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001127 return (__m128i)(__v4si){ __i, __i, __i, __i };
Ying Wanga6720142011-12-20 14:43:20 -08001128}
1129
1130static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001131_mm_set1_epi16(short __w)
Ying Wanga6720142011-12-20 14:43:20 -08001132{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001133 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
Ying Wanga6720142011-12-20 14:43:20 -08001134}
1135
1136static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001137_mm_set1_epi8(char __b)
Ying Wanga6720142011-12-20 14:43:20 -08001138{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001139 return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
Ying Wanga6720142011-12-20 14:43:20 -08001140}
1141
1142static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1143_mm_setr_epi64(__m64 q0, __m64 q1)
1144{
1145 return (__m128i){ (long long)q0, (long long)q1 };
1146}
1147
1148static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1149_mm_setr_epi32(int i0, int i1, int i2, int i3)
1150{
1151 return (__m128i)(__v4si){ i0, i1, i2, i3};
1152}
1153
1154static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1155_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1156{
1157 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1158}
1159
1160static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1161_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
1162{
1163 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1164}
1165
1166static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
1167_mm_setzero_si128(void)
1168{
1169 return (__m128i){ 0LL, 0LL };
1170}
1171
1172static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001173_mm_store_si128(__m128i *__p, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001174{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001175 *__p = __b;
Ying Wanga6720142011-12-20 14:43:20 -08001176}
1177
1178static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001179_mm_storeu_si128(__m128i *__p, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001180{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001181 __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001182}
1183
1184static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001185_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Ying Wanga6720142011-12-20 14:43:20 -08001186{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001187 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
Ying Wanga6720142011-12-20 14:43:20 -08001188}
1189
1190static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001191_mm_storel_epi64(__m128i *__p, __m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001192{
Ying Wang60999142013-01-07 13:59:36 -08001193 struct __mm_storel_epi64_struct {
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001194 long long __u;
Ying Wang60999142013-01-07 13:59:36 -08001195 } __attribute__((__packed__, __may_alias__));
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001196 ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001197}
1198
1199static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001200_mm_stream_pd(double *__p, __m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001201{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001202 __builtin_ia32_movntpd(__p, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001203}
1204
1205static __inline__ void __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001206_mm_stream_si128(__m128i *__p, __m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001207{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001208 __builtin_ia32_movntdq(__p, __a);
Ying Wanga6720142011-12-20 14:43:20 -08001209}
1210
/* Store the 32-bit integer __a to __p via the movnti builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *__p, int __a)
{
  int *__dst = __p;
  __builtin_ia32_movnti(__dst, __a);
}
1216
/* Pass the address __p to the clflush builtin. The pointee is const: the
 * data itself is not modified through this pointer. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{
  void const *__line = __p;
  __builtin_ia32_clflush(__line);
}
1222
/* Issue the lfence builtin; takes no arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
  return;
}
1228
/* Issue the mfence builtin; takes no arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
  return;
}
1234
1235static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001236_mm_packs_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001237{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001238 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001239}
1240
1241static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001242_mm_packs_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001243{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001244 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001245}
1246
1247static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001248_mm_packus_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001249{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001250 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
Ying Wanga6720142011-12-20 14:43:20 -08001251}
1252
1253static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001254_mm_extract_epi16(__m128i __a, int __imm)
Ying Wanga6720142011-12-20 14:43:20 -08001255{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001256 __v8hi __b = (__v8hi)__a;
1257 return (unsigned short)__b[__imm];
Ying Wanga6720142011-12-20 14:43:20 -08001258}
1259
1260static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001261_mm_insert_epi16(__m128i __a, int __b, int __imm)
Ying Wanga6720142011-12-20 14:43:20 -08001262{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001263 __v8hi __c = (__v8hi)__a;
1264 __c[__imm & 7] = __b;
1265 return (__m128i)__c;
Ying Wanga6720142011-12-20 14:43:20 -08001266}
1267
1268static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001269_mm_movemask_epi8(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001270{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001271 return __builtin_ia32_pmovmskb128((__v16qi)__a);
Ying Wanga6720142011-12-20 14:43:20 -08001272}
1273
/* Permute the four 32-bit elements of a. Each 2-bit field of imm (bits 1:0,
 * 3:2, 5:4, 7:6) is the source element index for result elements 0..3.
 * All indices are below 4, so only elements of a are ever selected; the
 * zero vector is just the required second shuffle operand. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector( \
      (__v4si)__a, (__v4si) _mm_set1_epi32(0), \
      ((imm) & 0x03) >> 0, \
      ((imm) & 0x0c) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6); })
Ying Wanga6720142011-12-20 14:43:20 -08001279
/* Permute the four low 16-bit elements of a. Each 2-bit field of imm (bits
 * 1:0, 3:2, 5:4, 7:6) is the source element index (0..3) for result elements
 * 0..3; elements 4..7 are copied through unchanged. The zero vector is only
 * the required second shuffle operand and is never selected. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
      ((imm) & 0x03) >> 0, \
      ((imm) & 0x0c) >> 2, \
      ((imm) & 0x30) >> 4, \
      ((imm) & 0xc0) >> 6, \
      4, 5, 6, 7); })
Ying Wanga6720142011-12-20 14:43:20 -08001286
/* Permute the four high 16-bit elements of a. Elements 0..3 are copied
 * through unchanged; each 2-bit field of imm (bits 1:0, 3:2, 5:4, 7:6),
 * offset by 4, is the source element index (4..7) for result elements 4..7.
 * The zero vector is only the required second shuffle operand and is never
 * selected. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  __m128i __a = (a); \
  (__m128i)__builtin_shufflevector( \
      (__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
      0, 1, 2, 3, \
      (((imm) & 0x03) >> 0) + 4, \
      (((imm) & 0x0c) >> 2) + 4, \
      (((imm) & 0x30) >> 4) + 4, \
      (((imm) & 0xc0) >> 6) + 4); })
Ying Wanga6720142011-12-20 14:43:20 -08001295
1296static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001297_mm_unpackhi_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001298{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001299 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Ying Wanga6720142011-12-20 14:43:20 -08001300}
1301
1302static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001303_mm_unpackhi_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001304{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001305 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Ying Wanga6720142011-12-20 14:43:20 -08001306}
1307
1308static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001309_mm_unpackhi_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001310{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001311 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
Ying Wanga6720142011-12-20 14:43:20 -08001312}
1313
1314static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001315_mm_unpackhi_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001316{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001317 return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
Ying Wanga6720142011-12-20 14:43:20 -08001318}
1319
1320static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001321_mm_unpacklo_epi8(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001322{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001323 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Ying Wanga6720142011-12-20 14:43:20 -08001324}
1325
1326static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001327_mm_unpacklo_epi16(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001328{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001329 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Ying Wanga6720142011-12-20 14:43:20 -08001330}
1331
1332static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001333_mm_unpacklo_epi32(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001334{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001335 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
Ying Wanga6720142011-12-20 14:43:20 -08001336}
1337
1338static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001339_mm_unpacklo_epi64(__m128i __a, __m128i __b)
Ying Wanga6720142011-12-20 14:43:20 -08001340{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001341 return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
Ying Wanga6720142011-12-20 14:43:20 -08001342}
1343
1344static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001345_mm_movepi64_pi64(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001346{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001347 return (__m64)__a[0];
Ying Wanga6720142011-12-20 14:43:20 -08001348}
1349
1350static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001351_mm_movpi64_pi64(__m64 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001352{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001353 return (__m128i){ (long long)__a, 0 };
Ying Wanga6720142011-12-20 14:43:20 -08001354}
1355
1356static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001357_mm_move_epi64(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001358{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001359 return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
Ying Wanga6720142011-12-20 14:43:20 -08001360}
1361
1362static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001363_mm_unpackhi_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -08001364{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001365 return __builtin_shufflevector(__a, __b, 1, 2+1);
Ying Wanga6720142011-12-20 14:43:20 -08001366}
1367
1368static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001369_mm_unpacklo_pd(__m128d __a, __m128d __b)
Ying Wanga6720142011-12-20 14:43:20 -08001370{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001371 return __builtin_shufflevector(__a, __b, 0, 2+0);
Ying Wanga6720142011-12-20 14:43:20 -08001372}
1373
1374static __inline__ int __attribute__((__always_inline__, __nodebug__))
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001375_mm_movemask_pd(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001376{
Stephen Hinesc6ee7df2013-04-02 18:41:57 -07001377 return __builtin_ia32_movmskpd(__a);
Ying Wanga6720142011-12-20 14:43:20 -08001378}
1379
/* Select one double from each input: bit 0 of i picks element 0 or 1 of a
 * for result element 0, and bit 1 of i picks element 0 or 1 of b for result
 * element 1 (shuffle indices 2 and 3 refer to b). */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __m128d __a = (a); \
  __m128d __b = (b); \
  __builtin_shufflevector(__a, __b, \
                          (i) & 1, \
                          (((i) & 2) >> 1) + 2); })
Ying Wanga6720142011-12-20 14:43:20 -08001384
1385static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001386_mm_castpd_ps(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001387{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001388 return (__m128)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001389}
1390
1391static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001392_mm_castpd_si128(__m128d __a)
Ying Wanga6720142011-12-20 14:43:20 -08001393{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001394 return (__m128i)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001395}
1396
1397static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001398_mm_castps_pd(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001399{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001400 return (__m128d)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001401}
1402
1403static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001404_mm_castps_si128(__m128 __a)
Ying Wanga6720142011-12-20 14:43:20 -08001405{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001406 return (__m128i)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001407}
1408
1409static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001410_mm_castsi128_ps(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001411{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001412 return (__m128)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001413}
1414
1415static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Stephen Hines996e4dc2013-08-13 01:04:14 -07001416_mm_castsi128_pd(__m128i __a)
Ying Wanga6720142011-12-20 14:43:20 -08001417{
Stephen Hines996e4dc2013-08-13 01:04:14 -07001418 return (__m128d)__a;
Ying Wanga6720142011-12-20 14:43:20 -08001419}
1420
/* Emit a single "pause" instruction via volatile inline assembly; volatile
 * keeps the compiler from deleting the otherwise operand-less statement. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1426
/* Compose a 2-bit shuffle selector: x goes to bit 1 and y to bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1428
1429#endif /* __SSE2__ */
1430
1431#endif /* __EMMINTRIN_H */