blob: e10b77d1b2349e3fccc019be6471f51aadcfb39f [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
Benjamin Kramer0930b6e2010-08-20 16:47:17 +000023
Anders Carlsson37f2f002008-12-24 01:45:22 +000024#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* Public 128-bit vector types: two doubles, and the generic integer vector
 * (declared as two long longs). */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Element-typed 16-byte vector views used when handing values to builtins. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
Anders Carlsson37f2f002008-12-24 01:45:22 +000041
Chris Lattner1bddbcb2010-03-22 18:14:12 +000042static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000043_mm_add_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000044{
Eli Friedman80c80042009-06-06 02:13:04 +000045 a[0] += b[0];
46 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000047}
48
Chris Lattner1bddbcb2010-03-22 18:14:12 +000049static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000050_mm_add_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000051{
52 return a + b;
53}
54
Chris Lattner1bddbcb2010-03-22 18:14:12 +000055static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000056_mm_sub_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000057{
Eli Friedman80c80042009-06-06 02:13:04 +000058 a[0] -= b[0];
59 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000060}
61
Chris Lattner1bddbcb2010-03-22 18:14:12 +000062static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000063_mm_sub_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000064{
65 return a - b;
66}
67
Chris Lattner1bddbcb2010-03-22 18:14:12 +000068static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000069_mm_mul_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000070{
Eli Friedman80c80042009-06-06 02:13:04 +000071 a[0] *= b[0];
72 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000073}
74
Chris Lattner1bddbcb2010-03-22 18:14:12 +000075static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000076_mm_mul_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000077{
78 return a * b;
79}
80
Chris Lattner1bddbcb2010-03-22 18:14:12 +000081static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000082_mm_div_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000083{
Eli Friedman80c80042009-06-06 02:13:04 +000084 a[0] /= b[0];
85 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000086}
87
Chris Lattner1bddbcb2010-03-22 18:14:12 +000088static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000089_mm_div_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000090{
91 return a / b;
92}
93
Chris Lattner1bddbcb2010-03-22 18:14:12 +000094static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000095_mm_sqrt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000096{
97 __m128d c = __builtin_ia32_sqrtsd(b);
98 return (__m128d) { c[0], a[1] };
99}
100
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000101static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000102_mm_sqrt_pd(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000103{
104 return __builtin_ia32_sqrtpd(a);
105}
106
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000107static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000108_mm_min_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000109{
110 return __builtin_ia32_minsd(a, b);
111}
112
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000113static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000114_mm_min_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000115{
116 return __builtin_ia32_minpd(a, b);
117}
118
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000119static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000120_mm_max_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000121{
122 return __builtin_ia32_maxsd(a, b);
123}
124
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000125static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000126_mm_max_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000127{
128 return __builtin_ia32_maxpd(a, b);
129}
130
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000131static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000132_mm_and_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000133{
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000134 return (__m128d)((__v4si)a & (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000135}
136
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000137static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000138_mm_andnot_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000139{
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000140 return (__m128d)(~(__v4si)a & (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000141}
142
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000143static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000144_mm_or_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000145{
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000146 return (__m128d)((__v4si)a | (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000147}
148
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000149static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000150_mm_xor_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000151{
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000152 return (__m128d)((__v4si)a ^ (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000153}
154
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000155static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000156_mm_cmpeq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000157{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000158 return (__m128d)__builtin_ia32_cmppd(a, b, 0);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000159}
160
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000161static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000162_mm_cmplt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000163{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000164 return (__m128d)__builtin_ia32_cmppd(a, b, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000165}
166
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000167static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000168_mm_cmple_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000169{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000170 return (__m128d)__builtin_ia32_cmppd(a, b, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000171}
172
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000173static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000174_mm_cmpgt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000175{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000176 return (__m128d)__builtin_ia32_cmppd(b, a, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000177}
178
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000179static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000180_mm_cmpge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000181{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000182 return (__m128d)__builtin_ia32_cmppd(b, a, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000183}
184
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000185static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000186_mm_cmpord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000187{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000188 return (__m128d)__builtin_ia32_cmppd(a, b, 7);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000189}
190
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000191static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000192_mm_cmpunord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000193{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000194 return (__m128d)__builtin_ia32_cmppd(a, b, 3);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000195}
196
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000197static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000198_mm_cmpneq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000199{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000200 return (__m128d)__builtin_ia32_cmppd(a, b, 4);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000201}
202
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000203static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000204_mm_cmpnlt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000205{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000206 return (__m128d)__builtin_ia32_cmppd(a, b, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000207}
208
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000209static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000210_mm_cmpnle_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000211{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000212 return (__m128d)__builtin_ia32_cmppd(a, b, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000213}
214
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000215static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000216_mm_cmpngt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000217{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000218 return (__m128d)__builtin_ia32_cmppd(b, a, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000219}
220
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000221static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000222_mm_cmpnge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000223{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000224 return (__m128d)__builtin_ia32_cmppd(b, a, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000225}
226
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000227static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000228_mm_cmpeq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000229{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000230 return (__m128d)__builtin_ia32_cmpsd(a, b, 0);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000231}
232
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000233static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000234_mm_cmplt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000235{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000236 return (__m128d)__builtin_ia32_cmpsd(a, b, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000237}
238
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000239static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000240_mm_cmple_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000241{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000242 return (__m128d)__builtin_ia32_cmpsd(a, b, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000243}
244
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000245static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000246_mm_cmpgt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000247{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000248 return (__m128d)__builtin_ia32_cmpsd(b, a, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000249}
250
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000251static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000252_mm_cmpge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000253{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000254 return (__m128d)__builtin_ia32_cmpsd(b, a, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000255}
256
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000257static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000258_mm_cmpord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000259{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000260 return (__m128d)__builtin_ia32_cmpsd(a, b, 7);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000261}
262
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000263static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000264_mm_cmpunord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000265{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000266 return (__m128d)__builtin_ia32_cmpsd(a, b, 3);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000267}
268
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000269static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000270_mm_cmpneq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000271{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000272 return (__m128d)__builtin_ia32_cmpsd(a, b, 4);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000273}
274
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000275static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000276_mm_cmpnlt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000277{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000278 return (__m128d)__builtin_ia32_cmpsd(a, b, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000279}
280
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000281static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000282_mm_cmpnle_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000283{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000284 return (__m128d)__builtin_ia32_cmpsd(a, b, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000285}
286
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000287static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000288_mm_cmpngt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000289{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000290 return (__m128d)__builtin_ia32_cmpsd(b, a, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000291}
292
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000293static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000294_mm_cmpnge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000295{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000296 return (__m128d)__builtin_ia32_cmpsd(b, a, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000297}
298
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000299static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000300_mm_comieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000301{
302 return __builtin_ia32_comisdeq(a, b);
303}
304
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000305static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000306_mm_comilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000307{
308 return __builtin_ia32_comisdlt(a, b);
309}
310
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000311static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000312_mm_comile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000313{
314 return __builtin_ia32_comisdle(a, b);
315}
316
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000317static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000318_mm_comigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000319{
320 return __builtin_ia32_comisdgt(a, b);
321}
322
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000323static __inline__ int __attribute__((__always_inline__, __nodebug__))
Eli Friedman8052a4f2011-10-06 20:31:50 +0000324_mm_comige_sd(__m128d a, __m128d b)
325{
326 return __builtin_ia32_comisdge(a, b);
327}
328
329static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000330_mm_comineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000331{
332 return __builtin_ia32_comisdneq(a, b);
333}
334
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000335static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000336_mm_ucomieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000337{
338 return __builtin_ia32_ucomisdeq(a, b);
339}
340
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000341static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000342_mm_ucomilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000343{
344 return __builtin_ia32_ucomisdlt(a, b);
345}
346
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000347static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000348_mm_ucomile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000349{
350 return __builtin_ia32_ucomisdle(a, b);
351}
352
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000353static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000354_mm_ucomigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000355{
356 return __builtin_ia32_ucomisdgt(a, b);
357}
358
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000359static __inline__ int __attribute__((__always_inline__, __nodebug__))
Eli Friedmanef534ce2011-08-29 21:26:24 +0000360_mm_ucomige_sd(__m128d a, __m128d b)
361{
362 return __builtin_ia32_ucomisdge(a, b);
363}
364
365static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000366_mm_ucomineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000367{
368 return __builtin_ia32_ucomisdneq(a, b);
369}
370
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000371static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000372_mm_cvtpd_ps(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000373{
374 return __builtin_ia32_cvtpd2ps(a);
375}
376
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000377static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000378_mm_cvtps_pd(__m128 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000379{
380 return __builtin_ia32_cvtps2pd(a);
381}
382
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000383static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000384_mm_cvtepi32_pd(__m128i a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000385{
386 return __builtin_ia32_cvtdq2pd((__v4si)a);
387}
388
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000389static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000390_mm_cvtpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000391{
392 return __builtin_ia32_cvtpd2dq(a);
393}
394
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000395static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000396_mm_cvtsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000397{
398 return __builtin_ia32_cvtsd2si(a);
399}
400
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000401static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000402_mm_cvtsd_ss(__m128 a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000403{
Eli Friedman80c80042009-06-06 02:13:04 +0000404 a[0] = b[0];
405 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +0000406}
407
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000408static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000409_mm_cvtsi32_sd(__m128d a, int b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000410{
Eli Friedman098136a2009-06-06 08:08:06 +0000411 a[0] = b;
412 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +0000413}
414
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000415static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000416_mm_cvtss_sd(__m128d a, __m128 b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000417{
Eli Friedman80c80042009-06-06 02:13:04 +0000418 a[0] = b[0];
419 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +0000420}
421
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000422static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000423_mm_cvttpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000424{
425 return (__m128i)__builtin_ia32_cvttpd2dq(a);
426}
427
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000428static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000429_mm_cvttsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000430{
Eli Friedman80c80042009-06-06 02:13:04 +0000431 return a[0];
Anders Carlsson37f2f002008-12-24 01:45:22 +0000432}
433
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000434static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000435_mm_cvtpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000436{
437 return (__m64)__builtin_ia32_cvtpd2pi(a);
438}
439
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000440static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000441_mm_cvttpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000442{
443 return (__m64)__builtin_ia32_cvttpd2pi(a);
444}
445
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000446static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000447_mm_cvtpi32_pd(__m64 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000448{
449 return __builtin_ia32_cvtpi2pd((__v2si)a);
450}
451
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000452static __inline__ double __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000453_mm_cvtsd_f64(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000454{
455 return a[0];
456}
457
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000458static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000459_mm_load_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000460{
461 return *(__m128d*)dp;
462}
463
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000464static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000465_mm_load1_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000466{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000467 struct __mm_load1_pd_struct {
468 double u;
469 } __attribute__((__packed__, __may_alias__));
470 double u = ((struct __mm_load1_pd_struct*)dp)->u;
471 return (__m128d){ u, u };
Anders Carlsson445afa02008-12-24 02:11:54 +0000472}
473
/* Alternate spelling of _mm_load1_pd; expands to a direct call. */
#define _mm_load_pd1(dp) _mm_load1_pd(dp)
475
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000476static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000477_mm_loadr_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000478{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000479 __m128d u = *(__m128d*)dp;
480 return __builtin_shufflevector(u, u, 1, 0);
Anders Carlsson445afa02008-12-24 02:11:54 +0000481}
482
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000483static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000484_mm_loadu_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000485{
Bill Wendlingeed92a12011-05-13 00:11:39 +0000486 struct __loadu_pd {
487 __m128d v;
Bill Wendling8e3ec9c2011-05-13 01:24:00 +0000488 } __attribute__((packed, may_alias));
Bill Wendlingeed92a12011-05-13 00:11:39 +0000489 return ((struct __loadu_pd*)dp)->v;
Anders Carlsson445afa02008-12-24 02:11:54 +0000490}
491
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000492static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000493_mm_load_sd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000494{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000495 struct __mm_load_sd_struct {
496 double u;
497 } __attribute__((__packed__, __may_alias__));
498 double u = ((struct __mm_load_sd_struct*)dp)->u;
499 return (__m128d){ u, 0 };
Anders Carlsson445afa02008-12-24 02:11:54 +0000500}
501
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000502static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000503_mm_loadh_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000504{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000505 struct __mm_loadh_pd_struct {
506 double u;
507 } __attribute__((__packed__, __may_alias__));
508 double u = ((struct __mm_loadh_pd_struct*)dp)->u;
509 return (__m128d){ a[0], u };
Anders Carlsson445afa02008-12-24 02:11:54 +0000510}
511
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000512static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000513_mm_loadl_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000514{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000515 struct __mm_loadl_pd_struct {
516 double u;
517 } __attribute__((__packed__, __may_alias__));
518 double u = ((struct __mm_loadl_pd_struct*)dp)->u;
519 return (__m128d){ u, a[1] };
Anders Carlsson445afa02008-12-24 02:11:54 +0000520}
521
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000522static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000523_mm_set_sd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000524{
525 return (__m128d){ w, 0 };
526}
527
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000528static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000529_mm_set1_pd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000530{
531 return (__m128d){ w, w };
532}
533
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000534static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000535_mm_set_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000536{
Anders Carlsson9436ed52009-09-18 17:03:55 +0000537 return (__m128d){ x, w };
Anders Carlsson445afa02008-12-24 02:11:54 +0000538}
539
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000540static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000541_mm_setr_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000542{
Anders Carlsson9436ed52009-09-18 17:03:55 +0000543 return (__m128d){ w, x };
Anders Carlsson445afa02008-12-24 02:11:54 +0000544}
545
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000546static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000547_mm_setzero_pd(void)
Anders Carlsson445afa02008-12-24 02:11:54 +0000548{
549 return (__m128d){ 0, 0 };
550}
551
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000552static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000553_mm_move_sd(__m128d a, __m128d b)
Anders Carlsson445afa02008-12-24 02:11:54 +0000554{
555 return (__m128d){ b[0], a[1] };
556}
557
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000558static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000559_mm_store_sd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000560{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000561 struct __mm_store_sd_struct {
562 double u;
563 } __attribute__((__packed__, __may_alias__));
564 ((struct __mm_store_sd_struct*)dp)->u = a[0];
Anders Carlsson445afa02008-12-24 02:11:54 +0000565}
566
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000567static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000568_mm_store1_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000569{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000570 struct __mm_store1_pd_struct {
571 double u[2];
572 } __attribute__((__packed__, __may_alias__));
573 ((struct __mm_store1_pd_struct*)dp)->u[0] = a[0];
574 ((struct __mm_store1_pd_struct*)dp)->u[1] = a[0];
Anders Carlsson445afa02008-12-24 02:11:54 +0000575}
576
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000577static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000578_mm_store_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000579{
580 *(__m128d *)dp = a;
581}
582
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000583static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000584_mm_storeu_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000585{
586 __builtin_ia32_storeupd(dp, a);
587}
588
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000589static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000590_mm_storer_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000591{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000592 a = __builtin_shufflevector(a, a, 1, 0);
593 *(__m128d *)dp = a;
Anders Carlsson445afa02008-12-24 02:11:54 +0000594}
595
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000596static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000597_mm_storeh_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000598{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000599 struct __mm_storeh_pd_struct {
600 double u;
601 } __attribute__((__packed__, __may_alias__));
602 ((struct __mm_storeh_pd_struct*)dp)->u = a[1];
Anders Carlsson445afa02008-12-24 02:11:54 +0000603}
604
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000605static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000606_mm_storel_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000607{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000608 struct __mm_storeh_pd_struct {
609 double u;
610 } __attribute__((__packed__, __may_alias__));
611 ((struct __mm_storeh_pd_struct*)dp)->u = a[0];
Anders Carlsson445afa02008-12-24 02:11:54 +0000612}
613
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000614static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000615_mm_add_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000616{
617 return (__m128i)((__v16qi)a + (__v16qi)b);
618}
619
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000620static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000621_mm_add_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000622{
623 return (__m128i)((__v8hi)a + (__v8hi)b);
624}
625
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000626static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000627_mm_add_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000628{
629 return (__m128i)((__v4si)a + (__v4si)b);
630}
631
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000632static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000633_mm_add_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000634{
635 return a + b;
636}
637
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000638static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000639_mm_add_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000640{
641 return a + b;
642}
643
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000644static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000645_mm_adds_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000646{
647 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
648}
649
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000650static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000651_mm_adds_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000652{
653 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
654}
655
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000656static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000657_mm_adds_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000658{
659 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
660}
661
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000662static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000663_mm_adds_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000664{
665 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
666}
667
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000668static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000669_mm_avg_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000670{
671 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
672}
673
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000674static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000675_mm_avg_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000676{
677 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
678}
679
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000680static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000681_mm_madd_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000682{
683 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
684}
685
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000686static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000687_mm_max_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000688{
689 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
690}
691
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000692static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000693_mm_max_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000694{
695 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
696}
697
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000698static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000699_mm_min_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000700{
701 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
702}
703
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000704static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000705_mm_min_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000706{
707 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
708}
709
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000710static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000711_mm_mulhi_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000712{
713 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
714}
715
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000716static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000717_mm_mulhi_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000718{
719 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
720}
721
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000722static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000723_mm_mullo_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000724{
Eli Friedman098136a2009-06-06 08:08:06 +0000725 return (__m128i)((__v8hi)a * (__v8hi)b);
Anders Carlsson07603aa2008-12-24 02:41:00 +0000726}
727
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000728static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000729_mm_mul_su32(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000730{
731 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
732}
733
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000734static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000735_mm_mul_epu32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000736{
737 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
738}
739
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000740static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Anders Carlssonae8ecdd2009-04-06 21:55:22 +0000741_mm_sad_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000742{
743 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
744}
745
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000746static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000747_mm_sub_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000748{
749 return (__m128i)((__v16qi)a - (__v16qi)b);
750}
751
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000752static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000753_mm_sub_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000754{
755 return (__m128i)((__v8hi)a - (__v8hi)b);
756}
757
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000758static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000759_mm_sub_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000760{
761 return (__m128i)((__v4si)a - (__v4si)b);
762}
763
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000764static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000765_mm_sub_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000766{
767 return a - b;
768}
769
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000770static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000771_mm_sub_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000772{
773 return a - b;
774}
775
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000776static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000777_mm_subs_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000778{
779 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
780}
781
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000782static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000783_mm_subs_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000784{
785 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
786}
787
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000788static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000789_mm_subs_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000790{
791 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
792}
793
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000794static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000795_mm_subs_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000796{
797 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
798}
799
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000800static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000801_mm_and_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000802{
Eli Friedman80c80042009-06-06 02:13:04 +0000803 return a & b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000804}
805
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000806static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000807_mm_andnot_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000808{
Eli Friedman80c80042009-06-06 02:13:04 +0000809 return ~a & b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000810}
811
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000812static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000813_mm_or_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000814{
Eli Friedman80c80042009-06-06 02:13:04 +0000815 return a | b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000816}
817
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000818static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000819_mm_xor_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000820{
Eli Friedman80c80042009-06-06 02:13:04 +0000821 return a ^ b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000822}
823
/* Evaluate a once into a temporary, then hand it to
 * __builtin_ia32_pslldqi128 with the count scaled by 8 (count is
 * multiplied by 8 before being passed on). */
#define _mm_slli_si128(a, count) __extension__ ({ \
  __m128i __src = (a); \
  (__m128i)__builtin_ia32_pslldqi128(__src, (count)*8); })
Anders Carlsson0727df02008-12-25 23:48:58 +0000827
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000828static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000829_mm_slli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000830{
831 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
832}
833
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000834static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000835_mm_sll_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000836{
837 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
838}
839
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000840static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000841_mm_slli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000842{
843 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
844}
845
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000846static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000847_mm_sll_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000848{
849 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
850}
851
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000852static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000853_mm_slli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000854{
855 return __builtin_ia32_psllqi128(a, count);
856}
857
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000858static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000859_mm_sll_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000860{
861 return __builtin_ia32_psllq128(a, count);
862}
863
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000864static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000865_mm_srai_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000866{
867 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
868}
869
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000870static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000871_mm_sra_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000872{
873 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
874}
875
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000876static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000877_mm_srai_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000878{
879 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
880}
881
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000882static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000883_mm_sra_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000884{
885 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
886}
887
Chris Lattnerfcb0b252010-10-01 06:58:49 +0000888
/* Evaluate a once into a temporary, then hand it to
 * __builtin_ia32_psrldqi128 with the count scaled by 8 (count is
 * multiplied by 8 before being passed on). */
#define _mm_srli_si128(a, count) __extension__ ({ \
  __m128i __src = (a); \
  (__m128i)__builtin_ia32_psrldqi128(__src, (count)*8); })
Anders Carlsson0727df02008-12-25 23:48:58 +0000892
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000893static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000894_mm_srli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000895{
896 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
897}
898
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000899static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000900_mm_srl_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000901{
902 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
903}
904
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000905static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000906_mm_srli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000907{
908 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
909}
910
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000911static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000912_mm_srl_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000913{
914 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
915}
916
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000917static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000918_mm_srli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000919{
920 return __builtin_ia32_psrlqi128(a, count);
921}
922
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000923static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000924_mm_srl_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000925{
926 return __builtin_ia32_psrlq128(a, count);
927}
928
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000929static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000930_mm_cmpeq_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000931{
Eli Friedman3a266f22009-07-22 17:08:01 +0000932 return (__m128i)((__v16qi)a == (__v16qi)b);
Anders Carlsson0727df02008-12-25 23:48:58 +0000933}
934
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000935static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000936_mm_cmpeq_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000937{
Eli Friedman3a266f22009-07-22 17:08:01 +0000938 return (__m128i)((__v8hi)a == (__v8hi)b);
Anders Carlsson0727df02008-12-25 23:48:58 +0000939}
940
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000941static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000942_mm_cmpeq_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000943{
Eli Friedman3a266f22009-07-22 17:08:01 +0000944 return (__m128i)((__v4si)a == (__v4si)b);
Anders Carlsson0727df02008-12-25 23:48:58 +0000945}
946
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000947static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000948_mm_cmpgt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000949{
Nick Lewycky279114c2012-02-04 02:16:48 +0000950 /* This function always performs a signed comparison, but __v16qi is a char
951 which may be signed or unsigned. */
Nick Lewyckyf42f85c2012-02-03 23:57:48 +0000952 typedef signed char __v16qs __attribute__((__vector_size__(16)));
953 return (__m128i)((__v16qs)a > (__v16qs)b);
Anders Carlsson0727df02008-12-25 23:48:58 +0000954}
955
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000956static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000957_mm_cmpgt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000958{
Eli Friedman3a266f22009-07-22 17:08:01 +0000959 return (__m128i)((__v8hi)a > (__v8hi)b);
Anders Carlsson0727df02008-12-25 23:48:58 +0000960}
961
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000962static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000963_mm_cmpgt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000964{
Eli Friedman3a266f22009-07-22 17:08:01 +0000965 return (__m128i)((__v4si)a > (__v4si)b);
Anders Carlsson0727df02008-12-25 23:48:58 +0000966}
967
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000968static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000969_mm_cmplt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000970{
Eli Friedman3a266f22009-07-22 17:08:01 +0000971 return _mm_cmpgt_epi8(b,a);
Anders Carlsson0727df02008-12-25 23:48:58 +0000972}
973
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000974static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000975_mm_cmplt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000976{
Eli Friedman3a266f22009-07-22 17:08:01 +0000977 return _mm_cmpgt_epi16(b,a);
Anders Carlsson0727df02008-12-25 23:48:58 +0000978}
979
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000980static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000981_mm_cmplt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000982{
Eli Friedman3a266f22009-07-22 17:08:01 +0000983 return _mm_cmpgt_epi32(b,a);
Anders Carlsson0727df02008-12-25 23:48:58 +0000984}
985
986#ifdef __x86_64__
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000987static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000988_mm_cvtsi64_sd(__m128d a, long long b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000989{
Eli Friedman80c80042009-06-06 02:13:04 +0000990 a[0] = b;
991 return a;
Anders Carlsson0727df02008-12-25 23:48:58 +0000992}
993
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000994static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000995_mm_cvtsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000996{
997 return __builtin_ia32_cvtsd2si64(a);
998}
999
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001000static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001001_mm_cvttsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001002{
Eli Friedman80c80042009-06-06 02:13:04 +00001003 return a[0];
Anders Carlsson0727df02008-12-25 23:48:58 +00001004}
1005#endif
1006
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001007static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001008_mm_cvtepi32_ps(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001009{
1010 return __builtin_ia32_cvtdq2ps((__v4si)a);
1011}
1012
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001013static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001014_mm_cvtps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001015{
1016 return (__m128i)__builtin_ia32_cvtps2dq(a);
1017}
1018
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001019static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001020_mm_cvttps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001021{
1022 return (__m128i)__builtin_ia32_cvttps2dq(a);
1023}
1024
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001025static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001026_mm_cvtsi32_si128(int a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001027{
1028 return (__m128i)(__v4si){ a, 0, 0, 0 };
1029}
1030
1031#ifdef __x86_64__
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001032static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001033_mm_cvtsi64_si128(long long a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001034{
1035 return (__m128i){ a, 0 };
1036}
1037#endif
1038
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001039static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001040_mm_cvtsi128_si32(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001041{
1042 __v4si b = (__v4si)a;
1043 return b[0];
1044}
1045
1046#ifdef __x86_64__
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001047static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001048_mm_cvtsi128_si64(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001049{
1050 return a[0];
1051}
1052#endif
1053
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001054static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001055_mm_load_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001056{
1057 return *p;
1058}
1059
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001060static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001061_mm_loadu_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001062{
Bill Wendlingeed92a12011-05-13 00:11:39 +00001063 struct __loadu_si128 {
1064 __m128i v;
Bill Wendling8e3ec9c2011-05-13 01:24:00 +00001065 } __attribute__((packed, may_alias));
Bill Wendlingeed92a12011-05-13 00:11:39 +00001066 return ((struct __loadu_si128*)p)->v;
Anders Carlsson0727df02008-12-25 23:48:58 +00001067}
1068
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001069static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001070_mm_loadl_epi64(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001071{
Eli Friedman7c06f6b2011-09-15 23:15:27 +00001072 struct __mm_loadl_epi64_struct {
1073 long long u;
1074 } __attribute__((__packed__, __may_alias__));
1075 return (__m128i) { ((struct __mm_loadl_epi64_struct*)p)->u, 0};
Anders Carlsson0727df02008-12-25 23:48:58 +00001076}
1077
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001078static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Anders Carlsson9436ed52009-09-18 17:03:55 +00001079_mm_set_epi64x(long long q1, long long q0)
1080{
1081 return (__m128i){ q0, q1 };
1082}
1083
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001084static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001085_mm_set_epi64(__m64 q1, __m64 q0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001086{
1087 return (__m128i){ (long long)q0, (long long)q1 };
1088}
1089
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001090static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001091_mm_set_epi32(int i3, int i2, int i1, int i0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001092{
1093 return (__m128i)(__v4si){ i0, i1, i2, i3};
1094}
1095
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001096static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001097_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001098{
1099 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1100}
1101
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001102static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001103_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001104{
1105 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1106}
1107
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001108static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Anders Carlsson9436ed52009-09-18 17:03:55 +00001109_mm_set1_epi64x(long long q)
1110{
1111 return (__m128i){ q, q };
1112}
1113
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001114static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001115_mm_set1_epi64(__m64 q)
Anders Carlsson0727df02008-12-25 23:48:58 +00001116{
1117 return (__m128i){ (long long)q, (long long)q };
1118}
1119
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001120static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001121_mm_set1_epi32(int i)
Anders Carlsson0727df02008-12-25 23:48:58 +00001122{
1123 return (__m128i)(__v4si){ i, i, i, i };
1124}
1125
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001126static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001127_mm_set1_epi16(short w)
Anders Carlsson0727df02008-12-25 23:48:58 +00001128{
1129 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1130}
1131
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001132static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001133_mm_set1_epi8(char b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001134{
1135 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1136}
1137
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001138static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001139_mm_setr_epi64(__m64 q0, __m64 q1)
Anders Carlsson0727df02008-12-25 23:48:58 +00001140{
1141 return (__m128i){ (long long)q0, (long long)q1 };
1142}
1143
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001144static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001145_mm_setr_epi32(int i0, int i1, int i2, int i3)
Anders Carlsson0727df02008-12-25 23:48:58 +00001146{
1147 return (__m128i)(__v4si){ i0, i1, i2, i3};
1148}
1149
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001150static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001151_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Anders Carlsson0727df02008-12-25 23:48:58 +00001152{
1153 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1154}
1155
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001156static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001157_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
Anders Carlsson0727df02008-12-25 23:48:58 +00001158{
1159 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1160}
1161
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001162static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001163_mm_setzero_si128(void)
Anders Carlsson0727df02008-12-25 23:48:58 +00001164{
1165 return (__m128i){ 0LL, 0LL };
1166}
1167
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001168static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001169_mm_store_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001170{
1171 *p = b;
1172}
1173
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001174static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001175_mm_storeu_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001176{
1177 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1178}
1179
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001180static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001181_mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001182{
1183 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1184}
1185
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001186static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001187_mm_storel_epi64(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001188{
1189 __builtin_ia32_storelv4si((__v2si *)p, a);
1190}
1191
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001192static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001193_mm_stream_pd(double *p, __m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001194{
1195 __builtin_ia32_movntpd(p, a);
1196}
1197
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001198static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001199_mm_stream_si128(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001200{
1201 __builtin_ia32_movntdq(p, a);
1202}
1203
/* Store the 32-bit integer a to *p through __builtin_ia32_movnti. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  int *__dst = p;
  __builtin_ia32_movnti(__dst, a);
}
1209
/* Pass address p to __builtin_ia32_clflush. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  void const *__line = p;
  __builtin_ia32_clflush(__line);
}
1215
/* Emit a load fence by invoking __builtin_ia32_lfence. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  (void)(__builtin_ia32_lfence(), 0);
}
1221
/* Emit a memory fence by invoking __builtin_ia32_mfence. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  (void)(__builtin_ia32_mfence(), 0);
}
1227
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001228static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001229_mm_packs_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001230{
1231 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1232}
1233
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001234static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001235_mm_packs_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001236{
1237 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1238}
1239
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001240static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001241_mm_packus_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001242{
1243 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1244}
1245
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001246static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001247_mm_extract_epi16(__m128i a, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001248{
1249 __v8hi b = (__v8hi)a;
Chris Lattnerd6b84b92010-08-20 16:08:33 +00001250 return (unsigned short)b[imm];
Anders Carlsson45470752008-12-26 00:45:50 +00001251}
1252
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001253static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001254_mm_insert_epi16(__m128i a, int b, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001255{
Eli Friedman80c80042009-06-06 02:13:04 +00001256 __v8hi c = (__v8hi)a;
1257 c[imm & 7] = b;
Eli Friedman17d2e3a2009-06-06 03:45:06 +00001258 return (__m128i)c;
Anders Carlsson45470752008-12-26 00:45:50 +00001259}
1260
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001261static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001262_mm_movemask_epi8(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001263{
1264 return __builtin_ia32_pmovmskb128((__v16qi)a);
1265}
1266
Bob Wilson32bae372011-11-05 06:08:06 +00001267#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
1268 __m128i __a = (a); \
1269 (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
1270 (imm) & 0x3, ((imm) & 0xc) >> 2, \
1271 ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
Chris Lattner85e59d72011-04-25 20:42:40 +00001272
/*
 * Rearrange the low four 16-bit lanes of a and pass lanes 4..7 through
 * unchanged.  Result lanes 0..3 are selected by the 2-bit fields of imm at
 * bit positions 0, 2, 4 and 6 respectively.  imm feeds
 * __builtin_shufflevector indices, so it must be a compile-time constant.
 */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  __m128i __src = (a); \
  (__m128i)__builtin_shufflevector((__v8hi)__src, \
                                   (__v8hi) _mm_set1_epi16(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3, \
                                   4, 5, 6, 7); })
Chris Lattner85e59d72011-04-25 20:42:40 +00001279
/*
 * Rearrange the high four 16-bit lanes of a and pass lanes 0..3 through
 * unchanged.  Result lanes 4..7 are selected, among source lanes 4..7, by
 * the 2-bit fields of imm at bit positions 0, 2, 4 and 6 respectively.
 * imm feeds __builtin_shufflevector indices, so it must be a compile-time
 * constant.
 */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  __m128i __src = (a); \
  (__m128i)__builtin_shufflevector((__v8hi)__src, \
                                   (__v8hi) _mm_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) >> 0) & 0x3), \
                                   4 + (((imm) >> 2) & 0x3), \
                                   4 + (((imm) >> 4) & 0x3), \
                                   4 + (((imm) >> 6) & 0x3)); })
Anders Carlsson45470752008-12-26 00:45:50 +00001288
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001289static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001290_mm_unpackhi_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001291{
Anders Carlsson92d66862008-12-26 00:50:47 +00001292 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson45470752008-12-26 00:45:50 +00001293}
1294
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001295static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001296_mm_unpackhi_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001297{
Anders Carlsson92d66862008-12-26 00:50:47 +00001298 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001299}
1300
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001301static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001302_mm_unpackhi_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001303{
Anders Carlsson92d66862008-12-26 00:50:47 +00001304 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001305}
1306
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001307static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001308_mm_unpackhi_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001309{
Anders Carlsson92d66862008-12-26 00:50:47 +00001310 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001311}
1312
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001313static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001314_mm_unpacklo_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001315{
Anders Carlsson92d66862008-12-26 00:50:47 +00001316 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001317}
1318
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001319static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001320_mm_unpacklo_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001321{
Anders Carlsson92d66862008-12-26 00:50:47 +00001322 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001323}
1324
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001325static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001326_mm_unpacklo_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001327{
Anders Carlsson92d66862008-12-26 00:50:47 +00001328 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001329}
1330
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001331static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001332_mm_unpacklo_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001333{
Anders Carlsson92d66862008-12-26 00:50:47 +00001334 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001335}
1336
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001337static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001338_mm_movepi64_pi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001339{
1340 return (__m64)a[0];
1341}
1342
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001343static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001344_mm_movpi64_pi64(__m64 a)
Anders Carlsson45470752008-12-26 00:45:50 +00001345{
1346 return (__m128i){ (long long)a, 0 };
1347}
1348
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001349static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001350_mm_move_epi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001351{
Eli Friedman80c80042009-06-06 02:13:04 +00001352 return __builtin_shufflevector(a, (__m128i){ 0 }, 0, 2);
Anders Carlsson45470752008-12-26 00:45:50 +00001353}
1354
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001355static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001356_mm_unpackhi_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001357{
Anders Carlsson92d66862008-12-26 00:50:47 +00001358 return __builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001359}
1360
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001361static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001362_mm_unpacklo_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001363{
Anders Carlsson92d66862008-12-26 00:50:47 +00001364 return __builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001365}
1366
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001367static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001368_mm_movemask_pd(__m128d a)
Anders Carlsson45470752008-12-26 00:45:50 +00001369{
1370 return __builtin_ia32_movmskpd(a);
1371}
1372
/*
 * Build a double vector from one element of a and one element of b.
 * Bit 0 of i selects which element of a becomes result element 0; bit 1 of i
 * selects which element of b becomes result element 1.  i feeds
 * __builtin_shufflevector indices, so it must be a compile-time constant.
 */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __m128d __first = (a); \
  __m128d __second = (b); \
  __builtin_shufflevector(__first, __second, \
                          ((i) >> 0) & 1, \
                          2 + (((i) >> 1) & 1)); })
Anders Carlsson45470752008-12-26 00:45:50 +00001377
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001378static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001379_mm_castpd_ps(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001380{
1381 return (__m128)in;
1382}
1383
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001384static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001385_mm_castpd_si128(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001386{
1387 return (__m128i)in;
1388}
1389
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001390static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001391_mm_castps_pd(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001392{
1393 return (__m128d)in;
1394}
1395
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001396static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001397_mm_castps_si128(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001398{
1399 return (__m128i)in;
1400}
1401
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001402static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001403_mm_castsi128_ps(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001404{
1405 return (__m128)in;
1406}
1407
Chris Lattner1bddbcb2010-03-22 18:14:12 +00001408static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001409_mm_castsi128_pd(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001410{
1411 return (__m128d)in;
1412}
1413
/* Emit a single "pause" instruction via volatile inline assembly. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__("pause");
}
1419
/*
 * Combine two selectors into a single immediate: x is shifted left by one
 * bit and OR-ed with y, so x ends up in bit 1 and y in bit 0 when both are
 * 0 or 1.
 */
#define _MM_SHUFFLE2(x, y) \
  (((x) << 1) | (y))
Anders Carlssonf1bc6602008-12-26 00:49:43 +00001421
Anders Carlsson37f2f002008-12-24 01:45:22 +00001422#endif /* __SSE2__ */
1423
1424#endif /* __EMMINTRIN_H */