blob: 1061bf3b79aba8f7aa17471718006a8b3ea95775 [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* Public 16-byte vector types: a vector of double and a vector of long long. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Internal 16-byte lane views (char, short, int) used when casting __m128i. */
typedef char __v16qi __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef int __v4si __attribute__((__vector_size__(16)));
Anders Carlsson37f2f002008-12-24 01:45:22 +000039
Anders Carlssona2f12ae2009-02-14 01:00:11 +000040static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000041_mm_add_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000042{
Eli Friedman80c80042009-06-06 02:13:04 +000043 a[0] += b[0];
44 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000045}
46
Anders Carlssona2f12ae2009-02-14 01:00:11 +000047static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000048_mm_add_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000049{
50 return a + b;
51}
52
Anders Carlssona2f12ae2009-02-14 01:00:11 +000053static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000054_mm_sub_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000055{
Eli Friedman80c80042009-06-06 02:13:04 +000056 a[0] -= b[0];
57 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000058}
59
Anders Carlssona2f12ae2009-02-14 01:00:11 +000060static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000061_mm_sub_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000062{
63 return a - b;
64}
65
Anders Carlssona2f12ae2009-02-14 01:00:11 +000066static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000067_mm_mul_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000068{
Eli Friedman80c80042009-06-06 02:13:04 +000069 a[0] *= b[0];
70 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000071}
72
Anders Carlssona2f12ae2009-02-14 01:00:11 +000073static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000074_mm_mul_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000075{
76 return a * b;
77}
78
Anders Carlssona2f12ae2009-02-14 01:00:11 +000079static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000080_mm_div_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000081{
Eli Friedman80c80042009-06-06 02:13:04 +000082 a[0] /= b[0];
83 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +000084}
85
Anders Carlssona2f12ae2009-02-14 01:00:11 +000086static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000087_mm_div_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000088{
89 return a / b;
90}
91
Anders Carlssona2f12ae2009-02-14 01:00:11 +000092static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000093_mm_sqrt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000094{
95 __m128d c = __builtin_ia32_sqrtsd(b);
96 return (__m128d) { c[0], a[1] };
97}
98
Anders Carlssona2f12ae2009-02-14 01:00:11 +000099static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000100_mm_sqrt_pd(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000101{
102 return __builtin_ia32_sqrtpd(a);
103}
104
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000105static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000106_mm_min_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000107{
108 return __builtin_ia32_minsd(a, b);
109}
110
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000111static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000112_mm_min_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000113{
114 return __builtin_ia32_minpd(a, b);
115}
116
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000117static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000118_mm_max_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000119{
120 return __builtin_ia32_maxsd(a, b);
121}
122
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000123static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000124_mm_max_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000125{
126 return __builtin_ia32_maxpd(a, b);
127}
128
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000129static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000130_mm_and_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000131{
Eli Friedman80c80042009-06-06 02:13:04 +0000132 return (__m128)((__v4si)a & (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000133}
134
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000135static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000136_mm_andnot_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000137{
Eli Friedman80c80042009-06-06 02:13:04 +0000138 return (__m128)(~(__v4si)a & (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000139}
140
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000141static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000142_mm_or_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000143{
Eli Friedman80c80042009-06-06 02:13:04 +0000144 return (__m128)((__v4si)a | (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000145}
146
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000147static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000148_mm_xor_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000149{
Eli Friedman80c80042009-06-06 02:13:04 +0000150 return (__m128)((__v4si)a ^ (__v4si)b);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000151}
152
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000153static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000154_mm_cmpeq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000155{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000156 return (__m128d)__builtin_ia32_cmppd(a, b, 0);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000157}
158
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000159static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000160_mm_cmplt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000161{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000162 return (__m128d)__builtin_ia32_cmppd(a, b, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000163}
164
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000165static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000166_mm_cmple_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000167{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000168 return (__m128d)__builtin_ia32_cmppd(a, b, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000169}
170
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000171static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000172_mm_cmpgt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000173{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000174 return (__m128d)__builtin_ia32_cmppd(b, a, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000175}
176
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000177static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000178_mm_cmpge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000179{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000180 return (__m128d)__builtin_ia32_cmppd(b, a, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000181}
182
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000183static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000184_mm_cmpord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000185{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000186 return (__m128d)__builtin_ia32_cmppd(a, b, 7);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000187}
188
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000189static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000190_mm_cmpunord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000191{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000192 return (__m128d)__builtin_ia32_cmppd(a, b, 3);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000193}
194
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000195static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000196_mm_cmpneq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000197{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000198 return (__m128d)__builtin_ia32_cmppd(a, b, 4);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000199}
200
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000201static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000202_mm_cmpnlt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000203{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000204 return (__m128d)__builtin_ia32_cmppd(a, b, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000205}
206
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000207static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000208_mm_cmpnle_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000209{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000210 return (__m128d)__builtin_ia32_cmppd(a, b, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000211}
212
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000213static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000214_mm_cmpngt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000215{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000216 return (__m128d)__builtin_ia32_cmppd(b, a, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000217}
218
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000219static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000220_mm_cmpnge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000221{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000222 return (__m128d)__builtin_ia32_cmppd(b, a, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000223}
224
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000225static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000226_mm_cmpeq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000227{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000228 return (__m128d)__builtin_ia32_cmpsd(a, b, 0);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000229}
230
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000231static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000232_mm_cmplt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000233{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000234 return (__m128d)__builtin_ia32_cmpsd(a, b, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000235}
236
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000237static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000238_mm_cmple_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000239{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000240 return (__m128d)__builtin_ia32_cmpsd(a, b, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000241}
242
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000243static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000244_mm_cmpgt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000245{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000246 return (__m128d)__builtin_ia32_cmpsd(b, a, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000247}
248
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000249static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000250_mm_cmpge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000251{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000252 return (__m128d)__builtin_ia32_cmpsd(b, a, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000253}
254
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000255static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000256_mm_cmpord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000257{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000258 return (__m128d)__builtin_ia32_cmpsd(a, b, 7);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000259}
260
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000261static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000262_mm_cmpunord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000263{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000264 return (__m128d)__builtin_ia32_cmpsd(a, b, 3);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000265}
266
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000267static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000268_mm_cmpneq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000269{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000270 return (__m128d)__builtin_ia32_cmpsd(a, b, 4);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000271}
272
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000273static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000274_mm_cmpnlt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000275{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000276 return (__m128d)__builtin_ia32_cmpsd(a, b, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000277}
278
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000279static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000280_mm_cmpnle_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000281{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000282 return (__m128d)__builtin_ia32_cmpsd(a, b, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000283}
284
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000285static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000286_mm_cmpngt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000287{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000288 return (__m128d)__builtin_ia32_cmpsd(b, a, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000289}
290
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000291static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000292_mm_cmpnge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000293{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000294 return (__m128d)__builtin_ia32_cmpsd(b, a, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000295}
296
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000297static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000298_mm_comieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000299{
300 return __builtin_ia32_comisdeq(a, b);
301}
302
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000303static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000304_mm_comilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000305{
306 return __builtin_ia32_comisdlt(a, b);
307}
308
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000309static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000310_mm_comile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000311{
312 return __builtin_ia32_comisdle(a, b);
313}
314
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000315static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000316_mm_comigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000317{
318 return __builtin_ia32_comisdgt(a, b);
319}
320
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000321static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000322_mm_comineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000323{
324 return __builtin_ia32_comisdneq(a, b);
325}
326
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000327static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000328_mm_ucomieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000329{
330 return __builtin_ia32_ucomisdeq(a, b);
331}
332
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000333static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000334_mm_ucomilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000335{
336 return __builtin_ia32_ucomisdlt(a, b);
337}
338
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000339static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000340_mm_ucomile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000341{
342 return __builtin_ia32_ucomisdle(a, b);
343}
344
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000345static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000346_mm_ucomigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000347{
348 return __builtin_ia32_ucomisdgt(a, b);
349}
350
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000351static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000352_mm_ucomineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000353{
354 return __builtin_ia32_ucomisdneq(a, b);
355}
356
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000357static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000358_mm_cvtpd_ps(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000359{
360 return __builtin_ia32_cvtpd2ps(a);
361}
362
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000363static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000364_mm_cvtps_pd(__m128 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000365{
366 return __builtin_ia32_cvtps2pd(a);
367}
368
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000369static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000370_mm_cvtepi32_pd(__m128i a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000371{
372 return __builtin_ia32_cvtdq2pd((__v4si)a);
373}
374
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000375static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000376_mm_cvtpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000377{
378 return __builtin_ia32_cvtpd2dq(a);
379}
380
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000381static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000382_mm_cvtsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000383{
384 return __builtin_ia32_cvtsd2si(a);
385}
386
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000387static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000388_mm_cvtsd_ss(__m128 a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000389{
Eli Friedman80c80042009-06-06 02:13:04 +0000390 a[0] = b[0];
391 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +0000392}
393
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000394static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000395_mm_cvtsi32_sd(__m128d a, int b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000396{
397 return __builtin_ia32_cvtsi2sd(a, b);
398}
399
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000400static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000401_mm_cvtss_sd(__m128d a, __m128 b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000402{
Eli Friedman80c80042009-06-06 02:13:04 +0000403 a[0] = b[0];
404 return a;
Anders Carlsson37f2f002008-12-24 01:45:22 +0000405}
406
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000407static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000408_mm_cvttpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000409{
410 return (__m128i)__builtin_ia32_cvttpd2dq(a);
411}
412
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000413static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000414_mm_cvttsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000415{
Eli Friedman80c80042009-06-06 02:13:04 +0000416 return a[0];
Anders Carlsson37f2f002008-12-24 01:45:22 +0000417}
418
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000419static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000420_mm_cvtpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000421{
422 return (__m64)__builtin_ia32_cvtpd2pi(a);
423}
424
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000425static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000426_mm_cvttpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000427{
428 return (__m64)__builtin_ia32_cvttpd2pi(a);
429}
430
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000431static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000432_mm_cvtpi32_pd(__m64 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000433{
434 return __builtin_ia32_cvtpi2pd((__v2si)a);
435}
436
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000437static inline double __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000438_mm_cvtsd_f64(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000439{
440 return a[0];
441}
442
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000443static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000444_mm_load_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000445{
446 return *(__m128d*)dp;
447}
448
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000449static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000450_mm_load1_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000451{
452 return (__m128d){ dp[0], dp[0] };
453}
454
/* Alternate spelling of _mm_load1_pd. */
#define _mm_load_pd1(dp) (_mm_load1_pd(dp))
456
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000457static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000458_mm_loadr_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000459{
460 return (__m128d){ dp[1], dp[0] };
461}
462
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000463static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000464_mm_loadu_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000465{
466 return __builtin_ia32_loadupd(dp);
467}
468
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000469static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000470_mm_load_sd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000471{
472 return (__m128d){ *dp, 0.0 };
473}
474
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000475static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000476_mm_loadh_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000477{
478 return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2);
479}
480
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000481static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000482_mm_loadl_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000483{
484 return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1);
485}
486
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000487static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000488_mm_set_sd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000489{
490 return (__m128d){ w, 0 };
491}
492
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000493static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000494_mm_set1_pd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000495{
496 return (__m128d){ w, w };
497}
498
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000499static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000500_mm_set_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000501{
502 return (__m128d){ w, x };
503}
504
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000505static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000506_mm_setr_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000507{
508 return (__m128d){ x, w };
509}
510
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000511static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000512_mm_setzero_pd(void)
Anders Carlsson445afa02008-12-24 02:11:54 +0000513{
514 return (__m128d){ 0, 0 };
515}
516
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000517static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000518_mm_move_sd(__m128d a, __m128d b)
Anders Carlsson445afa02008-12-24 02:11:54 +0000519{
520 return (__m128d){ b[0], a[1] };
521}
522
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000523static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000524_mm_store_sd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000525{
526 dp[0] = a[0];
527}
528
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000529static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000530_mm_store1_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000531{
532 dp[0] = a[0];
533 dp[1] = a[0];
534}
535
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000536static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000537_mm_store_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000538{
539 *(__m128d *)dp = a;
540}
541
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000542static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000543_mm_storeu_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000544{
545 __builtin_ia32_storeupd(dp, a);
546}
547
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000548static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000549_mm_storer_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000550{
551 dp[0] = a[1];
552 dp[1] = a[0];
553}
554
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000555static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000556_mm_storeh_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000557{
558 dp[0] = a[1];
559}
560
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000561static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000562_mm_storel_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000563{
564 dp[0] = a[0];
565}
566
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000567static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000568_mm_add_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000569{
570 return (__m128i)((__v16qi)a + (__v16qi)b);
571}
572
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000573static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000574_mm_add_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000575{
576 return (__m128i)((__v8hi)a + (__v8hi)b);
577}
578
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000579static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000580_mm_add_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000581{
582 return (__m128i)((__v4si)a + (__v4si)b);
583}
584
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000585static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000586_mm_add_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000587{
588 return a + b;
589}
590
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000591static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000592_mm_add_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000593{
594 return a + b;
595}
596
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000597static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000598_mm_adds_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000599{
600 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
601}
602
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000603static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000604_mm_adds_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000605{
606 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
607}
608
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000609static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000610_mm_adds_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000611{
612 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
613}
614
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000615static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000616_mm_adds_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000617{
618 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
619}
620
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000621static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000622_mm_avg_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000623{
624 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
625}
626
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000627static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000628_mm_avg_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000629{
630 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
631}
632
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000633static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000634_mm_madd_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000635{
636 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
637}
638
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000639static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000640_mm_max_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000641{
642 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
643}
644
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000645static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000646_mm_max_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000647{
648 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
649}
650
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000651static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000652_mm_min_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000653{
654 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
655}
656
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000657static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000658_mm_min_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000659{
660 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
661}
662
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000663static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000664_mm_mulhi_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000665{
666 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
667}
668
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000669static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000670_mm_mulhi_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000671{
672 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
673}
674
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000675static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000676_mm_mullo_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000677{
678 return (__m128i)__builtin_ia32_pmullw128((__v8hi)a, (__v8hi)b);
679}
680
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000681static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000682_mm_mul_su32(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000683{
684 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
685}
686
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000687static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000688_mm_mul_epu32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000689{
690 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
691}
692
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000693static inline __m128i __attribute__((__always_inline__, __nodebug__))
Anders Carlssonae8ecdd2009-04-06 21:55:22 +0000694_mm_sad_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000695{
696 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
697}
698
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000699static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000700_mm_sub_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000701{
702 return (__m128i)((__v16qi)a - (__v16qi)b);
703}
704
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000705static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000706_mm_sub_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000707{
708 return (__m128i)((__v8hi)a - (__v8hi)b);
709}
710
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000711static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000712_mm_sub_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000713{
714 return (__m128i)((__v4si)a - (__v4si)b);
715}
716
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000717static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000718_mm_sub_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000719{
720 return a - b;
721}
722
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000723static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000724_mm_sub_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000725{
726 return a - b;
727}
728
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000729static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000730_mm_subs_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000731{
732 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
733}
734
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000735static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000736_mm_subs_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000737{
738 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
739}
740
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000741static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000742_mm_subs_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000743{
744 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
745}
746
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000747static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000748_mm_subs_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000749{
750 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
751}
752
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000753static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000754_mm_and_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000755{
Eli Friedman80c80042009-06-06 02:13:04 +0000756 return a & b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000757}
758
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000759static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000760_mm_andnot_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000761{
Eli Friedman80c80042009-06-06 02:13:04 +0000762 return ~a & b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000763}
764
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000765static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000766_mm_or_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000767{
Eli Friedman80c80042009-06-06 02:13:04 +0000768 return a | b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000769}
770
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000771static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000772_mm_xor_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000773{
Eli Friedman80c80042009-06-06 02:13:04 +0000774 return a ^ b;
Anders Carlsson0727df02008-12-25 23:48:58 +0000775}
776
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000777static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000778_mm_slli_si128(__m128i a, int imm)
Anders Carlsson0727df02008-12-25 23:48:58 +0000779{
780 return __builtin_ia32_pslldqi128(a, imm * 8);
781}
782
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000783static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000784_mm_slli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000785{
786 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
787}
788
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000789static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000790_mm_sll_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000791{
792 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
793}
794
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000795static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000796_mm_slli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000797{
798 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
799}
800
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000801static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000802_mm_sll_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000803{
804 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
805}
806
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000807static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000808_mm_slli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000809{
810 return __builtin_ia32_psllqi128(a, count);
811}
812
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000813static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000814_mm_sll_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000815{
816 return __builtin_ia32_psllq128(a, count);
817}
818
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000819static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000820_mm_srai_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000821{
822 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
823}
824
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000825static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000826_mm_sra_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000827{
828 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
829}
830
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000831static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000832_mm_srai_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000833{
834 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
835}
836
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000837static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000838_mm_sra_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000839{
840 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
841}
842
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000843static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000844_mm_srli_si128(__m128i a, int imm)
Anders Carlsson0727df02008-12-25 23:48:58 +0000845{
846 return __builtin_ia32_psrldqi128(a, imm * 8);
847}
848
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000849static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000850_mm_srli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000851{
852 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
853}
854
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000855static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000856_mm_srl_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000857{
858 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
859}
860
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000861static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000862_mm_srli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000863{
864 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
865}
866
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000867static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000868_mm_srl_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000869{
870 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
871}
872
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000873static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000874_mm_srli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000875{
876 return __builtin_ia32_psrlqi128(a, count);
877}
878
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000879static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000880_mm_srl_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000881{
882 return __builtin_ia32_psrlq128(a, count);
883}
884
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000885static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000886_mm_cmpeq_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000887{
888 return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b);
889}
890
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000891static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000892_mm_cmpeq_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000893{
894 return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b);
895}
896
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000897static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000898_mm_cmpeq_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000899{
900 return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b);
901}
902
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000903static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000904_mm_cmpgt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000905{
906 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b);
907}
908
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000909static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000910_mm_cmpgt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000911{
912 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b);
913}
914
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000915static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000916_mm_cmpgt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000917{
918 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b);
919}
920
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000921static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000922_mm_cmplt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000923{
924 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a);
925}
926
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000927static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000928_mm_cmplt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000929{
930 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a);
931}
932
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000933static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000934_mm_cmplt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000935{
936 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a);
937}
938
939#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000940static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000941_mm_cvtsi64_sd(__m128d a, long long b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000942{
Eli Friedman80c80042009-06-06 02:13:04 +0000943 a[0] = b;
944 return a;
Anders Carlsson0727df02008-12-25 23:48:58 +0000945}
946
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000947static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000948_mm_cvtsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000949{
950 return __builtin_ia32_cvtsd2si64(a);
951}
952
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000953static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000954_mm_cvttsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000955{
Eli Friedman80c80042009-06-06 02:13:04 +0000956 return a[0];
Anders Carlsson0727df02008-12-25 23:48:58 +0000957}
958#endif
959
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000960static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000961_mm_cvtepi32_ps(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000962{
963 return __builtin_ia32_cvtdq2ps((__v4si)a);
964}
965
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000966static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000967_mm_cvtps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000968{
969 return (__m128i)__builtin_ia32_cvtps2dq(a);
970}
971
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000972static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000973_mm_cvttps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000974{
975 return (__m128i)__builtin_ia32_cvttps2dq(a);
976}
977
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000978static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000979_mm_cvtsi32_si128(int a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000980{
981 return (__m128i)(__v4si){ a, 0, 0, 0 };
982}
983
984#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000985static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000986_mm_cvtsi64_si128(long long a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000987{
988 return (__m128i){ a, 0 };
989}
990#endif
991
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000992static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000993_mm_cvtsi128_si32(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000994{
995 __v4si b = (__v4si)a;
996 return b[0];
997}
998
999#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001000static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001001_mm_cvtsi128_si64(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001002{
1003 return a[0];
1004}
1005#endif
1006
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001007static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001008_mm_load_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001009{
1010 return *p;
1011}
1012
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001013static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001014_mm_loadu_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001015{
1016 return (__m128i)__builtin_ia32_loaddqu((char const *)p);
1017}
1018
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001019static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001020_mm_loadl_epi64(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001021{
1022 return (__m128i)__builtin_ia32_loadlv4si((__v2si *)p);
1023}
1024
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001025static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001026_mm_set_epi64(__m64 q1, __m64 q0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001027{
1028 return (__m128i){ (long long)q0, (long long)q1 };
1029}
1030
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001031static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001032_mm_set_epi32(int i3, int i2, int i1, int i0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001033{
1034 return (__m128i)(__v4si){ i0, i1, i2, i3};
1035}
1036
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001037static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001038_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001039{
1040 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1041}
1042
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001043static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001044_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001045{
1046 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1047}
1048
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001049static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001050_mm_set1_epi64(__m64 q)
Anders Carlsson0727df02008-12-25 23:48:58 +00001051{
1052 return (__m128i){ (long long)q, (long long)q };
1053}
1054
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001055static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001056_mm_set1_epi32(int i)
Anders Carlsson0727df02008-12-25 23:48:58 +00001057{
1058 return (__m128i)(__v4si){ i, i, i, i };
1059}
1060
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001061static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001062_mm_set1_epi16(short w)
Anders Carlsson0727df02008-12-25 23:48:58 +00001063{
1064 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1065}
1066
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001067static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001068_mm_set1_epi8(char b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001069{
1070 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1071}
1072
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001073static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001074_mm_setr_epi64(__m64 q0, __m64 q1)
Anders Carlsson0727df02008-12-25 23:48:58 +00001075{
1076 return (__m128i){ (long long)q0, (long long)q1 };
1077}
1078
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001079static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001080_mm_setr_epi32(int i0, int i1, int i2, int i3)
Anders Carlsson0727df02008-12-25 23:48:58 +00001081{
1082 return (__m128i)(__v4si){ i0, i1, i2, i3};
1083}
1084
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001085static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001086_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Anders Carlsson0727df02008-12-25 23:48:58 +00001087{
1088 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1089}
1090
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001091static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001092_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
Anders Carlsson0727df02008-12-25 23:48:58 +00001093{
1094 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1095}
1096
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001097static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001098_mm_setzero_si128(void)
Anders Carlsson0727df02008-12-25 23:48:58 +00001099{
1100 return (__m128i){ 0LL, 0LL };
1101}
1102
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001103static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001104_mm_store_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001105{
1106 *p = b;
1107}
1108
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001109static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001110_mm_storeu_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001111{
1112 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1113}
1114
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001115static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001116_mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001117{
1118 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1119}
1120
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001121static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001122_mm_storel_epi64(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001123{
1124 __builtin_ia32_storelv4si((__v2si *)p, a);
1125}
1126
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001127static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001128_mm_stream_pd(double *p, __m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001129{
1130 __builtin_ia32_movntpd(p, a);
1131}
1132
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001133static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001134_mm_stream_si128(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001135{
1136 __builtin_ia32_movntdq(p, a);
1137}
1138
/* Writes the int a to *p by way of the movnti builtin. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  int *dst = p;
  int value = a;
  __builtin_ia32_movnti(dst, value);
}
1144
/* Invokes the clflush builtin on the address p; nothing is returned. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  void const *line = p;
  __builtin_ia32_clflush(line);
}
1150
/* Thin wrapper: invokes the lfence builtin and returns nothing. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  (void)0; /* no operands to prepare */
  __builtin_ia32_lfence();
}
1156
/* Thin wrapper: invokes the mfence builtin and returns nothing. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  (void)0; /* no operands to prepare */
  __builtin_ia32_mfence();
}
1162
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001163static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001164_mm_packs_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001165{
1166 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1167}
1168
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001169static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001170_mm_packs_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001171{
1172 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1173}
1174
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001175static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001176_mm_packus_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001177{
1178 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1179}
1180
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001181static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001182_mm_extract_epi16(__m128i a, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001183{
1184 __v8hi b = (__v8hi)a;
1185 return b[imm];
1186}
1187
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001188static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001189_mm_insert_epi16(__m128i a, int b, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001190{
Eli Friedman80c80042009-06-06 02:13:04 +00001191 __v8hi c = (__v8hi)a;
1192 c[imm & 7] = b;
1193 return c;
Anders Carlsson45470752008-12-26 00:45:50 +00001194}
1195
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001196static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001197_mm_movemask_epi8(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001198{
1199 return __builtin_ia32_pmovmskb128((__v16qi)a);
1200}
1201
/*
 * Immediate-controlled shuffles.  Each macro casts a to the lane type the
 * builtin expects, forwards imm unchanged, and casts the result back to
 * __m128i.  They are macros rather than functions, presumably so that imm
 * reaches the builtin as a constant expression.
 */
#define _mm_shuffle_epi32(a, imm) \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(a), (imm)))
#define _mm_shufflehi_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(a), (imm)))
#define _mm_shufflelo_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(a), (imm)))
Anders Carlsson45470752008-12-26 00:45:50 +00001205
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001206static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001207_mm_unpackhi_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001208{
Anders Carlsson92d66862008-12-26 00:50:47 +00001209 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson45470752008-12-26 00:45:50 +00001210}
1211
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001212static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001213_mm_unpackhi_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001214{
Anders Carlsson92d66862008-12-26 00:50:47 +00001215 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001216}
1217
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001218static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001219_mm_unpackhi_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001220{
Anders Carlsson92d66862008-12-26 00:50:47 +00001221 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001222}
1223
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001224static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001225_mm_unpackhi_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001226{
Anders Carlsson92d66862008-12-26 00:50:47 +00001227 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001228}
1229
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001230static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001231_mm_unpacklo_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001232{
Anders Carlsson92d66862008-12-26 00:50:47 +00001233 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001234}
1235
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001236static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001237_mm_unpacklo_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001238{
Anders Carlsson92d66862008-12-26 00:50:47 +00001239 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001240}
1241
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001242static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001243_mm_unpacklo_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001244{
Anders Carlsson92d66862008-12-26 00:50:47 +00001245 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001246}
1247
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001248static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001249_mm_unpacklo_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001250{
Anders Carlsson92d66862008-12-26 00:50:47 +00001251 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001252}
1253
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001254static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001255_mm_movepi64_pi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001256{
1257 return (__m64)a[0];
1258}
1259
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001260static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001261_mm_movpi64_pi64(__m64 a)
Anders Carlsson45470752008-12-26 00:45:50 +00001262{
1263 return (__m128i){ (long long)a, 0 };
1264}
1265
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001266static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001267_mm_move_epi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001268{
Eli Friedman80c80042009-06-06 02:13:04 +00001269 return __builtin_shufflevector(a, (__m128i){ 0 }, 0, 2);
Anders Carlsson45470752008-12-26 00:45:50 +00001270}
1271
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001272static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001273_mm_unpackhi_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001274{
Anders Carlsson92d66862008-12-26 00:50:47 +00001275 return __builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001276}
1277
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001278static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001279_mm_unpacklo_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001280{
Anders Carlsson92d66862008-12-26 00:50:47 +00001281 return __builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001282}
1283
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001284static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001285_mm_movemask_pd(__m128d a)
Anders Carlsson45470752008-12-26 00:45:50 +00001286{
1287 return __builtin_ia32_movmskpd(a);
1288}
1289
/*
 * Forward a, b and the selector i unchanged to the shufpd builtin.
 * A macro rather than a function, presumably so that i reaches the
 * builtin as a constant expression.
 */
#define _mm_shuffle_pd(a, b, i) \
  (__builtin_ia32_shufpd((a), (b), (i)))
1291
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001292static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001293_mm_castpd_ps(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001294{
1295 return (__m128)in;
1296}
1297
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001298static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001299_mm_castpd_si128(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001300{
1301 return (__m128i)in;
1302}
1303
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001304static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001305_mm_castps_pd(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001306{
1307 return (__m128d)in;
1308}
1309
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001310static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001311_mm_castps_si128(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001312{
1313 return (__m128i)in;
1314}
1315
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001316static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001317_mm_castsi128_ps(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001318{
1319 return (__m128)in;
1320}
1321
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001322static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001323_mm_castsi128_pd(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001324{
1325 return (__m128d)in;
1326}
1327
/*
 * Emit a single "pause" instruction.  The asm statement is volatile so the
 * compiler does not drop it even though it has no outputs.
 */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__("pause");
}
1333
/*
 * Build a two-bit selector: x is shifted into bit 1 and OR-ed with y.
 * Presumably x and y are each 0 or 1 and the result feeds _mm_shuffle_pd;
 * the macro itself does not enforce either.
 */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
Anders Carlssonf1bc6602008-12-26 00:49:43 +00001335
Anders Carlsson37f2f002008-12-24 01:45:22 +00001336#endif /* __SSE2__ */
1337
1338#endif /* __EMMINTRIN_H */