blob: c96000aa1b76e04e8d937ff70905254d5b1159b2 [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* Public 16-byte vector types: __m128d holds doubles, __m128i holds long longs. */
33typedef double __m128d __attribute__((__vector_size__(16)));
34typedef long long __m128i __attribute__((__vector_size__(16)));
35
/* Internal 16-byte views used when casting __m128i to a specific element
 * width (int, short, char) before element-wise operations or builtin calls. */
36typedef int __v4si __attribute__((__vector_size__(16)));
Anders Carlsson07603aa2008-12-24 02:41:00 +000037typedef short __v8hi __attribute__((__vector_size__(16)));
Anders Carlsson445afa02008-12-24 02:11:54 +000038typedef char __v16qi __attribute__((__vector_size__(16)));
Anders Carlsson37f2f002008-12-24 01:45:22 +000039
Anders Carlssona2f12ae2009-02-14 01:00:11 +000040static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000041_mm_add_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000042{
43 return __builtin_ia32_addsd(a, b);
44}
45
Anders Carlssona2f12ae2009-02-14 01:00:11 +000046static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000047_mm_add_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000048{
49 return a + b;
50}
51
Anders Carlssona2f12ae2009-02-14 01:00:11 +000052static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000053_mm_sub_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000054{
55 return __builtin_ia32_subsd(a, b);
56}
57
Anders Carlssona2f12ae2009-02-14 01:00:11 +000058static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000059_mm_sub_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000060{
61 return a - b;
62}
63
Anders Carlssona2f12ae2009-02-14 01:00:11 +000064static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000065_mm_mul_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000066{
67 return __builtin_ia32_mulsd(a, b);
68}
69
Anders Carlssona2f12ae2009-02-14 01:00:11 +000070static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000071_mm_mul_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000072{
73 return a * b;
74}
75
Anders Carlssona2f12ae2009-02-14 01:00:11 +000076static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000077_mm_div_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000078{
79 return __builtin_ia32_divsd(a, b);
80}
81
Anders Carlssona2f12ae2009-02-14 01:00:11 +000082static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000083_mm_div_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000084{
85 return a / b;
86}
87
Anders Carlssona2f12ae2009-02-14 01:00:11 +000088static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000089_mm_sqrt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000090{
91 __m128d c = __builtin_ia32_sqrtsd(b);
92 return (__m128d) { c[0], a[1] };
93}
94
Anders Carlssona2f12ae2009-02-14 01:00:11 +000095static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000096_mm_sqrt_pd(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +000097{
98 return __builtin_ia32_sqrtpd(a);
99}
100
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000101static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000102_mm_min_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000103{
104 return __builtin_ia32_minsd(a, b);
105}
106
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000107static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000108_mm_min_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000109{
110 return __builtin_ia32_minpd(a, b);
111}
112
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000113static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000114_mm_max_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000115{
116 return __builtin_ia32_maxsd(a, b);
117}
118
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000119static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000120_mm_max_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000121{
122 return __builtin_ia32_maxpd(a, b);
123}
124
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000125static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000126_mm_and_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000127{
128 return __builtin_ia32_andpd(a, b);
129}
130
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000131static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000132_mm_andnot_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000133{
134 return __builtin_ia32_andnpd(a, b);
135}
136
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000137static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000138_mm_or_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000139{
140 return __builtin_ia32_orpd(a, b);
141}
142
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000143static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000144_mm_xor_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000145{
146 return __builtin_ia32_xorpd(a, b);
147}
148
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000149static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000150_mm_cmpeq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000151{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000152 return (__m128d)__builtin_ia32_cmppd(a, b, 0);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000153}
154
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000155static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000156_mm_cmplt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000157{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000158 return (__m128d)__builtin_ia32_cmppd(a, b, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000159}
160
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000161static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000162_mm_cmple_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000163{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000164 return (__m128d)__builtin_ia32_cmppd(a, b, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000165}
166
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000167static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000168_mm_cmpgt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000169{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000170 return (__m128d)__builtin_ia32_cmppd(b, a, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000171}
172
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000173static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000174_mm_cmpge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000175{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000176 return (__m128d)__builtin_ia32_cmppd(b, a, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000177}
178
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000179static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000180_mm_cmpord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000181{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000182 return (__m128d)__builtin_ia32_cmppd(a, b, 7);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000183}
184
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000185static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000186_mm_cmpunord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000187{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000188 return (__m128d)__builtin_ia32_cmppd(a, b, 3);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000189}
190
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000191static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000192_mm_cmpneq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000193{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000194 return (__m128d)__builtin_ia32_cmppd(a, b, 4);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000195}
196
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000197static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000198_mm_cmpnlt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000199{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000200 return (__m128d)__builtin_ia32_cmppd(a, b, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000201}
202
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000203static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000204_mm_cmpnle_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000205{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000206 return (__m128d)__builtin_ia32_cmppd(a, b, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000207}
208
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000209static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000210_mm_cmpngt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000211{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000212 return (__m128d)__builtin_ia32_cmppd(b, a, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000213}
214
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000215static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000216_mm_cmpnge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000217{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000218 return (__m128d)__builtin_ia32_cmppd(b, a, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000219}
220
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000221static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000222_mm_cmpeq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000223{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000224 return (__m128d)__builtin_ia32_cmpsd(a, b, 0);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000225}
226
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000227static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000228_mm_cmplt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000229{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000230 return (__m128d)__builtin_ia32_cmpsd(a, b, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000231}
232
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000233static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000234_mm_cmple_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000235{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000236 return (__m128d)__builtin_ia32_cmpsd(a, b, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000237}
238
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000239static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000240_mm_cmpgt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000241{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000242 return (__m128d)__builtin_ia32_cmpsd(b, a, 1);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000243}
244
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000245static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000246_mm_cmpge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000247{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000248 return (__m128d)__builtin_ia32_cmpsd(b, a, 2);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000249}
250
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000251static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000252_mm_cmpord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000253{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000254 return (__m128d)__builtin_ia32_cmpsd(a, b, 7);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000255}
256
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000257static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000258_mm_cmpunord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000259{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000260 return (__m128d)__builtin_ia32_cmpsd(a, b, 3);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000261}
262
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000263static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000264_mm_cmpneq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000265{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000266 return (__m128d)__builtin_ia32_cmpsd(a, b, 4);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000267}
268
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000269static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000270_mm_cmpnlt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000271{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000272 return (__m128d)__builtin_ia32_cmpsd(a, b, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000273}
274
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000275static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000276_mm_cmpnle_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000277{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000278 return (__m128d)__builtin_ia32_cmpsd(a, b, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000279}
280
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000281static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000282_mm_cmpngt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000283{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000284 return (__m128d)__builtin_ia32_cmpsd(b, a, 5);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000285}
286
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000287static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000288_mm_cmpnge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000289{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000290 return (__m128d)__builtin_ia32_cmpsd(b, a, 6);
Anders Carlsson37f2f002008-12-24 01:45:22 +0000291}
292
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000293static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000294_mm_comieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000295{
296 return __builtin_ia32_comisdeq(a, b);
297}
298
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000299static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000300_mm_comilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000301{
302 return __builtin_ia32_comisdlt(a, b);
303}
304
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000305static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000306_mm_comile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000307{
308 return __builtin_ia32_comisdle(a, b);
309}
310
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000311static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000312_mm_comigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000313{
314 return __builtin_ia32_comisdgt(a, b);
315}
316
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000317static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000318_mm_comineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000319{
320 return __builtin_ia32_comisdneq(a, b);
321}
322
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000323static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000324_mm_ucomieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000325{
326 return __builtin_ia32_ucomisdeq(a, b);
327}
328
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000329static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000330_mm_ucomilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000331{
332 return __builtin_ia32_ucomisdlt(a, b);
333}
334
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000335static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000336_mm_ucomile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000337{
338 return __builtin_ia32_ucomisdle(a, b);
339}
340
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000341static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000342_mm_ucomigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000343{
344 return __builtin_ia32_ucomisdgt(a, b);
345}
346
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000347static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000348_mm_ucomineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000349{
350 return __builtin_ia32_ucomisdneq(a, b);
351}
352
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000353static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000354_mm_cvtpd_ps(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000355{
356 return __builtin_ia32_cvtpd2ps(a);
357}
358
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000359static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000360_mm_cvtps_pd(__m128 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000361{
362 return __builtin_ia32_cvtps2pd(a);
363}
364
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000365static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000366_mm_cvtepi32_pd(__m128i a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000367{
368 return __builtin_ia32_cvtdq2pd((__v4si)a);
369}
370
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000371static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000372_mm_cvtpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000373{
374 return __builtin_ia32_cvtpd2dq(a);
375}
376
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000377static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000378_mm_cvtsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000379{
380 return __builtin_ia32_cvtsd2si(a);
381}
382
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000383static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000384_mm_cvtsd_ss(__m128 a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000385{
386 return __builtin_ia32_cvtsd2ss(a, b);
387}
388
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000389static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000390_mm_cvtsi32_sd(__m128d a, int b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000391{
392 return __builtin_ia32_cvtsi2sd(a, b);
393}
394
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000395static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000396_mm_cvtss_sd(__m128d a, __m128 b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000397{
398 return __builtin_ia32_cvtss2sd(a, b);
399}
400
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000401static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000402_mm_cvttpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000403{
404 return (__m128i)__builtin_ia32_cvttpd2dq(a);
405}
406
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000407static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000408_mm_cvttsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000409{
410 return __builtin_ia32_cvttsd2si(a);
411}
412
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000413static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000414_mm_cvtpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000415{
416 return (__m64)__builtin_ia32_cvtpd2pi(a);
417}
418
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000419static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000420_mm_cvttpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000421{
422 return (__m64)__builtin_ia32_cvttpd2pi(a);
423}
424
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000425static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000426_mm_cvtpi32_pd(__m64 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000427{
428 return __builtin_ia32_cvtpi2pd((__v2si)a);
429}
430
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000431static inline double __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000432_mm_cvtsd_f64(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000433{
434 return a[0];
435}
436
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000437static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000438_mm_load_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000439{
440 return *(__m128d*)dp;
441}
442
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000443static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000444_mm_load1_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000445{
446 return (__m128d){ dp[0], dp[0] };
447}
448
/* _mm_load_pd1 is an alternate spelling that expands to _mm_load1_pd. */
Eli Friedmandb7351a2009-06-02 05:55:48 +0000449#define _mm_load_pd1(dp) _mm_load1_pd(dp)
450
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000451static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000452_mm_loadr_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000453{
454 return (__m128d){ dp[1], dp[0] };
455}
456
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000457static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000458_mm_loadu_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000459{
460 return __builtin_ia32_loadupd(dp);
461}
462
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000463static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000464_mm_load_sd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000465{
466 return (__m128d){ *dp, 0.0 };
467}
468
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000469static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000470_mm_loadh_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000471{
472 return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2);
473}
474
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000475static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000476_mm_loadl_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000477{
478 return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1);
479}
480
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000481static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000482_mm_set_sd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000483{
484 return (__m128d){ w, 0 };
485}
486
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000487static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000488_mm_set1_pd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000489{
490 return (__m128d){ w, w };
491}
492
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000493static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000494_mm_set_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000495{
496 return (__m128d){ w, x };
497}
498
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000499static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000500_mm_setr_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000501{
502 return (__m128d){ x, w };
503}
504
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000505static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000506_mm_setzero_pd(void)
Anders Carlsson445afa02008-12-24 02:11:54 +0000507{
508 return (__m128d){ 0, 0 };
509}
510
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000511static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000512_mm_move_sd(__m128d a, __m128d b)
Anders Carlsson445afa02008-12-24 02:11:54 +0000513{
514 return (__m128d){ b[0], a[1] };
515}
516
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000517static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000518_mm_store_sd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000519{
520 dp[0] = a[0];
521}
522
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000523static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000524_mm_store1_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000525{
526 dp[0] = a[0];
527 dp[1] = a[0];
528}
529
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000530static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000531_mm_store_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000532{
533 *(__m128d *)dp = a;
534}
535
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000536static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000537_mm_storeu_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000538{
539 __builtin_ia32_storeupd(dp, a);
540}
541
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000542static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000543_mm_storer_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000544{
545 dp[0] = a[1];
546 dp[1] = a[0];
547}
548
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000549static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000550_mm_storeh_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000551{
552 dp[0] = a[1];
553}
554
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000555static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000556_mm_storel_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000557{
558 dp[0] = a[0];
559}
560
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000561static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000562_mm_add_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000563{
564 return (__m128i)((__v16qi)a + (__v16qi)b);
565}
566
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000567static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000568_mm_add_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000569{
570 return (__m128i)((__v8hi)a + (__v8hi)b);
571}
572
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000573static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000574_mm_add_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000575{
576 return (__m128i)((__v4si)a + (__v4si)b);
577}
578
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000579static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000580_mm_add_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000581{
582 return a + b;
583}
584
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000585static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000586_mm_add_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000587{
588 return a + b;
589}
590
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000591static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000592_mm_adds_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000593{
594 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
595}
596
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000597static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000598_mm_adds_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000599{
600 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
601}
602
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000603static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000604_mm_adds_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000605{
606 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
607}
608
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000609static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000610_mm_adds_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000611{
612 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
613}
614
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000615static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000616_mm_avg_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000617{
618 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
619}
620
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000621static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000622_mm_avg_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000623{
624 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
625}
626
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000627static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000628_mm_madd_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000629{
630 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
631}
632
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000633static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000634_mm_max_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000635{
636 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
637}
638
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000639static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000640_mm_max_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000641{
642 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
643}
644
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000645static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000646_mm_min_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000647{
648 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
649}
650
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000651static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000652_mm_min_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000653{
654 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
655}
656
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000657static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000658_mm_mulhi_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000659{
660 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
661}
662
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000663static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000664_mm_mulhi_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000665{
666 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
667}
668
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000669static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000670_mm_mullo_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000671{
672 return (__m128i)__builtin_ia32_pmullw128((__v8hi)a, (__v8hi)b);
673}
674
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000675static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000676_mm_mul_su32(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000677{
678 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
679}
680
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000681static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000682_mm_mul_epu32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000683{
684 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
685}
686
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000687static inline __m128i __attribute__((__always_inline__, __nodebug__))
Anders Carlssonae8ecdd2009-04-06 21:55:22 +0000688_mm_sad_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000689{
690 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
691}
692
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000693static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000694_mm_sub_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000695{
696 return (__m128i)((__v16qi)a - (__v16qi)b);
697}
698
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000699static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000700_mm_sub_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000701{
702 return (__m128i)((__v8hi)a - (__v8hi)b);
703}
704
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000705static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000706_mm_sub_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000707{
708 return (__m128i)((__v4si)a - (__v4si)b);
709}
710
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000711static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000712_mm_sub_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000713{
714 return a - b;
715}
716
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000717static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000718_mm_sub_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000719{
720 return a - b;
721}
722
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000723static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000724_mm_subs_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000725{
726 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
727}
728
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000729static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000730_mm_subs_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000731{
732 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
733}
734
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000735static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000736_mm_subs_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000737{
738 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
739}
740
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000741static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000742_mm_subs_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000743{
744 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
745}
746
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000747static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000748_mm_and_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000749{
750 return __builtin_ia32_pand128(a, b);
751}
752
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000753static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000754_mm_andnot_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000755{
756 return __builtin_ia32_pandn128(a, b);
757}
758
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000759static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000760_mm_or_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000761{
762 return __builtin_ia32_por128(a, b);
763}
764
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000765static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000766_mm_xor_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000767{
768 return __builtin_ia32_pxor128(a, b);
769}
770
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000771static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000772_mm_slli_si128(__m128i a, int imm)
Anders Carlsson0727df02008-12-25 23:48:58 +0000773{
774 return __builtin_ia32_pslldqi128(a, imm * 8);
775}
776
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000777static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000778_mm_slli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000779{
780 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
781}
782
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000783static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000784_mm_sll_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000785{
786 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
787}
788
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000789static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000790_mm_slli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000791{
792 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
793}
794
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000795static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000796_mm_sll_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000797{
798 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
799}
800
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000801static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000802_mm_slli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000803{
804 return __builtin_ia32_psllqi128(a, count);
805}
806
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000807static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000808_mm_sll_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000809{
810 return __builtin_ia32_psllq128(a, count);
811}
812
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000813static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000814_mm_srai_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000815{
816 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
817}
818
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000819static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000820_mm_sra_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000821{
822 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
823}
824
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000825static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000826_mm_srai_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000827{
828 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
829}
830
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000831static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000832_mm_sra_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000833{
834 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
835}
836
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000837static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000838_mm_srli_si128(__m128i a, int imm)
Anders Carlsson0727df02008-12-25 23:48:58 +0000839{
840 return __builtin_ia32_psrldqi128(a, imm * 8);
841}
842
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000843static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000844_mm_srli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000845{
846 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
847}
848
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000849static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000850_mm_srl_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000851{
852 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
853}
854
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000855static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000856_mm_srli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000857{
858 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
859}
860
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000861static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000862_mm_srl_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000863{
864 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
865}
866
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000867static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000868_mm_srli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000869{
870 return __builtin_ia32_psrlqi128(a, count);
871}
872
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000873static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000874_mm_srl_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000875{
876 return __builtin_ia32_psrlq128(a, count);
877}
878
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000879static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000880_mm_cmpeq_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000881{
882 return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b);
883}
884
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000885static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000886_mm_cmpeq_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000887{
888 return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b);
889}
890
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000891static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000892_mm_cmpeq_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000893{
894 return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b);
895}
896
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000897static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000898_mm_cmpgt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000899{
900 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b);
901}
902
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000903static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000904_mm_cmpgt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000905{
906 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b);
907}
908
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000909static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000910_mm_cmpgt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000911{
912 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b);
913}
914
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000915static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000916_mm_cmplt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000917{
918 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a);
919}
920
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000921static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000922_mm_cmplt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000923{
924 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a);
925}
926
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000927static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000928_mm_cmplt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000929{
930 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a);
931}
932
933#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000934static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000935_mm_cvtsi64_sd(__m128d a, long long b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000936{
937 return __builtin_ia32_cvtsi642sd(a, b);
938}
939
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000940static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000941_mm_cvtsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000942{
943 return __builtin_ia32_cvtsd2si64(a);
944}
945
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000946static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000947_mm_cvttsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000948{
949 return __builtin_ia32_cvttsd2si64(a);
950}
951#endif
952
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000953static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000954_mm_cvtepi32_ps(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000955{
956 return __builtin_ia32_cvtdq2ps((__v4si)a);
957}
958
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000959static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000960_mm_cvtps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000961{
962 return (__m128i)__builtin_ia32_cvtps2dq(a);
963}
964
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000965static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000966_mm_cvttps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000967{
968 return (__m128i)__builtin_ia32_cvttps2dq(a);
969}
970
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000971static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000972_mm_cvtsi32_si128(int a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000973{
974 return (__m128i)(__v4si){ a, 0, 0, 0 };
975}
976
977#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000978static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000979_mm_cvtsi64_si128(long long a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000980{
981 return (__m128i){ a, 0 };
982}
983#endif
984
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000985static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000986_mm_cvtsi128_si32(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000987{
988 __v4si b = (__v4si)a;
989 return b[0];
990}
991
992#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000993static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000994_mm_cvtsi128_si64(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000995{
996 return a[0];
997}
998#endif
999
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001000static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001001_mm_load_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001002{
1003 return *p;
1004}
1005
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001006static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001007_mm_loadu_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001008{
1009 return (__m128i)__builtin_ia32_loaddqu((char const *)p);
1010}
1011
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001012static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001013_mm_loadl_epi64(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001014{
1015 return (__m128i)__builtin_ia32_loadlv4si((__v2si *)p);
1016}
1017
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001018static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001019_mm_set_epi64(__m64 q1, __m64 q0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001020{
1021 return (__m128i){ (long long)q0, (long long)q1 };
1022}
1023
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001024static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001025_mm_set_epi32(int i3, int i2, int i1, int i0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001026{
1027 return (__m128i)(__v4si){ i0, i1, i2, i3};
1028}
1029
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001030static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001031_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001032{
1033 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1034}
1035
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001036static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001037_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001038{
1039 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1040}
1041
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001042static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001043_mm_set1_epi64(__m64 q)
Anders Carlsson0727df02008-12-25 23:48:58 +00001044{
1045 return (__m128i){ (long long)q, (long long)q };
1046}
1047
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001048static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001049_mm_set1_epi32(int i)
Anders Carlsson0727df02008-12-25 23:48:58 +00001050{
1051 return (__m128i)(__v4si){ i, i, i, i };
1052}
1053
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001054static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001055_mm_set1_epi16(short w)
Anders Carlsson0727df02008-12-25 23:48:58 +00001056{
1057 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1058}
1059
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001060static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001061_mm_set1_epi8(char b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001062{
1063 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1064}
1065
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001066static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001067_mm_setr_epi64(__m64 q0, __m64 q1)
Anders Carlsson0727df02008-12-25 23:48:58 +00001068{
1069 return (__m128i){ (long long)q0, (long long)q1 };
1070}
1071
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001072static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001073_mm_setr_epi32(int i0, int i1, int i2, int i3)
Anders Carlsson0727df02008-12-25 23:48:58 +00001074{
1075 return (__m128i)(__v4si){ i0, i1, i2, i3};
1076}
1077
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001078static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001079_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Anders Carlsson0727df02008-12-25 23:48:58 +00001080{
1081 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1082}
1083
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001084static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001085_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
Anders Carlsson0727df02008-12-25 23:48:58 +00001086{
1087 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1088}
1089
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001090static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001091_mm_setzero_si128(void)
Anders Carlsson0727df02008-12-25 23:48:58 +00001092{
1093 return (__m128i){ 0LL, 0LL };
1094}
1095
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001096static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001097_mm_store_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001098{
1099 *p = b;
1100}
1101
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001102static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001103_mm_storeu_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001104{
1105 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1106}
1107
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001108static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001109_mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001110{
1111 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1112}
1113
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001114static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001115_mm_storel_epi64(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001116{
1117 __builtin_ia32_storelv4si((__v2si *)p, a);
1118}
1119
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001120static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001121_mm_stream_pd(double *p, __m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001122{
1123 __builtin_ia32_movntpd(p, a);
1124}
1125
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001126static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001127_mm_stream_si128(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001128{
1129 __builtin_ia32_movntdq(p, a);
1130}
1131
/* Stores the int a to p through __builtin_ia32_movnti (MOVNTI). */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  __builtin_ia32_movnti(p, a);
}
1137
/* Invokes __builtin_ia32_clflush (CLFLUSH) on the address p. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  __builtin_ia32_clflush(p);
}
1143
/* Invokes __builtin_ia32_lfence (LFENCE); takes no arguments and returns
 * nothing. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
}
1149
/* Invokes __builtin_ia32_mfence (MFENCE); takes no arguments and returns
 * nothing. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
}
1155
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001156static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001157_mm_packs_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001158{
1159 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1160}
1161
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001162static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001163_mm_packs_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001164{
1165 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1166}
1167
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001168static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001169_mm_packus_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001170{
1171 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1172}
1173
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001174static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001175_mm_extract_epi16(__m128i a, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001176{
1177 __v8hi b = (__v8hi)a;
1178 return b[imm];
1179}
1180
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001181static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001182_mm_insert_epi16(__m128i a, int b, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001183{
1184 return (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)a, b, imm);
1185}
1186
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001187static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001188_mm_movemask_epi8(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001189{
1190 return __builtin_ia32_pmovmskb128((__v16qi)a);
1191}
1192
/* Shuffle intrinsics, defined as macros so that the selector argument reaches
 * the builtin as written at the call site. Each one casts the vector argument
 * to the lane type the builtin takes and the result back to __m128i. */
#define _mm_shuffle_epi32(v, sel) \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(v), (sel)))
#define _mm_shufflehi_epi16(v, sel) \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(v), (sel)))
#define _mm_shufflelo_epi16(v, sel) \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(v), (sel)))
Anders Carlsson45470752008-12-26 00:45:50 +00001196
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001197static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001198_mm_unpackhi_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001199{
Anders Carlsson92d66862008-12-26 00:50:47 +00001200 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson45470752008-12-26 00:45:50 +00001201}
1202
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001203static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001204_mm_unpackhi_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001205{
Anders Carlsson92d66862008-12-26 00:50:47 +00001206 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001207}
1208
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001209static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001210_mm_unpackhi_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001211{
Anders Carlsson92d66862008-12-26 00:50:47 +00001212 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001213}
1214
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001215static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001216_mm_unpackhi_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001217{
Anders Carlsson92d66862008-12-26 00:50:47 +00001218 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001219}
1220
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001221static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001222_mm_unpacklo_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001223{
Anders Carlsson92d66862008-12-26 00:50:47 +00001224 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001225}
1226
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001227static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001228_mm_unpacklo_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001229{
Anders Carlsson92d66862008-12-26 00:50:47 +00001230 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001231}
1232
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001233static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001234_mm_unpacklo_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001235{
Anders Carlsson92d66862008-12-26 00:50:47 +00001236 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001237}
1238
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001239static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001240_mm_unpacklo_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001241{
Anders Carlsson92d66862008-12-26 00:50:47 +00001242 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001243}
1244
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001245static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001246_mm_movepi64_pi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001247{
1248 return (__m64)a[0];
1249}
1250
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001251static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001252_mm_movpi64_pi64(__m64 a)
Anders Carlsson45470752008-12-26 00:45:50 +00001253{
1254 return (__m128i){ (long long)a, 0 };
1255}
1256
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001257static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001258_mm_move_epi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001259{
1260 return (__m128i){ a[0], 0 };
1261}
1262
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001263static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001264_mm_unpackhi_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001265{
Anders Carlsson92d66862008-12-26 00:50:47 +00001266 return __builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001267}
1268
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001269static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001270_mm_unpacklo_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001271{
Anders Carlsson92d66862008-12-26 00:50:47 +00001272 return __builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001273}
1274
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001275static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001276_mm_movemask_pd(__m128d a)
Anders Carlsson45470752008-12-26 00:45:50 +00001277{
1278 return __builtin_ia32_movmskpd(a);
1279}
1280
/*
 * Shuffle the double lanes of a and b under control of i by forwarding all
 * three operands to __builtin_ia32_shufpd.  A macro rather than a function
 * so that i reaches the builtin unchanged.
 */
#define _mm_shuffle_pd(a, b, i) \
  (__builtin_ia32_shufpd((a), (b), (i)))
1282
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001283static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001284_mm_castpd_ps(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001285{
1286 return (__m128)in;
1287}
1288
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001289static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001290_mm_castpd_si128(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001291{
1292 return (__m128i)in;
1293}
1294
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001295static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001296_mm_castps_pd(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001297{
1298 return (__m128d)in;
1299}
1300
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001301static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001302_mm_castps_si128(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001303{
1304 return (__m128i)in;
1305}
1306
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001307static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001308_mm_castsi128_ps(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001309{
1310 return (__m128)in;
1311}
1312
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001313static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001314_mm_castsi128_pd(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001315{
1316 return (__m128d)in;
1317}
1318
/*
 * Emit a single "pause" instruction.  The asm statement is volatile so the
 * compiler will not drop it even though it has no outputs.
 */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__("pause");
}
1324
/*
 * Combine two selectors into one value: x is placed in bit 1 and y is OR-ed
 * into the low bits.
 */
#define _MM_SHUFFLE2(x, y) \
  ((y) | ((x) << 1))
Anders Carlssonf1bc6602008-12-26 00:49:43 +00001326
Anders Carlsson37f2f002008-12-24 01:45:22 +00001327#endif /* __SSE2__ */
1328
1329#endif /* __EMMINTRIN_H */