blob: 84ce06a92fc8e0f06109714b40f0fb527367466d [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* 128-bit public vector types: two doubles, and two 64-bit integers. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Internal 128-bit views used when casting __m128i to a specific lane width:
 * four ints, eight shorts, sixteen chars. */
typedef int __v4si __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000039
Anders Carlsson8a9574b2009-02-14 01:00:11 +000040static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000041_mm_add_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000042{
43 return __builtin_ia32_addsd(a, b);
44}
45
Anders Carlsson8a9574b2009-02-14 01:00:11 +000046static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000047_mm_add_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000048{
49 return a + b;
50}
51
Anders Carlsson8a9574b2009-02-14 01:00:11 +000052static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000053_mm_sub_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000054{
55 return __builtin_ia32_subsd(a, b);
56}
57
Anders Carlsson8a9574b2009-02-14 01:00:11 +000058static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000059_mm_sub_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000060{
61 return a - b;
62}
63
Anders Carlsson8a9574b2009-02-14 01:00:11 +000064static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000065_mm_mul_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000066{
67 return __builtin_ia32_mulsd(a, b);
68}
69
Anders Carlsson8a9574b2009-02-14 01:00:11 +000070static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000071_mm_mul_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000072{
73 return a * b;
74}
75
Anders Carlsson8a9574b2009-02-14 01:00:11 +000076static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000077_mm_div_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000078{
79 return __builtin_ia32_divsd(a, b);
80}
81
Anders Carlsson8a9574b2009-02-14 01:00:11 +000082static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000083_mm_div_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000084{
85 return a / b;
86}
87
Anders Carlsson8a9574b2009-02-14 01:00:11 +000088static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000089_mm_sqrt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000090{
91 __m128d c = __builtin_ia32_sqrtsd(b);
92 return (__m128d) { c[0], a[1] };
93}
94
Anders Carlsson8a9574b2009-02-14 01:00:11 +000095static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +000096_mm_sqrt_pd(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000097{
98 return __builtin_ia32_sqrtpd(a);
99}
100
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000101static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000102_mm_min_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000103{
104 return __builtin_ia32_minsd(a, b);
105}
106
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000107static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000108_mm_min_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000109{
110 return __builtin_ia32_minpd(a, b);
111}
112
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000113static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000114_mm_max_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000115{
116 return __builtin_ia32_maxsd(a, b);
117}
118
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000119static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000120_mm_max_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000121{
122 return __builtin_ia32_maxpd(a, b);
123}
124
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000125static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000126_mm_and_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000127{
128 return __builtin_ia32_andpd(a, b);
129}
130
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000131static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000132_mm_andnot_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000133{
134 return __builtin_ia32_andnpd(a, b);
135}
136
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000137static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000138_mm_or_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000139{
140 return __builtin_ia32_orpd(a, b);
141}
142
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000143static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000144_mm_xor_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000145{
146 return __builtin_ia32_xorpd(a, b);
147}
148
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000149static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000150_mm_cmpeq_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000151{
152 return (__m128d)__builtin_ia32_cmpeqpd(a, b);
153}
154
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000155static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000156_mm_cmplt_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000157{
158 return (__m128d)__builtin_ia32_cmpltpd(a, b);
159}
160
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000161static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000162_mm_cmple_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000163{
164 return (__m128d)__builtin_ia32_cmplepd(a, b);
165}
166
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000167static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000168_mm_cmpgt_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000169{
170 return (__m128d)__builtin_ia32_cmpltpd(b, a);
171}
172
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000173static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000174_mm_cmpge_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000175{
176 return (__m128d)__builtin_ia32_cmplepd(b, a);
177}
178
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000179static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000180_mm_cmpord_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000181{
182 return (__m128d)__builtin_ia32_cmpordpd(a, b);
183}
184
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000185static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000186_mm_cmpunord_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000187{
188 return (__m128d)__builtin_ia32_cmpunordpd(a, b);
189}
190
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000191static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000192_mm_cmpneq_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000193{
194 return (__m128d)__builtin_ia32_cmpneqpd(a, b);
195}
196
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000197static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000198_mm_cmpnlt_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000199{
200 return (__m128d)__builtin_ia32_cmpnltpd(a, b);
201}
202
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000203static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000204_mm_cmpnle_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000205{
206 return (__m128d)__builtin_ia32_cmpnlepd(a, b);
207}
208
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000209static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000210_mm_cmpngt_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000211{
212 return (__m128d)__builtin_ia32_cmpnltpd(b, a);
213}
214
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000215static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000216_mm_cmpnge_pd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000217{
218 return (__m128d)__builtin_ia32_cmpnlepd(b, a);
219}
220
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000221static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000222_mm_cmpeq_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000223{
224 return (__m128d)__builtin_ia32_cmpeqsd(a, b);
225}
226
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000227static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000228_mm_cmplt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000229{
230 return (__m128d)__builtin_ia32_cmpltsd(a, b);
231}
232
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000233static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000234_mm_cmple_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000235{
236 return (__m128d)__builtin_ia32_cmplesd(a, b);
237}
238
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000239static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000240_mm_cmpgt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000241{
242 return (__m128d)__builtin_ia32_cmpltsd(b, a);
243}
244
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000245static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000246_mm_cmpge_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000247{
248 return (__m128d)__builtin_ia32_cmplesd(b, a);
249}
250
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000251static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000252_mm_cmpord_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000253{
254 return (__m128d)__builtin_ia32_cmpordsd(a, b);
255}
256
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000257static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000258_mm_cmpunord_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000259{
260 return (__m128d)__builtin_ia32_cmpunordsd(a, b);
261}
262
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000263static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000264_mm_cmpneq_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000265{
266 return (__m128d)__builtin_ia32_cmpneqsd(a, b);
267}
268
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000269static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000270_mm_cmpnlt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000271{
272 return (__m128d)__builtin_ia32_cmpnltsd(a, b);
273}
274
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000275static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000276_mm_cmpnle_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000277{
278 return (__m128d)__builtin_ia32_cmpnlesd(a, b);
279}
280
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000281static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000282_mm_cmpngt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000283{
284 return (__m128d)__builtin_ia32_cmpnltsd(b, a);
285}
286
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000287static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000288_mm_cmpnge_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000289{
290 return (__m128d)__builtin_ia32_cmpnlesd(b, a);
291}
292
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000293static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000294_mm_comieq_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000295{
296 return __builtin_ia32_comisdeq(a, b);
297}
298
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000299static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000300_mm_comilt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000301{
302 return __builtin_ia32_comisdlt(a, b);
303}
304
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000305static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000306_mm_comile_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000307{
308 return __builtin_ia32_comisdle(a, b);
309}
310
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000311static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000312_mm_comigt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000313{
314 return __builtin_ia32_comisdgt(a, b);
315}
316
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000317static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000318_mm_comineq_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000319{
320 return __builtin_ia32_comisdneq(a, b);
321}
322
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000323static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000324_mm_ucomieq_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000325{
326 return __builtin_ia32_ucomisdeq(a, b);
327}
328
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000329static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000330_mm_ucomilt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000331{
332 return __builtin_ia32_ucomisdlt(a, b);
333}
334
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000335static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000336_mm_ucomile_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000337{
338 return __builtin_ia32_ucomisdle(a, b);
339}
340
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000341static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000342_mm_ucomigt_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000343{
344 return __builtin_ia32_ucomisdgt(a, b);
345}
346
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000347static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000348_mm_ucomineq_sd(__m128d a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000349{
350 return __builtin_ia32_ucomisdneq(a, b);
351}
352
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000353static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000354_mm_cvtpd_ps(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000355{
356 return __builtin_ia32_cvtpd2ps(a);
357}
358
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000359static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000360_mm_cvtps_pd(__m128 a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000361{
362 return __builtin_ia32_cvtps2pd(a);
363}
364
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000365static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000366_mm_cvtepi32_pd(__m128i a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000367{
368 return __builtin_ia32_cvtdq2pd((__v4si)a);
369}
370
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000371static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000372_mm_cvtpd_epi32(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000373{
374 return __builtin_ia32_cvtpd2dq(a);
375}
376
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000377static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000378_mm_cvtsd_si32(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000379{
380 return __builtin_ia32_cvtsd2si(a);
381}
382
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000383static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000384_mm_cvtsd_ss(__m128 a, __m128d b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000385{
386 return __builtin_ia32_cvtsd2ss(a, b);
387}
388
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000389static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000390_mm_cvtsi32_sd(__m128d a, int b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000391{
392 return __builtin_ia32_cvtsi2sd(a, b);
393}
394
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000395static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000396_mm_cvtss_sd(__m128d a, __m128 b)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000397{
398 return __builtin_ia32_cvtss2sd(a, b);
399}
400
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000401static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000402_mm_cvttpd_epi32(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000403{
404 return (__m128i)__builtin_ia32_cvttpd2dq(a);
405}
406
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000407static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000408_mm_cvttsd_si32(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000409{
410 return __builtin_ia32_cvttsd2si(a);
411}
412
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000413static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000414_mm_cvtpd_pi32(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000415{
416 return (__m64)__builtin_ia32_cvtpd2pi(a);
417}
418
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000419static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000420_mm_cvttpd_pi32(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000421{
422 return (__m64)__builtin_ia32_cvttpd2pi(a);
423}
424
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000425static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000426_mm_cvtpi32_pd(__m64 a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000427{
428 return __builtin_ia32_cvtpi2pd((__v2si)a);
429}
430
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000431static inline double __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000432_mm_cvtsd_f64(__m128d a)
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000433{
434 return a[0];
435}
436
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000437static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000438_mm_load_pd(double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000439{
440 return *(__m128d*)dp;
441}
442
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000443static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000444_mm_load1_pd(double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000445{
446 return (__m128d){ dp[0], dp[0] };
447}
448
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000449static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000450_mm_loadr_pd(double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000451{
452 return (__m128d){ dp[1], dp[0] };
453}
454
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000455static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000456_mm_loadu_pd(double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000457{
458 return __builtin_ia32_loadupd(dp);
459}
460
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000461static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000462_mm_load_sd(double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000463{
464 return (__m128d){ *dp, 0.0 };
465}
466
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000467static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000468_mm_loadh_pd(__m128d a, double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000469{
470 return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2);
471}
472
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000473static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000474_mm_loadl_pd(__m128d a, double const *dp)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000475{
476 return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1);
477}
478
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000479static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000480_mm_set_sd(double w)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000481{
482 return (__m128d){ w, 0 };
483}
484
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000485static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000486_mm_set1_pd(double w)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000487{
488 return (__m128d){ w, w };
489}
490
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000491static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000492_mm_set_pd(double w, double x)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000493{
494 return (__m128d){ w, x };
495}
496
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000497static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000498_mm_setr_pd(double w, double x)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000499{
500 return (__m128d){ x, w };
501}
502
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000503static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000504_mm_setzero_pd(void)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000505{
506 return (__m128d){ 0, 0 };
507}
508
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000509static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000510_mm_move_sd(__m128d a, __m128d b)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000511{
512 return (__m128d){ b[0], a[1] };
513}
514
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000515static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000516_mm_store_sd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000517{
518 dp[0] = a[0];
519}
520
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000521static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000522_mm_store1_pd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000523{
524 dp[0] = a[0];
525 dp[1] = a[0];
526}
527
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000528static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000529_mm_store_pd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000530{
531 *(__m128d *)dp = a;
532}
533
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000534static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000535_mm_storeu_pd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000536{
537 __builtin_ia32_storeupd(dp, a);
538}
539
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000540static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000541_mm_storer_pd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000542{
543 dp[0] = a[1];
544 dp[1] = a[0];
545}
546
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000547static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000548_mm_storeh_pd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000549{
550 dp[0] = a[1];
551}
552
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000553static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000554_mm_storel_pd(double *dp, __m128d a)
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000555{
556 dp[0] = a[0];
557}
558
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000559static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000560_mm_add_epi8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000561{
562 return (__m128i)((__v16qi)a + (__v16qi)b);
563}
564
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000565static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000566_mm_add_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000567{
568 return (__m128i)((__v8hi)a + (__v8hi)b);
569}
570
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000571static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000572_mm_add_epi32(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000573{
574 return (__m128i)((__v4si)a + (__v4si)b);
575}
576
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000577static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000578_mm_add_si64(__m64 a, __m64 b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000579{
580 return a + b;
581}
582
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000583static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000584_mm_add_epi64(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000585{
586 return a + b;
587}
588
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000589static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000590_mm_adds_epi8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000591{
592 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
593}
594
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000595static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000596_mm_adds_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000597{
598 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
599}
600
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000601static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000602_mm_adds_epu8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000603{
604 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
605}
606
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000607static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000608_mm_adds_epu16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000609{
610 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
611}
612
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000613static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000614_mm_avg_epu8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000615{
616 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
617}
618
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000619static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000620_mm_avg_epu16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000621{
622 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
623}
624
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000625static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000626_mm_madd_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000627{
628 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
629}
630
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000631static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000632_mm_max_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000633{
634 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
635}
636
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000637static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000638_mm_max_epu8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000639{
640 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
641}
642
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000643static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000644_mm_min_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000645{
646 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
647}
648
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000649static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000650_mm_min_epu8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000651{
652 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
653}
654
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000655static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000656_mm_mulhi_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000657{
658 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
659}
660
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000661static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000662_mm_mulhi_epu16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000663{
664 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
665}
666
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000667static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000668_mm_mullo_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000669{
670 return (__m128i)__builtin_ia32_pmullw128((__v8hi)a, (__v8hi)b);
671}
672
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000673static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000674_mm_mul_su32(__m64 a, __m64 b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000675{
676 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
677}
678
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000679static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000680_mm_mul_epu32(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000681{
682 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
683}
684
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000685static inline __m128i __attribute__((__always_inline__, __nodebug__))
Anders Carlsson6bf788b2009-04-06 21:55:22 +0000686_mm_sad_epu8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000687{
688 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
689}
690
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000691static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000692_mm_sub_epi8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000693{
694 return (__m128i)((__v16qi)a - (__v16qi)b);
695}
696
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000697static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000698_mm_sub_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000699{
700 return (__m128i)((__v8hi)a - (__v8hi)b);
701}
702
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000703static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000704_mm_sub_epi32(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000705{
706 return (__m128i)((__v4si)a - (__v4si)b);
707}
708
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000709static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000710_mm_sub_si64(__m64 a, __m64 b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000711{
712 return a - b;
713}
714
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000715static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000716_mm_sub_epi64(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000717{
718 return a - b;
719}
720
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000721static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000722_mm_subs_epi8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000723{
724 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
725}
726
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000727static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000728_mm_subs_epi16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000729{
730 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
731}
732
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000733static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000734_mm_subs_epu8(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000735{
736 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
737}
738
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000739static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000740_mm_subs_epu16(__m128i a, __m128i b)
Anders Carlsson7efc24b2008-12-24 02:41:00 +0000741{
742 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
743}
744
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000745static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000746_mm_and_si128(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000747{
748 return __builtin_ia32_pand128(a, b);
749}
750
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000751static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000752_mm_andnot_si128(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000753{
754 return __builtin_ia32_pandn128(a, b);
755}
756
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000757static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000758_mm_or_si128(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000759{
760 return __builtin_ia32_por128(a, b);
761}
762
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000763static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000764_mm_xor_si128(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000765{
766 return __builtin_ia32_pxor128(a, b);
767}
768
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000769static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000770_mm_slli_si128(__m128i a, int imm)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000771{
772 return __builtin_ia32_pslldqi128(a, imm * 8);
773}
774
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000775static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000776_mm_slli_epi16(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000777{
778 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
779}
780
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000781static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000782_mm_sll_epi16(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000783{
784 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
785}
786
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000787static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000788_mm_slli_epi32(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000789{
790 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
791}
792
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000793static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000794_mm_sll_epi32(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000795{
796 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
797}
798
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000799static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000800_mm_slli_epi64(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000801{
802 return __builtin_ia32_psllqi128(a, count);
803}
804
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000805static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000806_mm_sll_epi64(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000807{
808 return __builtin_ia32_psllq128(a, count);
809}
810
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000811static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000812_mm_srai_epi16(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000813{
814 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
815}
816
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000817static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000818_mm_sra_epi16(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000819{
820 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
821}
822
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000823static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000824_mm_srai_epi32(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000825{
826 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
827}
828
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000829static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000830_mm_sra_epi32(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000831{
832 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
833}
834
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000835static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000836_mm_srli_si128(__m128i a, int imm)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000837{
838 return __builtin_ia32_psrldqi128(a, imm * 8);
839}
840
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000841static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000842_mm_srli_epi16(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000843{
844 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
845}
846
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000847static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000848_mm_srl_epi16(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000849{
850 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
851}
852
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000853static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000854_mm_srli_epi32(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000855{
856 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
857}
858
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000859static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000860_mm_srl_epi32(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000861{
862 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
863}
864
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000865static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000866_mm_srli_epi64(__m128i a, int count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000867{
868 return __builtin_ia32_psrlqi128(a, count);
869}
870
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000871static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000872_mm_srl_epi64(__m128i a, __m128i count)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000873{
874 return __builtin_ia32_psrlq128(a, count);
875}
876
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000877static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000878_mm_cmpeq_epi8(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000879{
880 return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b);
881}
882
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000883static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000884_mm_cmpeq_epi16(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000885{
886 return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b);
887}
888
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000889static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000890_mm_cmpeq_epi32(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000891{
892 return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b);
893}
894
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000895static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000896_mm_cmpgt_epi8(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000897{
898 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b);
899}
900
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000901static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000902_mm_cmpgt_epi16(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000903{
904 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b);
905}
906
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000907static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000908_mm_cmpgt_epi32(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000909{
910 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b);
911}
912
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000913static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000914_mm_cmplt_epi8(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000915{
916 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a);
917}
918
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000919static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000920_mm_cmplt_epi16(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000921{
922 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a);
923}
924
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000925static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000926_mm_cmplt_epi32(__m128i a, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000927{
928 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a);
929}
930
931#ifdef __x86_64__
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000932static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000933_mm_cvtsi64_sd(__m128d a, long long b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000934{
935 return __builtin_ia32_cvtsi642sd(a, b);
936}
937
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000938static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000939_mm_cvtsd_si64(__m128d a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000940{
941 return __builtin_ia32_cvtsd2si64(a);
942}
943
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000944static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000945_mm_cvttsd_si64(__m128d a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000946{
947 return __builtin_ia32_cvttsd2si64(a);
948}
949#endif
950
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000951static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000952_mm_cvtepi32_ps(__m128i a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000953{
954 return __builtin_ia32_cvtdq2ps((__v4si)a);
955}
956
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000957static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000958_mm_cvtps_epi32(__m128 a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000959{
960 return (__m128i)__builtin_ia32_cvtps2dq(a);
961}
962
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000963static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000964_mm_cvttps_epi32(__m128 a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000965{
966 return (__m128i)__builtin_ia32_cvttps2dq(a);
967}
968
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000969static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000970_mm_cvtsi32_si128(int a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000971{
972 return (__m128i)(__v4si){ a, 0, 0, 0 };
973}
974
975#ifdef __x86_64__
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000976static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000977_mm_cvtsi64_si128(long long a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000978{
979 return (__m128i){ a, 0 };
980}
981#endif
982
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000983static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000984_mm_cvtsi128_si32(__m128i a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000985{
986 __v4si b = (__v4si)a;
987 return b[0];
988}
989
990#ifdef __x86_64__
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000991static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000992_mm_cvtsi128_si64(__m128i a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +0000993{
994 return a[0];
995}
996#endif
997
Anders Carlsson8a9574b2009-02-14 01:00:11 +0000998static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +0000999_mm_load_si128(__m128i const *p)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001000{
1001 return *p;
1002}
1003
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001004static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001005_mm_loadu_si128(__m128i const *p)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001006{
1007 return (__m128i)__builtin_ia32_loaddqu((char const *)p);
1008}
1009
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001010static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001011_mm_loadl_epi64(__m128i const *p)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001012{
1013 return (__m128i)__builtin_ia32_loadlv4si((__v2si *)p);
1014}
1015
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001016static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001017_mm_set_epi64(__m64 q1, __m64 q0)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001018{
1019 return (__m128i){ (long long)q0, (long long)q1 };
1020}
1021
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001022static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001023_mm_set_epi32(int i3, int i2, int i1, int i0)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001024{
1025 return (__m128i)(__v4si){ i0, i1, i2, i3};
1026}
1027
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001028static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001029_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001030{
1031 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1032}
1033
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001034static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001035_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001036{
1037 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1038}
1039
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001040static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001041_mm_set1_epi64(__m64 q)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001042{
1043 return (__m128i){ (long long)q, (long long)q };
1044}
1045
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001046static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001047_mm_set1_epi32(int i)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001048{
1049 return (__m128i)(__v4si){ i, i, i, i };
1050}
1051
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001052static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001053_mm_set1_epi16(short w)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001054{
1055 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1056}
1057
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001058static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001059_mm_set1_epi8(char b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001060{
1061 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1062}
1063
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001064static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001065_mm_setr_epi64(__m64 q0, __m64 q1)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001066{
1067 return (__m128i){ (long long)q0, (long long)q1 };
1068}
1069
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001070static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001071_mm_setr_epi32(int i0, int i1, int i2, int i3)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001072{
1073 return (__m128i)(__v4si){ i0, i1, i2, i3};
1074}
1075
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001076static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001077_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001078{
1079 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1080}
1081
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001082static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001083_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001084{
1085 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1086}
1087
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001088static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001089_mm_setzero_si128(void)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001090{
1091 return (__m128i){ 0LL, 0LL };
1092}
1093
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001094static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001095_mm_store_si128(__m128i *p, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001096{
1097 *p = b;
1098}
1099
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001100static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001101_mm_storeu_si128(__m128i *p, __m128i b)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001102{
1103 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1104}
1105
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001106static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001107_mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001108{
1109 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1110}
1111
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001112static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001113_mm_storel_epi64(__m128i *p, __m128i a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001114{
1115 __builtin_ia32_storelv4si((__v2si *)p, a);
1116}
1117
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001118static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001119_mm_stream_pd(double *p, __m128d a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001120{
1121 __builtin_ia32_movntpd(p, a);
1122}
1123
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001124static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001125_mm_stream_si128(__m128i *p, __m128i a)
Anders Carlsson3cb886b2008-12-25 23:48:58 +00001126{
1127 __builtin_ia32_movntdq(p, a);
1128}
1129
/*
 * Writes the 32-bit value a to *p using the movnti builtin
 * (MOVNTI is the non-temporal integer store).
 */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  __builtin_ia32_movnti(p, a);
}
1135
/*
 * Issues the clflush builtin for the address p (CLFLUSH flushes the cache
 * line containing that address).
 */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  __builtin_ia32_clflush(p);
}
1141
/*
 * Issues the lfence builtin (LFENCE, the load fence instruction).
 */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
}
1147
/*
 * Issues the mfence builtin (MFENCE, the full memory fence instruction).
 */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
}
1153
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001154static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001155_mm_packs_epi16(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001156{
1157 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1158}
1159
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001160static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001161_mm_packs_epi32(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001162{
1163 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1164}
1165
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001166static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001167_mm_packus_epi16(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001168{
1169 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1170}
1171
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001172static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001173_mm_extract_epi16(__m128i a, int imm)
Anders Carlsson430f9392008-12-26 00:45:50 +00001174{
1175 __v8hi b = (__v8hi)a;
1176 return b[imm];
1177}
1178
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001179static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001180_mm_insert_epi16(__m128i a, int b, int imm)
Anders Carlsson430f9392008-12-26 00:45:50 +00001181{
1182 return (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)a, b, imm);
1183}
1184
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001185static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001186_mm_movemask_epi8(__m128i a)
Anders Carlsson430f9392008-12-26 00:45:50 +00001187{
1188 return __builtin_ia32_pmovmskb128((__v16qi)a);
1189}
1190
/*
 * Immediate-controlled shuffles. These are macros rather than inline
 * functions so that imm reaches the builtin as written at the call site.
 * Each macro casts a to the lane type its builtin expects and casts the
 * result back to __m128i.
 */
#define _mm_shuffle_epi32(a, imm) \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(a), (imm)))
#define _mm_shufflehi_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(a), (imm)))
#define _mm_shufflelo_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(a), (imm)))
Anders Carlsson430f9392008-12-26 00:45:50 +00001194
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001195static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001196_mm_unpackhi_epi8(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001197{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001198 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson430f9392008-12-26 00:45:50 +00001199}
1200
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001201static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001202_mm_unpackhi_epi16(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001203{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001204 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson430f9392008-12-26 00:45:50 +00001205}
1206
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001207static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001208_mm_unpackhi_epi32(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001209{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001210 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
Anders Carlsson430f9392008-12-26 00:45:50 +00001211}
1212
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001213static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001214_mm_unpackhi_epi64(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001215{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001216 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson430f9392008-12-26 00:45:50 +00001217}
1218
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001219static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001220_mm_unpacklo_epi8(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001221{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001222 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson430f9392008-12-26 00:45:50 +00001223}
1224
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001225static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001226_mm_unpacklo_epi16(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001227{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001228 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson430f9392008-12-26 00:45:50 +00001229}
1230
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001231static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001232_mm_unpacklo_epi32(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001233{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001234 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
Anders Carlsson430f9392008-12-26 00:45:50 +00001235}
1236
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001237static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001238_mm_unpacklo_epi64(__m128i a, __m128i b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001239{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001240 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson430f9392008-12-26 00:45:50 +00001241}
1242
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001243static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001244_mm_movepi64_pi64(__m128i a)
Anders Carlsson430f9392008-12-26 00:45:50 +00001245{
1246 return (__m64)a[0];
1247}
1248
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001249static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001250_mm_movpi64_pi64(__m64 a)
Anders Carlsson430f9392008-12-26 00:45:50 +00001251{
1252 return (__m128i){ (long long)a, 0 };
1253}
1254
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001255static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001256_mm_move_epi64(__m128i a)
Anders Carlsson430f9392008-12-26 00:45:50 +00001257{
1258 return (__m128i){ a[0], 0 };
1259}
1260
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001261static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001262_mm_unpackhi_pd(__m128d a, __m128d b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001263{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001264 return __builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson430f9392008-12-26 00:45:50 +00001265}
1266
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001267static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001268_mm_unpacklo_pd(__m128d a, __m128d b)
Anders Carlsson430f9392008-12-26 00:45:50 +00001269{
Anders Carlsson7d4cd092008-12-26 00:50:47 +00001270 return __builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson430f9392008-12-26 00:45:50 +00001271}
1272
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001273static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001274_mm_movemask_pd(__m128d a)
Anders Carlsson430f9392008-12-26 00:45:50 +00001275{
1276 return __builtin_ia32_movmskpd(a);
1277}
1278
/* Forward a, b and the selector i to the shufpd builtin.
 * NOTE(review): kept as a macro rather than an inline function, presumably
 * because the builtin needs i to be a compile-time constant - confirm. */
#define _mm_shuffle_pd(a, b, i) \
  (__builtin_ia32_shufpd((a), (b), (i)))
1280
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001281static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001282_mm_castpd_ps(__m128d in)
Anders Carlsson430f9392008-12-26 00:45:50 +00001283{
1284 return (__m128)in;
1285}
1286
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001287static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001288_mm_castpd_si128(__m128d in)
Anders Carlsson430f9392008-12-26 00:45:50 +00001289{
1290 return (__m128i)in;
1291}
1292
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001293static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001294_mm_castps_pd(__m128 in)
Anders Carlsson430f9392008-12-26 00:45:50 +00001295{
1296 return (__m128d)in;
1297}
1298
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001299static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001300_mm_castps_si128(__m128 in)
Anders Carlsson430f9392008-12-26 00:45:50 +00001301{
1302 return (__m128i)in;
1303}
1304
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001305static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001306_mm_castsi128_ps(__m128i in)
Anders Carlsson430f9392008-12-26 00:45:50 +00001307{
1308 return (__m128)in;
1309}
1310
Anders Carlsson8a9574b2009-02-14 01:00:11 +00001311static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stump228fbfb2009-02-13 14:24:50 +00001312_mm_castsi128_pd(__m128i in)
Anders Carlsson430f9392008-12-26 00:45:50 +00001313{
1314 return (__m128d)in;
1315}
1316
/* Emit a single x86 `pause` instruction. The asm statement is volatile so
 * the compiler does not drop it even though it has no outputs. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__("pause");
}
1322
/* Combine two selectors into one value: x is shifted into bit 1 and y is
 * OR-ed into bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
Anders Carlsson6e593602008-12-26 00:49:43 +00001324
Anders Carlssonc7fee2c2008-12-24 01:45:22 +00001325#endif /* __SSE2__ */
1326
1327#endif /* __EMMINTRIN_H */