blob: 049e19c070122e8f0f222b64413ef2483bb1fc09 [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics -----------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* 16-byte vector types that appear in the intrinsic signatures below. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* 16-byte integer vector views (int, short, char elements) used for casts. */
typedef int __v4si __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
Anders Carlsson37f2f002008-12-24 01:45:22 +000039
Anders Carlssona2f12ae2009-02-14 01:00:11 +000040static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000041_mm_add_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000042{
43 return __builtin_ia32_addsd(a, b);
44}
45
Anders Carlssona2f12ae2009-02-14 01:00:11 +000046static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000047_mm_add_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000048{
49 return a + b;
50}
51
Anders Carlssona2f12ae2009-02-14 01:00:11 +000052static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000053_mm_sub_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000054{
55 return __builtin_ia32_subsd(a, b);
56}
57
Anders Carlssona2f12ae2009-02-14 01:00:11 +000058static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000059_mm_sub_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000060{
61 return a - b;
62}
63
Anders Carlssona2f12ae2009-02-14 01:00:11 +000064static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000065_mm_mul_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000066{
67 return __builtin_ia32_mulsd(a, b);
68}
69
Anders Carlssona2f12ae2009-02-14 01:00:11 +000070static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000071_mm_mul_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000072{
73 return a * b;
74}
75
Anders Carlssona2f12ae2009-02-14 01:00:11 +000076static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000077_mm_div_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000078{
79 return __builtin_ia32_divsd(a, b);
80}
81
Anders Carlssona2f12ae2009-02-14 01:00:11 +000082static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000083_mm_div_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000084{
85 return a / b;
86}
87
Anders Carlssona2f12ae2009-02-14 01:00:11 +000088static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000089_mm_sqrt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +000090{
91 __m128d c = __builtin_ia32_sqrtsd(b);
92 return (__m128d) { c[0], a[1] };
93}
94
Anders Carlssona2f12ae2009-02-14 01:00:11 +000095static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000096_mm_sqrt_pd(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +000097{
98 return __builtin_ia32_sqrtpd(a);
99}
100
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000101static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000102_mm_min_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000103{
104 return __builtin_ia32_minsd(a, b);
105}
106
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000107static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000108_mm_min_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000109{
110 return __builtin_ia32_minpd(a, b);
111}
112
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000113static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000114_mm_max_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000115{
116 return __builtin_ia32_maxsd(a, b);
117}
118
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000119static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000120_mm_max_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000121{
122 return __builtin_ia32_maxpd(a, b);
123}
124
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000125static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000126_mm_and_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000127{
128 return __builtin_ia32_andpd(a, b);
129}
130
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000131static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000132_mm_andnot_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000133{
134 return __builtin_ia32_andnpd(a, b);
135}
136
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000137static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000138_mm_or_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000139{
140 return __builtin_ia32_orpd(a, b);
141}
142
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000143static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000144_mm_xor_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000145{
146 return __builtin_ia32_xorpd(a, b);
147}
148
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000149static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000150_mm_cmpeq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000151{
152 return (__m128d)__builtin_ia32_cmpeqpd(a, b);
153}
154
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000155static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000156_mm_cmplt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000157{
158 return (__m128d)__builtin_ia32_cmpltpd(a, b);
159}
160
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000161static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000162_mm_cmple_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000163{
164 return (__m128d)__builtin_ia32_cmplepd(a, b);
165}
166
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000167static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000168_mm_cmpgt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000169{
170 return (__m128d)__builtin_ia32_cmpltpd(b, a);
171}
172
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000173static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000174_mm_cmpge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000175{
176 return (__m128d)__builtin_ia32_cmplepd(b, a);
177}
178
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000179static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000180_mm_cmpord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000181{
182 return (__m128d)__builtin_ia32_cmpordpd(a, b);
183}
184
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000185static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000186_mm_cmpunord_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000187{
188 return (__m128d)__builtin_ia32_cmpunordpd(a, b);
189}
190
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000191static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000192_mm_cmpneq_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000193{
194 return (__m128d)__builtin_ia32_cmpneqpd(a, b);
195}
196
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000197static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000198_mm_cmpnlt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000199{
200 return (__m128d)__builtin_ia32_cmpnltpd(a, b);
201}
202
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000203static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000204_mm_cmpnle_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000205{
206 return (__m128d)__builtin_ia32_cmpnlepd(a, b);
207}
208
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000209static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000210_mm_cmpngt_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000211{
212 return (__m128d)__builtin_ia32_cmpnltpd(b, a);
213}
214
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000215static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000216_mm_cmpnge_pd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000217{
218 return (__m128d)__builtin_ia32_cmpnlepd(b, a);
219}
220
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000221static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000222_mm_cmpeq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000223{
224 return (__m128d)__builtin_ia32_cmpeqsd(a, b);
225}
226
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000227static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000228_mm_cmplt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000229{
230 return (__m128d)__builtin_ia32_cmpltsd(a, b);
231}
232
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000233static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000234_mm_cmple_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000235{
236 return (__m128d)__builtin_ia32_cmplesd(a, b);
237}
238
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000239static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000240_mm_cmpgt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000241{
242 return (__m128d)__builtin_ia32_cmpltsd(b, a);
243}
244
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000245static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000246_mm_cmpge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000247{
248 return (__m128d)__builtin_ia32_cmplesd(b, a);
249}
250
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000251static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000252_mm_cmpord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000253{
254 return (__m128d)__builtin_ia32_cmpordsd(a, b);
255}
256
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000257static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000258_mm_cmpunord_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000259{
260 return (__m128d)__builtin_ia32_cmpunordsd(a, b);
261}
262
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000263static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000264_mm_cmpneq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000265{
266 return (__m128d)__builtin_ia32_cmpneqsd(a, b);
267}
268
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000269static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000270_mm_cmpnlt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000271{
272 return (__m128d)__builtin_ia32_cmpnltsd(a, b);
273}
274
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000275static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000276_mm_cmpnle_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000277{
278 return (__m128d)__builtin_ia32_cmpnlesd(a, b);
279}
280
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000281static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000282_mm_cmpngt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000283{
284 return (__m128d)__builtin_ia32_cmpnltsd(b, a);
285}
286
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000287static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000288_mm_cmpnge_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000289{
290 return (__m128d)__builtin_ia32_cmpnlesd(b, a);
291}
292
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000293static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000294_mm_comieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000295{
296 return __builtin_ia32_comisdeq(a, b);
297}
298
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000299static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000300_mm_comilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000301{
302 return __builtin_ia32_comisdlt(a, b);
303}
304
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000305static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000306_mm_comile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000307{
308 return __builtin_ia32_comisdle(a, b);
309}
310
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000311static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000312_mm_comigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000313{
314 return __builtin_ia32_comisdgt(a, b);
315}
316
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000317static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000318_mm_comineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000319{
320 return __builtin_ia32_comisdneq(a, b);
321}
322
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000323static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000324_mm_ucomieq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000325{
326 return __builtin_ia32_ucomisdeq(a, b);
327}
328
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000329static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000330_mm_ucomilt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000331{
332 return __builtin_ia32_ucomisdlt(a, b);
333}
334
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000335static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000336_mm_ucomile_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000337{
338 return __builtin_ia32_ucomisdle(a, b);
339}
340
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000341static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000342_mm_ucomigt_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000343{
344 return __builtin_ia32_ucomisdgt(a, b);
345}
346
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000347static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000348_mm_ucomineq_sd(__m128d a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000349{
350 return __builtin_ia32_ucomisdneq(a, b);
351}
352
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000353static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000354_mm_cvtpd_ps(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000355{
356 return __builtin_ia32_cvtpd2ps(a);
357}
358
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000359static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000360_mm_cvtps_pd(__m128 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000361{
362 return __builtin_ia32_cvtps2pd(a);
363}
364
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000365static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000366_mm_cvtepi32_pd(__m128i a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000367{
368 return __builtin_ia32_cvtdq2pd((__v4si)a);
369}
370
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000371static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000372_mm_cvtpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000373{
374 return __builtin_ia32_cvtpd2dq(a);
375}
376
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000377static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000378_mm_cvtsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000379{
380 return __builtin_ia32_cvtsd2si(a);
381}
382
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000383static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000384_mm_cvtsd_ss(__m128 a, __m128d b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000385{
386 return __builtin_ia32_cvtsd2ss(a, b);
387}
388
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000389static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000390_mm_cvtsi32_sd(__m128d a, int b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000391{
392 return __builtin_ia32_cvtsi2sd(a, b);
393}
394
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000395static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000396_mm_cvtss_sd(__m128d a, __m128 b)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000397{
398 return __builtin_ia32_cvtss2sd(a, b);
399}
400
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000401static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000402_mm_cvttpd_epi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000403{
404 return (__m128i)__builtin_ia32_cvttpd2dq(a);
405}
406
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000407static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000408_mm_cvttsd_si32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000409{
410 return __builtin_ia32_cvttsd2si(a);
411}
412
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000413static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000414_mm_cvtpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000415{
416 return (__m64)__builtin_ia32_cvtpd2pi(a);
417}
418
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000419static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000420_mm_cvttpd_pi32(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000421{
422 return (__m64)__builtin_ia32_cvttpd2pi(a);
423}
424
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000425static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000426_mm_cvtpi32_pd(__m64 a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000427{
428 return __builtin_ia32_cvtpi2pd((__v2si)a);
429}
430
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000431static inline double __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000432_mm_cvtsd_f64(__m128d a)
Anders Carlsson37f2f002008-12-24 01:45:22 +0000433{
434 return a[0];
435}
436
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000437static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000438_mm_load_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000439{
440 return *(__m128d*)dp;
441}
442
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000443static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000444_mm_load1_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000445{
446 return (__m128d){ dp[0], dp[0] };
447}
448
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000449static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000450_mm_loadr_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000451{
452 return (__m128d){ dp[1], dp[0] };
453}
454
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000455static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000456_mm_loadu_pd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000457{
458 return __builtin_ia32_loadupd(dp);
459}
460
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000461static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000462_mm_load_sd(double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000463{
464 return (__m128d){ *dp, 0.0 };
465}
466
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000467static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000468_mm_loadh_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000469{
470 return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2);
471}
472
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000473static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000474_mm_loadl_pd(__m128d a, double const *dp)
Anders Carlsson445afa02008-12-24 02:11:54 +0000475{
476 return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1);
477}
478
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000479static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000480_mm_set_sd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000481{
482 return (__m128d){ w, 0 };
483}
484
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000485static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000486_mm_set1_pd(double w)
Anders Carlsson445afa02008-12-24 02:11:54 +0000487{
488 return (__m128d){ w, w };
489}
490
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000491static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000492_mm_set_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000493{
494 return (__m128d){ w, x };
495}
496
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000497static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000498_mm_setr_pd(double w, double x)
Anders Carlsson445afa02008-12-24 02:11:54 +0000499{
500 return (__m128d){ x, w };
501}
502
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000503static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000504_mm_setzero_pd(void)
Anders Carlsson445afa02008-12-24 02:11:54 +0000505{
506 return (__m128d){ 0, 0 };
507}
508
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000509static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000510_mm_move_sd(__m128d a, __m128d b)
Anders Carlsson445afa02008-12-24 02:11:54 +0000511{
512 return (__m128d){ b[0], a[1] };
513}
514
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000515static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000516_mm_store_sd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000517{
518 dp[0] = a[0];
519}
520
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000521static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000522_mm_store1_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000523{
524 dp[0] = a[0];
525 dp[1] = a[0];
526}
527
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000528static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000529_mm_store_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000530{
531 *(__m128d *)dp = a;
532}
533
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000534static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000535_mm_storeu_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000536{
537 __builtin_ia32_storeupd(dp, a);
538}
539
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000540static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000541_mm_storer_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000542{
543 dp[0] = a[1];
544 dp[1] = a[0];
545}
546
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000547static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000548_mm_storeh_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000549{
550 dp[0] = a[1];
551}
552
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000553static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000554_mm_storel_pd(double *dp, __m128d a)
Anders Carlsson445afa02008-12-24 02:11:54 +0000555{
556 dp[0] = a[0];
557}
558
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000559static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000560_mm_add_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000561{
562 return (__m128i)((__v16qi)a + (__v16qi)b);
563}
564
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000565static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000566_mm_add_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000567{
568 return (__m128i)((__v8hi)a + (__v8hi)b);
569}
570
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000571static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000572_mm_add_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000573{
574 return (__m128i)((__v4si)a + (__v4si)b);
575}
576
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000577static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000578_mm_add_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000579{
580 return a + b;
581}
582
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000583static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000584_mm_add_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000585{
586 return a + b;
587}
588
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000589static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000590_mm_adds_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000591{
592 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
593}
594
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000595static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000596_mm_adds_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000597{
598 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
599}
600
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000601static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000602_mm_adds_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000603{
604 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
605}
606
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000607static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000608_mm_adds_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000609{
610 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
611}
612
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000613static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000614_mm_avg_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000615{
616 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
617}
618
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000619static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000620_mm_avg_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000621{
622 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
623}
624
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000625static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000626_mm_madd_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000627{
628 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
629}
630
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000631static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000632_mm_max_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000633{
634 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
635}
636
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000637static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000638_mm_max_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000639{
640 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
641}
642
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000643static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000644_mm_min_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000645{
646 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
647}
648
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000649static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000650_mm_min_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000651{
652 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
653}
654
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000655static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000656_mm_mulhi_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000657{
658 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
659}
660
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000661static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000662_mm_mulhi_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000663{
664 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
665}
666
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000667static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000668_mm_mullo_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000669{
670 return (__m128i)__builtin_ia32_pmullw128((__v8hi)a, (__v8hi)b);
671}
672
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000673static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000674_mm_mul_su32(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000675{
676 return __builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
677}
678
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000679static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000680_mm_mul_epu32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000681{
682 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
683}
684
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000685static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000686_mm_sad_epu(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000687{
688 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
689}
690
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000691static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000692_mm_sub_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000693{
694 return (__m128i)((__v16qi)a - (__v16qi)b);
695}
696
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000697static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000698_mm_sub_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000699{
700 return (__m128i)((__v8hi)a - (__v8hi)b);
701}
702
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000703static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000704_mm_sub_epi32(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000705{
706 return (__m128i)((__v4si)a - (__v4si)b);
707}
708
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000709static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000710_mm_sub_si64(__m64 a, __m64 b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000711{
712 return a - b;
713}
714
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000715static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000716_mm_sub_epi64(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000717{
718 return a - b;
719}
720
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000721static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000722_mm_subs_epi8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000723{
724 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
725}
726
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000727static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000728_mm_subs_epi16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000729{
730 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
731}
732
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000733static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000734_mm_subs_epu8(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000735{
736 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
737}
738
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000739static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000740_mm_subs_epu16(__m128i a, __m128i b)
Anders Carlsson07603aa2008-12-24 02:41:00 +0000741{
742 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
743}
744
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000745static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000746_mm_and_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000747{
748 return __builtin_ia32_pand128(a, b);
749}
750
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000751static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000752_mm_andnot_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000753{
754 return __builtin_ia32_pandn128(a, b);
755}
756
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000757static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000758_mm_or_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000759{
760 return __builtin_ia32_por128(a, b);
761}
762
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000763static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000764_mm_xor_si128(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000765{
766 return __builtin_ia32_pxor128(a, b);
767}
768
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000769static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000770_mm_slli_si128(__m128i a, int imm)
Anders Carlsson0727df02008-12-25 23:48:58 +0000771{
772 return __builtin_ia32_pslldqi128(a, imm * 8);
773}
774
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000775static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000776_mm_slli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000777{
778 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
779}
780
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000781static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000782_mm_sll_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000783{
784 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
785}
786
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000787static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000788_mm_slli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000789{
790 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
791}
792
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000793static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000794_mm_sll_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000795{
796 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
797}
798
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000799static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000800_mm_slli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000801{
802 return __builtin_ia32_psllqi128(a, count);
803}
804
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000805static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000806_mm_sll_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000807{
808 return __builtin_ia32_psllq128(a, count);
809}
810
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000811static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000812_mm_srai_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000813{
814 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
815}
816
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000817static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000818_mm_sra_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000819{
820 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
821}
822
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000823static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000824_mm_srai_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000825{
826 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
827}
828
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000829static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000830_mm_sra_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000831{
832 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
833}
834
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000835static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000836_mm_srli_si128(__m128i a, int imm)
Anders Carlsson0727df02008-12-25 23:48:58 +0000837{
838 return __builtin_ia32_psrldqi128(a, imm * 8);
839}
840
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000841static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000842_mm_srli_epi16(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000843{
844 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
845}
846
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000847static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000848_mm_srl_epi16(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000849{
850 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
851}
852
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000853static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000854_mm_srli_epi32(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000855{
856 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
857}
858
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000859static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000860_mm_srl_epi32(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000861{
862 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
863}
864
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000865static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000866_mm_srli_epi64(__m128i a, int count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000867{
868 return __builtin_ia32_psrlqi128(a, count);
869}
870
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000871static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000872_mm_srl_epi64(__m128i a, __m128i count)
Anders Carlsson0727df02008-12-25 23:48:58 +0000873{
874 return __builtin_ia32_psrlq128(a, count);
875}
876
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000877static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000878_mm_cmpeq_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000879{
880 return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b);
881}
882
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000883static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000884_mm_cmpeq_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000885{
886 return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b);
887}
888
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000889static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000890_mm_cmpeq_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000891{
892 return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b);
893}
894
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000895static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000896_mm_cmpgt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000897{
898 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b);
899}
900
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000901static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000902_mm_cmpgt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000903{
904 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b);
905}
906
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000907static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000908_mm_cmpgt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000909{
910 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b);
911}
912
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000913static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000914_mm_cmplt_epi8(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000915{
916 return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a);
917}
918
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000919static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000920_mm_cmplt_epi16(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000921{
922 return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a);
923}
924
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000925static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000926_mm_cmplt_epi32(__m128i a, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000927{
928 return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a);
929}
930
931#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000932static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000933_mm_cvtsi64_sd(__m128d a, long long b)
Anders Carlsson0727df02008-12-25 23:48:58 +0000934{
935 return __builtin_ia32_cvtsi642sd(a, b);
936}
937
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000938static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000939_mm_cvtsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000940{
941 return __builtin_ia32_cvtsd2si64(a);
942}
943
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000944static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000945_mm_cvttsd_si64(__m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000946{
947 return __builtin_ia32_cvttsd2si64(a);
948}
949#endif
950
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000951static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000952_mm_cvtepi32_ps(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000953{
954 return __builtin_ia32_cvtdq2ps((__v4si)a);
955}
956
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000957static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000958_mm_cvtps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000959{
960 return (__m128i)__builtin_ia32_cvtps2dq(a);
961}
962
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000963static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000964_mm_cvttps_epi32(__m128 a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000965{
966 return (__m128i)__builtin_ia32_cvttps2dq(a);
967}
968
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000969static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000970_mm_cvtsi32_si128(int a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000971{
972 return (__m128i)(__v4si){ a, 0, 0, 0 };
973}
974
975#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000976static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000977_mm_cvtsi64_si128(long long a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000978{
979 return (__m128i){ a, 0 };
980}
981#endif
982
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000983static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000984_mm_cvtsi128_si32(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000985{
986 __v4si b = (__v4si)a;
987 return b[0];
988}
989
990#ifdef __x86_64__
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000991static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000992_mm_cvtsi128_si64(__m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +0000993{
994 return a[0];
995}
996#endif
997
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000998static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000999_mm_load_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001000{
1001 return *p;
1002}
1003
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001004static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001005_mm_loadu_si128(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001006{
1007 return (__m128i)__builtin_ia32_loaddqu((char const *)p);
1008}
1009
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001010static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001011_mm_loadl_epi64(__m128i const *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001012{
1013 return (__m128i)__builtin_ia32_loadlv4si((__v2si *)p);
1014}
1015
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001016static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001017_mm_set_epi64(__m64 q1, __m64 q0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001018{
1019 return (__m128i){ (long long)q0, (long long)q1 };
1020}
1021
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001022static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001023_mm_set_epi32(int i3, int i2, int i1, int i0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001024{
1025 return (__m128i)(__v4si){ i0, i1, i2, i3};
1026}
1027
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001028static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001029_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001030{
1031 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1032}
1033
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001034static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001035_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson0727df02008-12-25 23:48:58 +00001036{
1037 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1038}
1039
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001040static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001041_mm_set1_epi64(__m64 q)
Anders Carlsson0727df02008-12-25 23:48:58 +00001042{
1043 return (__m128i){ (long long)q, (long long)q };
1044}
1045
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001046static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001047_mm_set1_epi32(int i)
Anders Carlsson0727df02008-12-25 23:48:58 +00001048{
1049 return (__m128i)(__v4si){ i, i, i, i };
1050}
1051
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001052static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001053_mm_set1_epi16(short w)
Anders Carlsson0727df02008-12-25 23:48:58 +00001054{
1055 return (__m128i)(__v8hi){ w, w, w, w, w, w, w, w };
1056}
1057
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001058static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001059_mm_set1_epi8(char b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001060{
1061 return (__m128i)(__v16qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
1062}
1063
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001064static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001065_mm_setr_epi64(__m64 q0, __m64 q1)
Anders Carlsson0727df02008-12-25 23:48:58 +00001066{
1067 return (__m128i){ (long long)q0, (long long)q1 };
1068}
1069
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001070static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001071_mm_setr_epi32(int i0, int i1, int i2, int i3)
Anders Carlsson0727df02008-12-25 23:48:58 +00001072{
1073 return (__m128i)(__v4si){ i0, i1, i2, i3};
1074}
1075
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001076static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001077_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Anders Carlsson0727df02008-12-25 23:48:58 +00001078{
1079 return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1080}
1081
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001082static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001083_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
Anders Carlsson0727df02008-12-25 23:48:58 +00001084{
1085 return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1086}
1087
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001088static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001089_mm_setzero_si128(void)
Anders Carlsson0727df02008-12-25 23:48:58 +00001090{
1091 return (__m128i){ 0LL, 0LL };
1092}
1093
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001094static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001095_mm_store_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001096{
1097 *p = b;
1098}
1099
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001100static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001101_mm_storeu_si128(__m128i *p, __m128i b)
Anders Carlsson0727df02008-12-25 23:48:58 +00001102{
1103 __builtin_ia32_storedqu((char *)p, (__v16qi)b);
1104}
1105
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001106static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001107_mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
Anders Carlsson0727df02008-12-25 23:48:58 +00001108{
1109 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1110}
1111
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001112static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001113_mm_storel_epi64(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001114{
1115 __builtin_ia32_storelv4si((__v2si *)p, a);
1116}
1117
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001118static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001119_mm_stream_pd(double *p, __m128d a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001120{
1121 __builtin_ia32_movntpd(p, a);
1122}
1123
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001124static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001125_mm_stream_si128(__m128i *p, __m128i a)
Anders Carlsson0727df02008-12-25 23:48:58 +00001126{
1127 __builtin_ia32_movntdq(p, a);
1128}
1129
/* Stores the int a to p through the movnti builtin (MOVNTI, non-temporal store). */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *p, int a)
{
  __builtin_ia32_movnti(p, a);
  return;
}
1135
/* Issues CLFLUSH for the address p via the clflush builtin. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *p)
{
  __builtin_ia32_clflush(p);
  return;
}
1141
/* Issues an LFENCE via the lfence builtin. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  __builtin_ia32_lfence();
  return;
}
1147
/* Issues an MFENCE via the mfence builtin. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  __builtin_ia32_mfence();
  return;
}
1153
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001154static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001155_mm_packs_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001156{
1157 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1158}
1159
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001160static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001161_mm_packs_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001162{
1163 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1164}
1165
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001166static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001167_mm_packus_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001168{
1169 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1170}
1171
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001172static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001173_mm_extract_epi16(__m128i a, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001174{
1175 __v8hi b = (__v8hi)a;
1176 return b[imm];
1177}
1178
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001179static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001180_mm_insert_epi16(__m128i a, int b, int imm)
Anders Carlsson45470752008-12-26 00:45:50 +00001181{
1182 return (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)a, b, imm);
1183}
1184
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001185static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001186_mm_movemask_epi8(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001187{
1188 return __builtin_ia32_pmovmskb128((__v16qi)a);
1189}
1190
/*
 * Immediate-controlled shuffles (PSHUFD / PSHUFHW / PSHUFLW).  These are
 * macros rather than inline functions, presumably so that imm reaches the
 * builtin as a literal operand.
 */
#define _mm_shuffle_epi32(a, imm) ((__m128i)__builtin_ia32_pshufd((__v4si)(a), (imm)))
#define _mm_shufflehi_epi16(a, imm) ((__m128i)__builtin_ia32_pshufhw((__v8hi)(a), (imm)))
#define _mm_shufflelo_epi16(a, imm) ((__m128i)__builtin_ia32_pshuflw((__v8hi)(a), (imm)))
Anders Carlsson45470752008-12-26 00:45:50 +00001194
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001195static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001196_mm_unpackhi_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001197{
Anders Carlsson92d66862008-12-26 00:50:47 +00001198 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
Anders Carlsson45470752008-12-26 00:45:50 +00001199}
1200
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001201static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001202_mm_unpackhi_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001203{
Anders Carlsson92d66862008-12-26 00:50:47 +00001204 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001205}
1206
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001207static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001208_mm_unpackhi_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001209{
Anders Carlsson92d66862008-12-26 00:50:47 +00001210 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4+2, 3, 4+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001211}
1212
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001213static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001214_mm_unpackhi_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001215{
Anders Carlsson92d66862008-12-26 00:50:47 +00001216 return (__m128i)__builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001217}
1218
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001219static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001220_mm_unpacklo_epi8(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001221{
Anders Carlsson92d66862008-12-26 00:50:47 +00001222 return (__m128i)__builtin_shufflevector((__v16qi)a, (__v16qi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
Anders Carlsson45470752008-12-26 00:45:50 +00001223}
1224
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001225static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001226_mm_unpacklo_epi16(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001227{
Anders Carlsson92d66862008-12-26 00:50:47 +00001228 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
Anders Carlsson45470752008-12-26 00:45:50 +00001229}
1230
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001231static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001232_mm_unpacklo_epi32(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001233{
Anders Carlsson92d66862008-12-26 00:50:47 +00001234 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4+0, 1, 4+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001235}
1236
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001237static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001238_mm_unpacklo_epi64(__m128i a, __m128i b)
Anders Carlsson45470752008-12-26 00:45:50 +00001239{
Anders Carlsson92d66862008-12-26 00:50:47 +00001240 return (__m128i)__builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001241}
1242
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001243static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001244_mm_movepi64_pi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001245{
1246 return (__m64)a[0];
1247}
1248
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001249static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001250_mm_movpi64_pi64(__m64 a)
Anders Carlsson45470752008-12-26 00:45:50 +00001251{
1252 return (__m128i){ (long long)a, 0 };
1253}
1254
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001255static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001256_mm_move_epi64(__m128i a)
Anders Carlsson45470752008-12-26 00:45:50 +00001257{
1258 return (__m128i){ a[0], 0 };
1259}
1260
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001261static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001262_mm_unpackhi_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001263{
Anders Carlsson92d66862008-12-26 00:50:47 +00001264 return __builtin_shufflevector(a, b, 1, 2+1);
Anders Carlsson45470752008-12-26 00:45:50 +00001265}
1266
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001267static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001268_mm_unpacklo_pd(__m128d a, __m128d b)
Anders Carlsson45470752008-12-26 00:45:50 +00001269{
Anders Carlsson92d66862008-12-26 00:50:47 +00001270 return __builtin_shufflevector(a, b, 0, 2+0);
Anders Carlsson45470752008-12-26 00:45:50 +00001271}
1272
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001273static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001274_mm_movemask_pd(__m128d a)
Anders Carlsson45470752008-12-26 00:45:50 +00001275{
1276 return __builtin_ia32_movmskpd(a);
1277}
1278
/* Expands to the shufpd builtin applied to a and b with selector i.
 * Each argument is parenthesized before being forwarded.
 * NOTE(review): presumably a macro rather than a function so that i reaches
 * the builtin as a compile-time constant -- confirm. */
#define _mm_shuffle_pd(a, b, i) \
  (__builtin_ia32_shufpd((a), (b), (i)))
1280
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001281static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001282_mm_castpd_ps(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001283{
1284 return (__m128)in;
1285}
1286
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001287static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001288_mm_castpd_si128(__m128d in)
Anders Carlsson45470752008-12-26 00:45:50 +00001289{
1290 return (__m128i)in;
1291}
1292
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001293static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001294_mm_castps_pd(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001295{
1296 return (__m128d)in;
1297}
1298
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001299static inline __m128i __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001300_mm_castps_si128(__m128 in)
Anders Carlsson45470752008-12-26 00:45:50 +00001301{
1302 return (__m128i)in;
1303}
1304
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001305static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001306_mm_castsi128_ps(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001307{
1308 return (__m128)in;
1309}
1310
Anders Carlssona2f12ae2009-02-14 01:00:11 +00001311static inline __m128d __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +00001312_mm_castsi128_pd(__m128i in)
Anders Carlsson45470752008-12-26 00:45:50 +00001313{
1314 return (__m128d)in;
1315}
1316
/* Emits a single `pause` instruction via inline assembly.
 * The asm is marked volatile so the compiler does not discard it even though
 * it has no outputs. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1322
/* Combines two selector values into one integer: x is shifted left by one
 * bit and OR-ed with y. For x and y each 0 or 1 this yields 0..3, with x in
 * bit 1 and y in bit 0. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
Anders Carlssonf1bc6602008-12-26 00:49:43 +00001324
Anders Carlsson37f2f002008-12-24 01:45:22 +00001325#endif /* __SSE2__ */
1326
1327#endif /* __EMMINTRIN_H */