blob: f0d252c4566930a3b47f6e0a4416220e1830084f [file] [log] [blame]
/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __XMMINTRIN_H
25#define __XMMINTRIN_H
26
27#ifndef __SSE__
Anders Carlsson4fd3e632008-12-26 00:57:11 +000028#error "SSE instruction set not enabled"
Anders Carlsson566d8da2008-12-22 00:01:20 +000029#else
30
Anders Carlsson4fcc3132008-12-22 00:48:30 +000031#include <mmintrin.h>
32
/* 16-byte vector types used throughout this header. */
typedef int   __v4si __attribute__((__vector_size__(16))); /* int elements   */
typedef float __v4sf __attribute__((__vector_size__(16))); /* float elements */
typedef float __m128 __attribute__((__vector_size__(16))); /* public SSE type */
36
Anders Carlsson398082e2008-12-22 17:42:23 +000037#include <mm_malloc.h>
Anders Carlsson398082e2008-12-22 17:42:23 +000038
Chris Lattner1bddbcb2010-03-22 18:14:12 +000039static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000040_mm_add_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000041{
Eli Friedman80c80042009-06-06 02:13:04 +000042 a[0] += b[0];
43 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000044}
45
Chris Lattner1bddbcb2010-03-22 18:14:12 +000046static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000047_mm_add_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000048{
49 return a + b;
50}
51
Chris Lattner1bddbcb2010-03-22 18:14:12 +000052static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000053_mm_sub_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000054{
Eli Friedman80c80042009-06-06 02:13:04 +000055 a[0] -= b[0];
56 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000057}
58
Chris Lattner1bddbcb2010-03-22 18:14:12 +000059static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000060_mm_sub_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000061{
62 return a - b;
63}
64
Chris Lattner1bddbcb2010-03-22 18:14:12 +000065static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000066_mm_mul_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000067{
Eli Friedman80c80042009-06-06 02:13:04 +000068 a[0] *= b[0];
69 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000070}
71
Chris Lattner1bddbcb2010-03-22 18:14:12 +000072static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000073_mm_mul_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000074{
75 return a * b;
76}
77
Chris Lattner1bddbcb2010-03-22 18:14:12 +000078static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000079_mm_div_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000080{
Eli Friedman80c80042009-06-06 02:13:04 +000081 a[0] /= b[0];
82 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000083}
84
Chris Lattner1bddbcb2010-03-22 18:14:12 +000085static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000086_mm_div_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000087{
88 return a / b;
89}
90
Chris Lattner1bddbcb2010-03-22 18:14:12 +000091static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000092_mm_sqrt_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +000093{
94 return __builtin_ia32_sqrtss(a);
95}
96
Chris Lattner1bddbcb2010-03-22 18:14:12 +000097static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000098_mm_sqrt_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +000099{
100 return __builtin_ia32_sqrtps(a);
101}
102
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000103static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000104_mm_rcp_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000105{
106 return __builtin_ia32_rcpss(a);
107}
108
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000109static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000110_mm_rcp_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000111{
112 return __builtin_ia32_rcpps(a);
113}
114
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000115static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000116_mm_rsqrt_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000117{
118 return __builtin_ia32_rsqrtss(a);
119}
120
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000121static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000122_mm_rsqrt_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000123{
124 return __builtin_ia32_rsqrtps(a);
125}
126
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000127static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000128_mm_min_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000129{
130 return __builtin_ia32_minss(a, b);
131}
132
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000133static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000134_mm_min_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000135{
136 return __builtin_ia32_minps(a, b);
137}
138
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000139static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000140_mm_max_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000141{
142 return __builtin_ia32_maxss(a, b);
143}
144
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000145static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000146_mm_max_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000147{
148 return __builtin_ia32_maxps(a, b);
149}
150
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000151static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000152_mm_and_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000153{
Eli Friedman80c80042009-06-06 02:13:04 +0000154 return (__m128)((__v4si)a & (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000155}
156
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000157static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000158_mm_andnot_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000159{
Eli Friedman80c80042009-06-06 02:13:04 +0000160 return (__m128)(~(__v4si)a & (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000161}
162
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000163static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000164_mm_or_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000165{
Eli Friedman80c80042009-06-06 02:13:04 +0000166 return (__m128)((__v4si)a | (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000167}
168
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000169static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000170_mm_xor_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000171{
Chris Lattner2c483452010-01-07 00:36:41 +0000172 return (__m128)((__v4si)a ^ (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000173}
174
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000175static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000176_mm_cmpeq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000177{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000178 return (__m128)__builtin_ia32_cmpss(a, b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000179}
180
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000181static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000182_mm_cmpeq_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000183{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000184 return (__m128)__builtin_ia32_cmpps(a, b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000185}
186
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000187static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000188_mm_cmplt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000189{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000190 return (__m128)__builtin_ia32_cmpss(a, b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000191}
192
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000193static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000194_mm_cmplt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000195{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000196 return (__m128)__builtin_ia32_cmpps(a, b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000197}
198
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000199static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000200_mm_cmple_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000201{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000202 return (__m128)__builtin_ia32_cmpss(a, b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000203}
204
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000205static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000206_mm_cmple_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000207{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000208 return (__m128)__builtin_ia32_cmpps(a, b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000209}
210
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000211static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000212_mm_cmpgt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000213{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000214 return (__m128)__builtin_ia32_cmpss(b, a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000215}
216
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000217static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000218_mm_cmpgt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000219{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000220 return (__m128)__builtin_ia32_cmpps(b, a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000221}
222
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000223static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000224_mm_cmpge_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000225{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000226 return (__m128)__builtin_ia32_cmpss(b, a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000227}
228
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000229static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000230_mm_cmpge_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000231{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000232 return (__m128)__builtin_ia32_cmpps(b, a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000233}
234
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000235static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000236_mm_cmpneq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000237{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000238 return (__m128)__builtin_ia32_cmpss(a, b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000239}
240
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000241static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000242_mm_cmpneq_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000243{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000244 return (__m128)__builtin_ia32_cmpps(a, b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000245}
246
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000247static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000248_mm_cmpnlt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000249{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000250 return (__m128)__builtin_ia32_cmpss(a, b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000251}
252
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000253static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000254_mm_cmpnlt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000255{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000256 return (__m128)__builtin_ia32_cmpps(a, b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000257}
258
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000259static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000260_mm_cmpnle_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000261{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000262 return (__m128)__builtin_ia32_cmpss(a, b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000263}
264
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000265static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000266_mm_cmpnle_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000267{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000268 return (__m128)__builtin_ia32_cmpps(a, b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000269}
270
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000271static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000272_mm_cmpngt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000273{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000274 return (__m128)__builtin_ia32_cmpss(b, a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000275}
276
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000277static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000278_mm_cmpngt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000279{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000280 return (__m128)__builtin_ia32_cmpps(b, a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000281}
282
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000283static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000284_mm_cmpnge_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000285{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000286 return (__m128)__builtin_ia32_cmpss(b, a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000287}
288
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000289static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000290_mm_cmpnge_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000291{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000292 return (__m128)__builtin_ia32_cmpps(b, a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000293}
294
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000295static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000296_mm_cmpord_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000297{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000298 return (__m128)__builtin_ia32_cmpss(a, b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000299}
300
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000301static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000302_mm_cmpord_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000303{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000304 return (__m128)__builtin_ia32_cmpps(a, b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000305}
306
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000307static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000308_mm_cmpunord_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000309{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000310 return (__m128)__builtin_ia32_cmpss(a, b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000311}
312
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000313static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000314_mm_cmpunord_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000315{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000316 return (__m128)__builtin_ia32_cmpps(a, b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000317}
318
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000319static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000320_mm_comieq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000321{
322 return __builtin_ia32_comieq(a, b);
323}
324
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000325static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000326_mm_comilt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000327{
328 return __builtin_ia32_comilt(a, b);
329}
330
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000331static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000332_mm_comile_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000333{
334 return __builtin_ia32_comile(a, b);
335}
336
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000337static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000338_mm_comigt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000339{
340 return __builtin_ia32_comigt(a, b);
341}
342
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000343static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000344_mm_comige_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000345{
346 return __builtin_ia32_comige(a, b);
347}
348
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000349static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000350_mm_comineq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000351{
352 return __builtin_ia32_comineq(a, b);
353}
354
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000355static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000356_mm_ucomieq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000357{
358 return __builtin_ia32_ucomieq(a, b);
359}
360
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000361static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000362_mm_ucomilt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000363{
364 return __builtin_ia32_ucomilt(a, b);
365}
366
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000367static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000368_mm_ucomile_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000369{
370 return __builtin_ia32_ucomile(a, b);
371}
372
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000373static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000374_mm_ucomigt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000375{
376 return __builtin_ia32_ucomigt(a, b);
377}
378
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000379static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000380_mm_ucomige_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000381{
382 return __builtin_ia32_ucomige(a, b);
383}
384
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000385static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000386_mm_ucomineq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000387{
388 return __builtin_ia32_ucomineq(a, b);
389}
390
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000391static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000392_mm_cvtss_si32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000393{
394 return __builtin_ia32_cvtss2si(a);
395}
396
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000397static __inline__ int __attribute__((__always_inline__, __nodebug__))
Chris Lattneref5ebf62010-02-16 18:21:25 +0000398_mm_cvt_ss2si(__m128 a)
399{
400 return _mm_cvtss_si32(a);
401}
402
Eli Friedman80c80042009-06-06 02:13:04 +0000403#ifdef __x86_64__
404
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000405static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000406_mm_cvtss_si64(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000407{
408 return __builtin_ia32_cvtss2si64(a);
409}
410
Eli Friedman80c80042009-06-06 02:13:04 +0000411#endif
412
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000413static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000414_mm_cvtps_pi32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000415{
416 return (__m64)__builtin_ia32_cvtps2pi(a);
417}
418
Chandler Carruthfa38c812010-07-22 06:47:28 +0000419static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
420_mm_cvt_ps2pi(__m128 a)
421{
422 return _mm_cvtps_pi32(a);
423}
424
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000425static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000426_mm_cvttss_si32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000427{
Eli Friedman80c80042009-06-06 02:13:04 +0000428 return a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000429}
430
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000431static __inline__ int __attribute__((__always_inline__, __nodebug__))
Chris Lattneref5ebf62010-02-16 18:21:25 +0000432_mm_cvtt_ss2si(__m128 a)
433{
434 return _mm_cvttss_si32(a);
435}
436
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000437static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000438_mm_cvttss_si64(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000439{
Eli Friedman80c80042009-06-06 02:13:04 +0000440 return a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000441}
442
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000443static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000444_mm_cvttps_pi32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000445{
446 return (__m64)__builtin_ia32_cvttps2pi(a);
447}
448
Chandler Carruthfa38c812010-07-22 06:47:28 +0000449static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
450_mm_cvtt_ps2pi(__m128 a)
451{
452 return _mm_cvttps_pi32(a);
453}
454
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000455static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000456_mm_cvtsi32_ss(__m128 a, int b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000457{
Eli Friedman80c80042009-06-06 02:13:04 +0000458 a[0] = b;
459 return a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000460}
461
Chandler Carruthfa38c812010-07-22 06:47:28 +0000462static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
463_mm_cvtt_si2ss(__m128 a, int b)
464{
465 return _mm_cvtsi32_ss(a, b);
466}
467
Anders Carlsson1b76b802008-12-22 01:26:50 +0000468#ifdef __x86_64__
469
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000470static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000471_mm_cvtsi64_ss(__m128 a, long long b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000472{
Eli Friedman80c80042009-06-06 02:13:04 +0000473 a[0] = b;
474 return a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000475}
476
Anders Carlsson1b76b802008-12-22 01:26:50 +0000477#endif
478
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000479static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000480_mm_cvtpi32_ps(__m128 a, __m64 b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000481{
482 return __builtin_ia32_cvtpi2ps(a, (__v2si)b);
483}
484
Chandler Carruthfa38c812010-07-22 06:47:28 +0000485static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
486_mm_cvtt_pi2ps(__m128 a, __m64 b)
487{
488 return _mm_cvtpi32_ps(a, b);
489}
490
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000491static __inline__ float __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000492_mm_cvtss_f32(__m128 a)
Anders Carlsson1b76b802008-12-22 01:26:50 +0000493{
Anders Carlssona6431dc2008-12-22 07:08:03 +0000494 return a[0];
Anders Carlsson1b76b802008-12-22 01:26:50 +0000495}
496
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000497static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000498_mm_loadh_pi(__m128 a, const __m64 *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000499{
Eli Friedmane0ae8bd2009-06-07 07:12:56 +0000500 __m128 b;
501 b[0] = *(float*)p;
502 b[1] = *((float*)p+1);
503 return __builtin_shufflevector(a, b, 0, 1, 4, 5);
Anders Carlsson97700862008-12-22 02:43:30 +0000504}
505
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000506static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000507_mm_loadl_pi(__m128 a, const __m64 *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000508{
Eli Friedman80c80042009-06-06 02:13:04 +0000509 __m128 b;
510 b[0] = *(float*)p;
511 b[1] = *((float*)p+1);
Eli Friedmane0ae8bd2009-06-07 07:12:56 +0000512 return __builtin_shufflevector(a, b, 4, 5, 2, 3);
Anders Carlsson97700862008-12-22 02:43:30 +0000513}
514
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000515static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000516_mm_load_ss(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000517{
518 return (__m128){ *p, 0, 0, 0 };
519}
520
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000521static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000522_mm_load1_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000523{
524 return (__m128){ *p, *p, *p, *p };
525}
526
/* Alternate spelling of _mm_load1_ps. */
#define _mm_load_ps1(p) _mm_load1_ps(p)
528
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000529static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000530_mm_load_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000531{
532 return *(__m128*)p;
533}
534
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000535static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000536_mm_loadu_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000537{
538 return __builtin_ia32_loadups(p);
539}
540
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000541static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000542_mm_loadr_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000543{
544 __m128 a = _mm_load_ps(p);
545 return __builtin_shufflevector(a, a, 3, 2, 1, 0);
546}
547
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000548static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000549_mm_set_ss(float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000550{
551 return (__m128){ w, 0, 0, 0 };
552}
553
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000554static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000555_mm_set1_ps(float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000556{
557 return (__m128){ w, w, w, w };
558}
559
Anders Carlsson12868cc2008-12-27 04:26:15 +0000560// Microsoft specific.
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000561static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000562_mm_set_ps1(float w)
Anders Carlsson12868cc2008-12-27 04:26:15 +0000563{
564 return _mm_set1_ps(w);
565}
566
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000567static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000568_mm_set_ps(float z, float y, float x, float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000569{
570 return (__m128){ w, x, y, z };
571}
572
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000573static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000574_mm_setr_ps(float z, float y, float x, float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000575{
576 return (__m128){ z, y, x, w };
577}
578
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000579static __inline__ __m128 __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000580_mm_setzero_ps(void)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000581{
582 return (__m128){ 0, 0, 0, 0 };
583}
584
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000585static __inline__ void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000586_mm_storeh_pi(__m64 *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000587{
588 __builtin_ia32_storehps((__v2si *)p, a);
589}
590
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000591static __inline__ void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000592_mm_storel_pi(__m64 *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000593{
594 __builtin_ia32_storelps((__v2si *)p, a);
595}
596
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000597static __inline__ void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000598_mm_store_ss(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000599{
600 *p = a[0];
601}
602
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000603static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000604_mm_storeu_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000605{
606 __builtin_ia32_storeups(p, a);
607}
608
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000609static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000610_mm_store1_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000611{
612 a = __builtin_shufflevector(a, a, 0, 0, 0, 0);
613 _mm_storeu_ps(p, a);
614}
615
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000616static __inline__ void __attribute__((__always_inline__, __nodebug__))
Chandler Carruthfa38c812010-07-22 06:47:28 +0000617_mm_store_ps1(float *p, __m128 a)
618{
619 return _mm_store1_ps(p, a);
620}
621
622static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000623_mm_store_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000624{
625 *(__m128 *)p = a;
626}
627
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000628static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000629_mm_storer_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000630{
631 a = __builtin_shufflevector(a, a, 3, 2, 1, 0);
632 _mm_store_ps(p, a);
633}
634
/* Locality hints for _mm_prefetch.  The value is handed unchanged to
   __builtin_prefetch as its third ("locality") argument, where 3 asks for the
   highest degree of temporal locality and 0 for none.  T0 (keep in all cache
   levels) therefore has to be 3 and NTA (non-temporal) 0. */
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0

/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Prefetches the cache line containing address a for reading (rw = 0) with
   the locality hint sel, which must be one of the _MM_HINT_* constants. */
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
Anders Carlssondedad4e2008-12-22 03:50:21 +0000644
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000645static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000646_mm_stream_pi(__m64 *p, __m64 a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000647{
648 __builtin_ia32_movntq(p, a);
649}
650
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000651static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000652_mm_stream_ps(float *p, __m128 a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000653{
654 __builtin_ia32_movntps(p, a);
655}
656
/* Issues a store fence through the sfence builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
}
662
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000663static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000664_mm_extract_pi16(__m64 a, int n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000665{
Anders Carlsson62af71c2008-12-22 04:55:36 +0000666 __v4hi b = (__v4hi)a;
Eli Friedman80c80042009-06-06 02:13:04 +0000667 return (unsigned short)b[n & 3];
Anders Carlsson62af71c2008-12-22 04:55:36 +0000668}
669
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000670static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000671_mm_insert_pi16(__m64 a, int d, int n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000672{
Eli Friedman80c80042009-06-06 02:13:04 +0000673 __v4hi b = (__v4hi)a;
674 b[n & 3] = d;
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000675 return (__m64)b;
Anders Carlsson62af71c2008-12-22 04:55:36 +0000676}
677
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000678static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000679_mm_max_pi16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000680{
681 return (__m64)__builtin_ia32_pmaxsw((__v4hi)a, (__v4hi)b);
682}
683
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000684static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000685_mm_max_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000686{
687 return (__m64)__builtin_ia32_pmaxub((__v8qi)a, (__v8qi)b);
688}
689
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000690static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000691_mm_min_pi16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000692{
693 return (__m64)__builtin_ia32_pminsw((__v4hi)a, (__v4hi)b);
694}
695
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000696static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000697_mm_min_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000698{
699 return (__m64)__builtin_ia32_pminub((__v8qi)a, (__v8qi)b);
700}
701
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000702static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000703_mm_movemask_pi8(__m64 a)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000704{
705 return __builtin_ia32_pmovmskb((__v8qi)a);
706}
707
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000708static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000709_mm_mulhi_pu16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000710{
711 return (__m64)__builtin_ia32_pmulhuw((__v4hi)a, (__v4hi)b);
712}
713
/* Permutes the four 16-bit elements of a.  Result element i is the element of
   a selected by bits [2i+1:2i] of the constant n.  The second shuffle operand
   is a dummy vector; every index is below 4, so it is never selected. */
#define _mm_shuffle_pi16(a, n) \
  ((__m64)__builtin_shufflevector((__v4hi)(a), (__v4hi) {0}, \
                                  (n) & 0x3, \
                                  ((n) & 0xc) >> 2, \
                                  ((n) & 0x30) >> 4, \
                                  ((n) & 0xc0) >> 6))
Anders Carlsson62af71c2008-12-22 04:55:36 +0000718
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000719static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000720_mm_maskmove_si64(__m64 d, __m64 n, char *p)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000721{
722 __builtin_ia32_maskmovq((__v8qi)d, (__v8qi)n, p);
723}
724
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000725static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000726_mm_avg_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000727{
728 return (__m64)__builtin_ia32_pavgb((__v8qi)a, (__v8qi)b);
729}
730
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000731static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000732_mm_avg_pu16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000733{
734 return (__m64)__builtin_ia32_pavgw((__v4hi)a, (__v4hi)b);
735}
736
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000737static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000738_mm_sad_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000739{
740 return (__m64)__builtin_ia32_psadbw((__v8qi)a, (__v8qi)b);
741}
Anders Carlssonc1f9afd2008-12-22 05:00:07 +0000742
/* Returns the current value of the MXCSR control/status register, read with
   the stmxcsr builtin. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  unsigned int csr = __builtin_ia32_stmxcsr();

  return csr;
}
748
/* Loads i into the MXCSR control/status register with the ldmxcsr builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int i)
{
  __builtin_ia32_ldmxcsr(i);
}
754
/* Selects four floats using the constant mask: result elements 0 and 1 come
   from a (mask bits [1:0] and [3:2]); result elements 2 and 3 come from b
   (mask bits [5:4] and [7:6], offset by 4 to address the second operand). */
#define _mm_shuffle_ps(a, b, mask) \
  (__builtin_shufflevector((__v4sf)(a), (__v4sf)(b), \
                           (mask) & 0x3, \
                           ((mask) & 0xc) >> 2, \
                           (((mask) & 0x30) >> 4) + 4, \
                           (((mask) & 0xc0) >> 6) + 4))
Anders Carlsson50099cb2008-12-22 05:20:34 +0000760
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000761static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000762_mm_unpackhi_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000763{
764 return __builtin_shufflevector(a, b, 2, 6, 3, 7);
765}
766
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000767static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000768_mm_unpacklo_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000769{
770 return __builtin_shufflevector(a, b, 0, 4, 1, 5);
771}
772
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000773static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000774_mm_move_ss(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000775{
776 return __builtin_shufflevector(a, b, 4, 1, 2, 3);
777}
778
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000779static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000780_mm_movehl_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000781{
782 return __builtin_shufflevector(a, b, 6, 7, 2, 3);
783}
784
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000785static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000786_mm_movelh_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000787{
788 return __builtin_shufflevector(a, b, 0, 1, 4, 5);
789}
790
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000791static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000792_mm_cvtpi16_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000793{
794 __m64 b, c;
795 __m128 r;
796
797 b = _mm_setzero_si64();
798 b = _mm_cmpgt_pi16(b, a);
799 c = _mm_unpackhi_pi16(a, b);
800 r = _mm_setzero_ps();
801 r = _mm_cvtpi32_ps(r, c);
802 r = _mm_movelh_ps(r, r);
803 c = _mm_unpacklo_pi16(a, b);
804 r = _mm_cvtpi32_ps(r, c);
805
806 return r;
807}
808
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000809static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000810_mm_cvtpu16_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000811{
812 __m64 b, c;
813 __m128 r;
814
815 b = _mm_setzero_si64();
816 c = _mm_unpackhi_pi16(a, b);
817 r = _mm_setzero_ps();
818 r = _mm_cvtpi32_ps(r, c);
819 r = _mm_movelh_ps(r, r);
820 c = _mm_unpacklo_pi16(a, b);
821 r = _mm_cvtpi32_ps(r, c);
822
823 return r;
824}
825
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000826static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000827_mm_cvtpi8_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000828{
829 __m64 b;
830
831 b = _mm_setzero_si64();
832 b = _mm_cmpgt_pi8(b, a);
833 b = _mm_unpacklo_pi8(a, b);
834
835 return _mm_cvtpi16_ps(b);
836}
837
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000838static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000839_mm_cvtpu8_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000840{
841 __m64 b;
842
843 b = _mm_setzero_si64();
844 b = _mm_unpacklo_pi8(a, b);
845
846 return _mm_cvtpi16_ps(b);
847}
848
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000849static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000850_mm_cvtpi32x2_ps(__m64 a, __m64 b)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000851{
852 __m128 c;
853
854 c = _mm_setzero_ps();
855 c = _mm_cvtpi32_ps(c, b);
856 c = _mm_movelh_ps(c, c);
857
858 return _mm_cvtpi32_ps(c, a);
859}
860
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000861static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000862_mm_cvtps_pi16(__m128 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000863{
864 __m64 b, c;
865
866 b = _mm_cvtps_pi32(a);
867 a = _mm_movehl_ps(a, a);
868 c = _mm_cvtps_pi32(a);
869
870 return _mm_packs_pi16(b, c);
871}
872
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000873static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000874_mm_cvtps_pi8(__m128 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000875{
876 __m64 b, c;
877
878 b = _mm_cvtps_pi16(a);
879 c = _mm_setzero_si64();
880
881 return _mm_packs_pi16(b, c);
882}
883
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000884static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000885_mm_movemask_ps(__m128 a)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000886{
887 return __builtin_ia32_movmskps(a);
888}
889
/* Builds an 8-bit shuffle selector from four 2-bit element indices; z ends up
   in bits [7:6] and w in bits [1:0]. */
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))

/* MXCSR exception status flags (bits 0..5). */
#define _MM_EXCEPT_INVALID    (0x0001)
#define _MM_EXCEPT_DENORM     (0x0002)
#define _MM_EXCEPT_DIV_ZERO   (0x0004)
#define _MM_EXCEPT_OVERFLOW   (0x0008)
#define _MM_EXCEPT_UNDERFLOW  (0x0010)
#define _MM_EXCEPT_INEXACT    (0x0020)
#define _MM_EXCEPT_MASK       (0x003f)

/* MXCSR exception mask bits (bits 7..12). */
#define _MM_MASK_INVALID      (0x0080)
#define _MM_MASK_DENORM       (0x0100)
#define _MM_MASK_DIV_ZERO     (0x0200)
#define _MM_MASK_OVERFLOW     (0x0400)
#define _MM_MASK_UNDERFLOW    (0x0800)
#define _MM_MASK_INEXACT      (0x1000)
#define _MM_MASK_MASK         (0x1f80)

/* MXCSR rounding control field (bits 13..14). */
#define _MM_ROUND_NEAREST     (0x0000)
#define _MM_ROUND_DOWN        (0x2000)
#define _MM_ROUND_UP          (0x4000)
#define _MM_ROUND_TOWARD_ZERO (0x6000)
#define _MM_ROUND_MASK        (0x6000)

/* MXCSR flush-to-zero bit (bit 15).  OFF must be the cleared bit: with both
   ON and OFF equal to 0x8000, _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF)
   would turn flush-to-zero on instead of off. */
#define _MM_FLUSH_ZERO_MASK   (0x8000)
#define _MM_FLUSH_ZERO_ON     (0x8000)
#define _MM_FLUSH_ZERO_OFF    (0x0000)
Anders Carlssonb5955092008-12-22 05:42:03 +0000917
/* Read one field of MXCSR: the current register value masked with the
   corresponding *_MASK constant. */
#define _MM_GET_EXCEPTION_MASK()  (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_GET_ROUNDING_MODE()   (_mm_getcsr() & _MM_ROUND_MASK)

/* Replace one field of MXCSR: read the register, clear the field with the
   corresponding *_MASK constant, OR in x and write the result back.  x is
   not masked, so it must only contain bits belonging to the field. */
#define _MM_SET_EXCEPTION_MASK(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
#define _MM_SET_EXCEPTION_STATE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
#define _MM_SET_FLUSH_ZERO_MODE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
#define _MM_SET_ROUNDING_MODE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))

927
/* Transposes, in place, the 4x4 float matrix whose rows are the four __m128
   lvalues row0..row3.  Each argument is evaluated more than once, so it must
   be free of side effects.  The temporaries use reserved (double-underscore)
   names so that they cannot shadow a caller variable passed as an argument. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __mm_tmp3, __mm_tmp2, __mm_tmp1, __mm_tmp0; \
  __mm_tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  __mm_tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  __mm_tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  __mm_tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__mm_tmp0, __mm_tmp2); \
  (row1) = _mm_movehl_ps(__mm_tmp2, __mm_tmp0); \
  (row2) = _mm_movelh_ps(__mm_tmp1, __mm_tmp3); \
  (row3) = _mm_movehl_ps(__mm_tmp3, __mm_tmp1); \
} while (0)
940
/* Aliases for compatibility: the older _m_<instruction> spellings map onto
   the corresponding _mm_* intrinsics defined in this header. */
#define _m_pextrw _mm_extract_pi16
#define _m_pinsrw _mm_insert_pi16
#define _m_pmaxsw _mm_max_pi16
#define _m_pmaxub _mm_max_pu8
#define _m_pminsw _mm_min_pi16
#define _m_pminub _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw _mm_mulhi_pu16
#define _m_pshufw _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb _mm_avg_pu8
#define _m_pavgw _mm_avg_pu16
#define _m_psadbw _mm_sad_pu8
/* NOTE(review): this prefix alias was defined twice with identical text; one
   definition is kept so existing uses still expand the same way. */
#define _m_ _mm_
957
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000958/* Ugly hack for backwards-compatibility (compatible with gcc) */
959#ifdef __SSE2__
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000960#include <emmintrin.h>
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000961#endif
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000962
Anders Carlsson566d8da2008-12-22 00:01:20 +0000963#endif /* __SSE__ */
964
965#endif /* __XMMINTRIN_H */