blob: b3b23cb7d0d4d6efc676c3296fa8f1ed5c45e4af [file] [log] [blame]
Anders Carlsson566d8da2008-12-22 00:01:20 +00001/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __XMMINTRIN_H
25#define __XMMINTRIN_H
26
27#ifndef __SSE__
Anders Carlsson4fd3e632008-12-26 00:57:11 +000028#error "SSE instruction set not enabled"
Anders Carlsson566d8da2008-12-22 00:01:20 +000029#else
30
Anders Carlsson4fcc3132008-12-22 00:48:30 +000031#include <mmintrin.h>
32
/* 16-byte vector types used throughout this header. */
typedef float __m128 __attribute__((__vector_size__(16))); /* float vector passed to and returned by the intrinsics */
typedef float __v4sf __attribute__((__vector_size__(16))); /* float-element view */
typedef int __v4si __attribute__((__vector_size__(16)));   /* int-element view, used for the bitwise operations */
36
// <mm_malloc.h> should only be included in a hosted environment, as it depends
// on a standard library to provide allocation routines.
39#if __STDC_HOSTED__
Anders Carlsson398082e2008-12-22 17:42:23 +000040#include <mm_malloc.h>
Chandler Carruth7acb9532010-11-22 08:06:31 +000041#endif
Anders Carlsson398082e2008-12-22 17:42:23 +000042
Chris Lattner1bddbcb2010-03-22 18:14:12 +000043static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000044_mm_add_ss(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000045{
David Blaikie4f918ae2013-01-16 23:08:36 +000046 __a[0] += __b[0];
47 return __a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000048}
49
Chris Lattner1bddbcb2010-03-22 18:14:12 +000050static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000051_mm_add_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000052{
David Blaikie4f918ae2013-01-16 23:08:36 +000053 return __a + __b;
Anders Carlsson566d8da2008-12-22 00:01:20 +000054}
55
Chris Lattner1bddbcb2010-03-22 18:14:12 +000056static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000057_mm_sub_ss(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000058{
David Blaikie4f918ae2013-01-16 23:08:36 +000059 __a[0] -= __b[0];
60 return __a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000061}
62
Chris Lattner1bddbcb2010-03-22 18:14:12 +000063static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000064_mm_sub_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000065{
David Blaikie4f918ae2013-01-16 23:08:36 +000066 return __a - __b;
Anders Carlsson566d8da2008-12-22 00:01:20 +000067}
68
Chris Lattner1bddbcb2010-03-22 18:14:12 +000069static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000070_mm_mul_ss(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000071{
David Blaikie4f918ae2013-01-16 23:08:36 +000072 __a[0] *= __b[0];
73 return __a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000074}
75
Chris Lattner1bddbcb2010-03-22 18:14:12 +000076static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000077_mm_mul_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000078{
David Blaikie4f918ae2013-01-16 23:08:36 +000079 return __a * __b;
Anders Carlsson566d8da2008-12-22 00:01:20 +000080}
81
Chris Lattner1bddbcb2010-03-22 18:14:12 +000082static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000083_mm_div_ss(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000084{
David Blaikie4f918ae2013-01-16 23:08:36 +000085 __a[0] /= __b[0];
86 return __a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000087}
88
Chris Lattner1bddbcb2010-03-22 18:14:12 +000089static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000090_mm_div_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000091{
David Blaikie4f918ae2013-01-16 23:08:36 +000092 return __a / __b;
Anders Carlsson566d8da2008-12-22 00:01:20 +000093}
94
Chris Lattner1bddbcb2010-03-22 18:14:12 +000095static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +000096_mm_sqrt_ss(__m128 __a)
Anders Carlsson566d8da2008-12-22 00:01:20 +000097{
David Blaikie4f918ae2013-01-16 23:08:36 +000098 __m128 __c = __builtin_ia32_sqrtss(__a);
99 return (__m128) { __c[0], __a[1], __a[2], __a[3] };
Anders Carlsson566d8da2008-12-22 00:01:20 +0000100}
101
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000102static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000103_mm_sqrt_ps(__m128 __a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000104{
David Blaikie4f918ae2013-01-16 23:08:36 +0000105 return __builtin_ia32_sqrtps(__a);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000106}
107
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000108static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000109_mm_rcp_ss(__m128 __a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000110{
David Blaikie4f918ae2013-01-16 23:08:36 +0000111 __m128 __c = __builtin_ia32_rcpss(__a);
112 return (__m128) { __c[0], __a[1], __a[2], __a[3] };
Anders Carlsson566d8da2008-12-22 00:01:20 +0000113}
114
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000115static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000116_mm_rcp_ps(__m128 __a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000117{
David Blaikie4f918ae2013-01-16 23:08:36 +0000118 return __builtin_ia32_rcpps(__a);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000119}
120
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000121static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000122_mm_rsqrt_ss(__m128 __a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000123{
David Blaikie4f918ae2013-01-16 23:08:36 +0000124 __m128 __c = __builtin_ia32_rsqrtss(__a);
125 return (__m128) { __c[0], __a[1], __a[2], __a[3] };
Anders Carlsson566d8da2008-12-22 00:01:20 +0000126}
127
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000128static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000129_mm_rsqrt_ps(__m128 __a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000130{
David Blaikie4f918ae2013-01-16 23:08:36 +0000131 return __builtin_ia32_rsqrtps(__a);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000132}
133
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000134static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000135_mm_min_ss(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000136{
David Blaikie4f918ae2013-01-16 23:08:36 +0000137 return __builtin_ia32_minss(__a, __b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000138}
139
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000140static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000141_mm_min_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000142{
David Blaikie4f918ae2013-01-16 23:08:36 +0000143 return __builtin_ia32_minps(__a, __b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000144}
145
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000146static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000147_mm_max_ss(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000148{
David Blaikie4f918ae2013-01-16 23:08:36 +0000149 return __builtin_ia32_maxss(__a, __b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000150}
151
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000152static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000153_mm_max_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000154{
David Blaikie4f918ae2013-01-16 23:08:36 +0000155 return __builtin_ia32_maxps(__a, __b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000156}
157
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000158static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000159_mm_and_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000160{
David Blaikie4f918ae2013-01-16 23:08:36 +0000161 return (__m128)((__v4si)__a & (__v4si)__b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000162}
163
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000164static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000165_mm_andnot_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000166{
David Blaikie4f918ae2013-01-16 23:08:36 +0000167 return (__m128)(~(__v4si)__a & (__v4si)__b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000168}
169
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000170static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000171_mm_or_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000172{
David Blaikie4f918ae2013-01-16 23:08:36 +0000173 return (__m128)((__v4si)__a | (__v4si)__b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000174}
175
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000176static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000177_mm_xor_ps(__m128 __a, __m128 __b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000178{
David Blaikie4f918ae2013-01-16 23:08:36 +0000179 return (__m128)((__v4si)__a ^ (__v4si)__b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000180}
181
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000182static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000183_mm_cmpeq_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000184{
David Blaikie4f918ae2013-01-16 23:08:36 +0000185 return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000186}
187
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000188static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000189_mm_cmpeq_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000190{
David Blaikie4f918ae2013-01-16 23:08:36 +0000191 return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000192}
193
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000194static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000195_mm_cmplt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000196{
David Blaikie4f918ae2013-01-16 23:08:36 +0000197 return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000198}
199
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000200static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000201_mm_cmplt_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000202{
David Blaikie4f918ae2013-01-16 23:08:36 +0000203 return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000204}
205
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000206static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000207_mm_cmple_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000208{
David Blaikie4f918ae2013-01-16 23:08:36 +0000209 return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000210}
211
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000212static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000213_mm_cmple_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000214{
David Blaikie4f918ae2013-01-16 23:08:36 +0000215 return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000216}
217
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000218static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000219_mm_cmpgt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000220{
David Blaikie4f918ae2013-01-16 23:08:36 +0000221 return (__m128)__builtin_ia32_cmpss(__b, __a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000222}
223
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000224static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000225_mm_cmpgt_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000226{
David Blaikie4f918ae2013-01-16 23:08:36 +0000227 return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000228}
229
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000230static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000231_mm_cmpge_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000232{
David Blaikie4f918ae2013-01-16 23:08:36 +0000233 return (__m128)__builtin_ia32_cmpss(__b, __a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000234}
235
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000236static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000237_mm_cmpge_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000238{
David Blaikie4f918ae2013-01-16 23:08:36 +0000239 return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000240}
241
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000242static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000243_mm_cmpneq_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000244{
David Blaikie4f918ae2013-01-16 23:08:36 +0000245 return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000246}
247
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000248static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000249_mm_cmpneq_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000250{
David Blaikie4f918ae2013-01-16 23:08:36 +0000251 return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000252}
253
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000254static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000255_mm_cmpnlt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000256{
David Blaikie4f918ae2013-01-16 23:08:36 +0000257 return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000258}
259
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000260static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000261_mm_cmpnlt_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000262{
David Blaikie4f918ae2013-01-16 23:08:36 +0000263 return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000264}
265
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000266static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000267_mm_cmpnle_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000268{
David Blaikie4f918ae2013-01-16 23:08:36 +0000269 return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000270}
271
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000272static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000273_mm_cmpnle_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000274{
David Blaikie4f918ae2013-01-16 23:08:36 +0000275 return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000276}
277
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000278static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000279_mm_cmpngt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000280{
David Blaikie4f918ae2013-01-16 23:08:36 +0000281 return (__m128)__builtin_ia32_cmpss(__b, __a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000282}
283
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000284static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000285_mm_cmpngt_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000286{
David Blaikie4f918ae2013-01-16 23:08:36 +0000287 return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000288}
289
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000290static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000291_mm_cmpnge_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000292{
David Blaikie4f918ae2013-01-16 23:08:36 +0000293 return (__m128)__builtin_ia32_cmpss(__b, __a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000294}
295
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000296static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000297_mm_cmpnge_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000298{
David Blaikie4f918ae2013-01-16 23:08:36 +0000299 return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000300}
301
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000302static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000303_mm_cmpord_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000304{
David Blaikie4f918ae2013-01-16 23:08:36 +0000305 return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000306}
307
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000308static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000309_mm_cmpord_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000310{
David Blaikie4f918ae2013-01-16 23:08:36 +0000311 return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000312}
313
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000314static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000315_mm_cmpunord_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000316{
David Blaikie4f918ae2013-01-16 23:08:36 +0000317 return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000318}
319
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000320static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000321_mm_cmpunord_ps(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000322{
David Blaikie4f918ae2013-01-16 23:08:36 +0000323 return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000324}
325
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000326static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000327_mm_comieq_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000328{
David Blaikie4f918ae2013-01-16 23:08:36 +0000329 return __builtin_ia32_comieq(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000330}
331
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000332static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000333_mm_comilt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000334{
David Blaikie4f918ae2013-01-16 23:08:36 +0000335 return __builtin_ia32_comilt(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000336}
337
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000338static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000339_mm_comile_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000340{
David Blaikie4f918ae2013-01-16 23:08:36 +0000341 return __builtin_ia32_comile(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000342}
343
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000344static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000345_mm_comigt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000346{
David Blaikie4f918ae2013-01-16 23:08:36 +0000347 return __builtin_ia32_comigt(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000348}
349
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000350static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000351_mm_comige_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000352{
David Blaikie4f918ae2013-01-16 23:08:36 +0000353 return __builtin_ia32_comige(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000354}
355
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000356static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000357_mm_comineq_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000358{
David Blaikie4f918ae2013-01-16 23:08:36 +0000359 return __builtin_ia32_comineq(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000360}
361
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000362static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000363_mm_ucomieq_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000364{
David Blaikie4f918ae2013-01-16 23:08:36 +0000365 return __builtin_ia32_ucomieq(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000366}
367
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000368static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000369_mm_ucomilt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000370{
David Blaikie4f918ae2013-01-16 23:08:36 +0000371 return __builtin_ia32_ucomilt(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000372}
373
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000374static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000375_mm_ucomile_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000376{
David Blaikie4f918ae2013-01-16 23:08:36 +0000377 return __builtin_ia32_ucomile(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000378}
379
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000380static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000381_mm_ucomigt_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000382{
David Blaikie4f918ae2013-01-16 23:08:36 +0000383 return __builtin_ia32_ucomigt(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000384}
385
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000386static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000387_mm_ucomige_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000388{
David Blaikie4f918ae2013-01-16 23:08:36 +0000389 return __builtin_ia32_ucomige(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000390}
391
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000392static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000393_mm_ucomineq_ss(__m128 __a, __m128 __b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000394{
David Blaikie4f918ae2013-01-16 23:08:36 +0000395 return __builtin_ia32_ucomineq(__a, __b);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000396}
397
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000398static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000399_mm_cvtss_si32(__m128 __a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000400{
David Blaikie4f918ae2013-01-16 23:08:36 +0000401 return __builtin_ia32_cvtss2si(__a);
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000402}
403
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000404static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000405_mm_cvt_ss2si(__m128 __a)
Chris Lattneref5ebf62010-02-16 18:21:25 +0000406{
David Blaikie4f918ae2013-01-16 23:08:36 +0000407 return _mm_cvtss_si32(__a);
Chris Lattneref5ebf62010-02-16 18:21:25 +0000408}
409
Eli Friedman80c80042009-06-06 02:13:04 +0000410#ifdef __x86_64__
411
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000412static __inline__ long long __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000413_mm_cvtss_si64(__m128 __a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000414{
David Blaikie4f918ae2013-01-16 23:08:36 +0000415 return __builtin_ia32_cvtss2si64(__a);
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000416}
417
Eli Friedman80c80042009-06-06 02:13:04 +0000418#endif
419
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000420static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000421_mm_cvtps_pi32(__m128 __a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000422{
David Blaikie4f918ae2013-01-16 23:08:36 +0000423 return (__m64)__builtin_ia32_cvtps2pi(__a);
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000424}
425
Chandler Carruthfa38c812010-07-22 06:47:28 +0000426static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000427_mm_cvt_ps2pi(__m128 __a)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000428{
David Blaikie4f918ae2013-01-16 23:08:36 +0000429 return _mm_cvtps_pi32(__a);
Chandler Carruthfa38c812010-07-22 06:47:28 +0000430}
431
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000432static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000433_mm_cvttss_si32(__m128 __a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000434{
David Blaikie4f918ae2013-01-16 23:08:36 +0000435 return __a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000436}
437
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000438static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000439_mm_cvtt_ss2si(__m128 __a)
Chris Lattneref5ebf62010-02-16 18:21:25 +0000440{
David Blaikie4f918ae2013-01-16 23:08:36 +0000441 return _mm_cvttss_si32(__a);
Chris Lattneref5ebf62010-02-16 18:21:25 +0000442}
443
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000444static __inline__ long long __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000445_mm_cvttss_si64(__m128 __a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000446{
David Blaikie4f918ae2013-01-16 23:08:36 +0000447 return __a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000448}
449
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000450static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000451_mm_cvttps_pi32(__m128 __a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000452{
David Blaikie4f918ae2013-01-16 23:08:36 +0000453 return (__m64)__builtin_ia32_cvttps2pi(__a);
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000454}
455
Chandler Carruthfa38c812010-07-22 06:47:28 +0000456static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000457_mm_cvtt_ps2pi(__m128 __a)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000458{
David Blaikie4f918ae2013-01-16 23:08:36 +0000459 return _mm_cvttps_pi32(__a);
Chandler Carruthfa38c812010-07-22 06:47:28 +0000460}
461
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000462static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000463_mm_cvtsi32_ss(__m128 __a, int __b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000464{
David Blaikie4f918ae2013-01-16 23:08:36 +0000465 __a[0] = __b;
466 return __a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000467}
468
Chandler Carruthfa38c812010-07-22 06:47:28 +0000469static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000470_mm_cvt_si2ss(__m128 __a, int __b)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000471{
David Blaikie4f918ae2013-01-16 23:08:36 +0000472 return _mm_cvtsi32_ss(__a, __b);
Chandler Carruthfa38c812010-07-22 06:47:28 +0000473}
474
Anders Carlsson1b76b802008-12-22 01:26:50 +0000475#ifdef __x86_64__
476
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000477static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000478_mm_cvtsi64_ss(__m128 __a, long long __b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000479{
David Blaikie4f918ae2013-01-16 23:08:36 +0000480 __a[0] = __b;
481 return __a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000482}
483
Anders Carlsson1b76b802008-12-22 01:26:50 +0000484#endif
485
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000486static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000487_mm_cvtpi32_ps(__m128 __a, __m64 __b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000488{
David Blaikie4f918ae2013-01-16 23:08:36 +0000489 return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000490}
491
Chandler Carruthfa38c812010-07-22 06:47:28 +0000492static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000493_mm_cvt_pi2ps(__m128 __a, __m64 __b)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000494{
David Blaikie4f918ae2013-01-16 23:08:36 +0000495 return _mm_cvtpi32_ps(__a, __b);
Chandler Carruthfa38c812010-07-22 06:47:28 +0000496}
497
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000498static __inline__ float __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000499_mm_cvtss_f32(__m128 __a)
Anders Carlsson1b76b802008-12-22 01:26:50 +0000500{
David Blaikie4f918ae2013-01-16 23:08:36 +0000501 return __a[0];
Anders Carlsson1b76b802008-12-22 01:26:50 +0000502}
503
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000504static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000505_mm_loadh_pi(__m128 __a, const __m64 *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000506{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000507 typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
508 struct __mm_loadh_pi_struct {
David Blaikie4f918ae2013-01-16 23:08:36 +0000509 __mm_loadh_pi_v2f32 __u;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000510 } __attribute__((__packed__, __may_alias__));
David Blaikie4f918ae2013-01-16 23:08:36 +0000511 __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
512 __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
513 return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
Anders Carlsson97700862008-12-22 02:43:30 +0000514}
515
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000516static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000517_mm_loadl_pi(__m128 __a, const __m64 *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000518{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000519 typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
520 struct __mm_loadl_pi_struct {
David Blaikie4f918ae2013-01-16 23:08:36 +0000521 __mm_loadl_pi_v2f32 __u;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000522 } __attribute__((__packed__, __may_alias__));
David Blaikie4f918ae2013-01-16 23:08:36 +0000523 __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
524 __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
525 return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
Anders Carlsson97700862008-12-22 02:43:30 +0000526}
527
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000528static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000529_mm_load_ss(const float *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000530{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000531 struct __mm_load_ss_struct {
David Blaikie4f918ae2013-01-16 23:08:36 +0000532 float __u;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000533 } __attribute__((__packed__, __may_alias__));
David Blaikie4f918ae2013-01-16 23:08:36 +0000534 float __u = ((struct __mm_load_ss_struct*)__p)->__u;
535 return (__m128){ __u, 0, 0, 0 };
Anders Carlsson97700862008-12-22 02:43:30 +0000536}
537
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000538static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000539_mm_load1_ps(const float *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000540{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000541 struct __mm_load1_ps_struct {
David Blaikie4f918ae2013-01-16 23:08:36 +0000542 float __u;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000543 } __attribute__((__packed__, __may_alias__));
David Blaikie4f918ae2013-01-16 23:08:36 +0000544 float __u = ((struct __mm_load1_ps_struct*)__p)->__u;
545 return (__m128){ __u, __u, __u, __u };
Anders Carlsson97700862008-12-22 02:43:30 +0000546}
547
/* Alternate spelling of _mm_load1_ps. */
#define _mm_load_ps1(p) _mm_load1_ps((p))
549
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000550static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000551_mm_load_ps(const float *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000552{
David Blaikie4f918ae2013-01-16 23:08:36 +0000553 return *(__m128*)__p;
Anders Carlsson97700862008-12-22 02:43:30 +0000554}
555
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000556static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000557_mm_loadu_ps(const float *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000558{
Bill Wendlingeed92a12011-05-13 00:11:39 +0000559 struct __loadu_ps {
David Blaikie4f918ae2013-01-16 23:08:36 +0000560 __m128 __v;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000561 } __attribute__((__packed__, __may_alias__));
David Blaikie4f918ae2013-01-16 23:08:36 +0000562 return ((struct __loadu_ps*)__p)->__v;
Anders Carlsson97700862008-12-22 02:43:30 +0000563}
564
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000565static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000566_mm_loadr_ps(const float *__p)
Anders Carlsson97700862008-12-22 02:43:30 +0000567{
David Blaikie4f918ae2013-01-16 23:08:36 +0000568 __m128 __a = _mm_load_ps(__p);
569 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
Anders Carlsson97700862008-12-22 02:43:30 +0000570}
571
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000572static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000573_mm_set_ss(float __w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000574{
David Blaikie4f918ae2013-01-16 23:08:36 +0000575 return (__m128){ __w, 0, 0, 0 };
Anders Carlssona6ba0012008-12-22 02:51:35 +0000576}
577
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000578static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000579_mm_set1_ps(float __w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000580{
David Blaikie4f918ae2013-01-16 23:08:36 +0000581 return (__m128){ __w, __w, __w, __w };
Anders Carlssona6ba0012008-12-22 02:51:35 +0000582}
583
Anders Carlsson12868cc2008-12-27 04:26:15 +0000584// Microsoft specific.
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000585static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000586_mm_set_ps1(float __w)
Anders Carlsson12868cc2008-12-27 04:26:15 +0000587{
David Blaikie4f918ae2013-01-16 23:08:36 +0000588 return _mm_set1_ps(__w);
Anders Carlsson12868cc2008-12-27 04:26:15 +0000589}
590
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000591static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000592_mm_set_ps(float __z, float __y, float __x, float __w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000593{
David Blaikie4f918ae2013-01-16 23:08:36 +0000594 return (__m128){ __w, __x, __y, __z };
Anders Carlssona6ba0012008-12-22 02:51:35 +0000595}
596
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000597static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000598_mm_setr_ps(float __z, float __y, float __x, float __w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000599{
David Blaikie4f918ae2013-01-16 23:08:36 +0000600 return (__m128){ __z, __y, __x, __w };
Anders Carlssona6ba0012008-12-22 02:51:35 +0000601}
602
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000603static __inline__ __m128 __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000604_mm_setzero_ps(void)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000605{
606 return (__m128){ 0, 0, 0, 0 };
607}
608
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000609static __inline__ void __attribute__((__always_inline__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000610_mm_storeh_pi(__m64 *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000611{
David Blaikie4f918ae2013-01-16 23:08:36 +0000612 __builtin_ia32_storehps((__v2si *)__p, __a);
Anders Carlsson09b93052008-12-22 03:16:40 +0000613}
614
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000615static __inline__ void __attribute__((__always_inline__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000616_mm_storel_pi(__m64 *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000617{
David Blaikie4f918ae2013-01-16 23:08:36 +0000618 __builtin_ia32_storelps((__v2si *)__p, __a);
Anders Carlsson09b93052008-12-22 03:16:40 +0000619}
620
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000621static __inline__ void __attribute__((__always_inline__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000622_mm_store_ss(float *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000623{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000624 struct __mm_store_ss_struct {
David Blaikie4f918ae2013-01-16 23:08:36 +0000625 float __u;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000626 } __attribute__((__packed__, __may_alias__));
David Blaikie4f918ae2013-01-16 23:08:36 +0000627 ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
Anders Carlsson09b93052008-12-22 03:16:40 +0000628}
629
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000630static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000631_mm_storeu_ps(float *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000632{
David Blaikie4f918ae2013-01-16 23:08:36 +0000633 __builtin_ia32_storeups(__p, __a);
Anders Carlsson09b93052008-12-22 03:16:40 +0000634}
635
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000636static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000637_mm_store1_ps(float *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000638{
David Blaikie4f918ae2013-01-16 23:08:36 +0000639 __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
640 _mm_storeu_ps(__p, __a);
Anders Carlsson09b93052008-12-22 03:16:40 +0000641}
642
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000643static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000644_mm_store_ps1(float *__p, __m128 __a)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000645{
David Blaikie4f918ae2013-01-16 23:08:36 +0000646 return _mm_store1_ps(__p, __a);
Chandler Carruthfa38c812010-07-22 06:47:28 +0000647}
648
649static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000650_mm_store_ps(float *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000651{
David Blaikie4f918ae2013-01-16 23:08:36 +0000652 *(__m128 *)__p = __a;
Anders Carlsson09b93052008-12-22 03:16:40 +0000653}
654
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000655static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000656_mm_storer_ps(float *__p, __m128 __a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000657{
David Blaikie4f918ae2013-01-16 23:08:36 +0000658 __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
659 _mm_store_ps(__p, __a);
Anders Carlsson09b93052008-12-22 03:16:40 +0000660}
661
/* Hint selectors for _mm_prefetch; the value is forwarded unchanged as the
   third argument of __builtin_prefetch. */
#define _MM_HINT_NTA 0
#define _MM_HINT_T2  1
#define _MM_HINT_T1  2
#define _MM_HINT_T0  3
666
/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Issues a read prefetch (second builtin argument 0) for address `a`, with
   `sel` forwarded as the locality argument of __builtin_prefetch. The cast
   targets `const void *`, matching the builtin's parameter, so a pointer to
   const data does not have its qualifier cast away. */
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), 0, (sel)))
Anders Carlssondedad4e2008-12-22 03:50:21 +0000671
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000672static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000673_mm_stream_pi(__m64 *__p, __m64 __a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000674{
David Blaikie4f918ae2013-01-16 23:08:36 +0000675 __builtin_ia32_movntq(__p, __a);
Anders Carlssondedad4e2008-12-22 03:50:21 +0000676}
677
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000678static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000679_mm_stream_ps(float *__p, __m128 __a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000680{
David Blaikie4f918ae2013-01-16 23:08:36 +0000681 __builtin_ia32_movntps(__p, __a);
Anders Carlssondedad4e2008-12-22 03:50:21 +0000682}
683
/* Emits the sfence builtin; takes no arguments and returns nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
}
689
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000690static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000691_mm_extract_pi16(__m64 __a, int __n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000692{
David Blaikie4f918ae2013-01-16 23:08:36 +0000693 __v4hi __b = (__v4hi)__a;
694 return (unsigned short)__b[__n & 3];
Anders Carlsson62af71c2008-12-22 04:55:36 +0000695}
696
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000697static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000698_mm_insert_pi16(__m64 __a, int __d, int __n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000699{
David Blaikie4f918ae2013-01-16 23:08:36 +0000700 __v4hi __b = (__v4hi)__a;
701 __b[__n & 3] = __d;
702 return (__m64)__b;
Anders Carlsson62af71c2008-12-22 04:55:36 +0000703}
704
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000705static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000706_mm_max_pi16(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000707{
David Blaikie4f918ae2013-01-16 23:08:36 +0000708 return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000709}
710
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000711static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000712_mm_max_pu8(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000713{
David Blaikie4f918ae2013-01-16 23:08:36 +0000714 return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000715}
716
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000717static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000718_mm_min_pi16(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000719{
David Blaikie4f918ae2013-01-16 23:08:36 +0000720 return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000721}
722
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000723static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000724_mm_min_pu8(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000725{
David Blaikie4f918ae2013-01-16 23:08:36 +0000726 return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000727}
728
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000729static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000730_mm_movemask_pi8(__m64 __a)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000731{
David Blaikie4f918ae2013-01-16 23:08:36 +0000732 return __builtin_ia32_pmovmskb((__v8qi)__a);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000733}
734
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000735static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000736_mm_mulhi_pu16(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000737{
David Blaikie4f918ae2013-01-16 23:08:36 +0000738 return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000739}
740
/* Applies the pshufw builtin to `a` (reinterpreted as __v4hi) with selector
   `n`. `a` is evaluated exactly once into a local; `n` is passed through to
   the builtin (presumably it must be a compile-time constant, which would
   explain why this is a macro - confirm). */
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
  __m64 __a = (a); \
  __v4hi __lanes = (__v4hi)__a; \
  (__m64)__builtin_ia32_pshufw(__lanes, (n)); })
Anders Carlsson62af71c2008-12-22 04:55:36 +0000744
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000745static __inline__ void __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000746_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000747{
David Blaikie4f918ae2013-01-16 23:08:36 +0000748 __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000749}
750
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000751static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000752_mm_avg_pu8(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000753{
David Blaikie4f918ae2013-01-16 23:08:36 +0000754 return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000755}
756
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000757static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000758_mm_avg_pu16(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000759{
David Blaikie4f918ae2013-01-16 23:08:36 +0000760 return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000761}
762
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000763static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000764_mm_sad_pu8(__m64 __a, __m64 __b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000765{
David Blaikie4f918ae2013-01-16 23:08:36 +0000766 return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
Anders Carlsson62af71c2008-12-22 04:55:36 +0000767}
Anders Carlssonc1f9afd2008-12-22 05:00:07 +0000768
/* Returns the value produced by the stmxcsr builtin (the current contents of
   the MXCSR control/status register). */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  unsigned int __csr = __builtin_ia32_stmxcsr();
  return __csr;
}
774
/* Passes __i to the ldmxcsr builtin, i.e. loads it into the MXCSR
   control/status register. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int __i)
{
  __builtin_ia32_ldmxcsr(__i);
}
780
/* Builds a vector from two elements of `a` followed by two elements of `b`.
   `mask` holds four 2-bit selectors: bits 0-1 and 2-3 index into `a` for
   result elements 0 and 1; bits 4-5 and 6-7 index into `b` for result
   elements 2 and 3 (the +4 addresses the second shufflevector operand).
   `a` and `b` are each evaluated once; `mask` is expanded four times. */
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
  __m128 __a = (a); \
  __m128 __b = (b); \
  (__m128)__builtin_shufflevector( \
      (__v4sf)__a, (__v4sf)__b, \
      (mask) & 0x3, \
      ((mask) & 0xc) >> 2, \
      (((mask) & 0x30) >> 4) + 4, \
      (((mask) & 0xc0) >> 6) + 4); })
Anders Carlsson50099cb2008-12-22 05:20:34 +0000788
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000789static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000790_mm_unpackhi_ps(__m128 __a, __m128 __b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000791{
David Blaikie4f918ae2013-01-16 23:08:36 +0000792 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7);
Anders Carlsson50099cb2008-12-22 05:20:34 +0000793}
794
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000795static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000796_mm_unpacklo_ps(__m128 __a, __m128 __b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000797{
David Blaikie4f918ae2013-01-16 23:08:36 +0000798 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5);
Anders Carlsson50099cb2008-12-22 05:20:34 +0000799}
800
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000801static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000802_mm_move_ss(__m128 __a, __m128 __b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000803{
David Blaikie4f918ae2013-01-16 23:08:36 +0000804 return __builtin_shufflevector(__a, __b, 4, 1, 2, 3);
Anders Carlsson50099cb2008-12-22 05:20:34 +0000805}
806
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000807static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000808_mm_movehl_ps(__m128 __a, __m128 __b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000809{
David Blaikie4f918ae2013-01-16 23:08:36 +0000810 return __builtin_shufflevector(__a, __b, 6, 7, 2, 3);
Anders Carlsson50099cb2008-12-22 05:20:34 +0000811}
812
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000813static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000814_mm_movelh_ps(__m128 __a, __m128 __b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000815{
David Blaikie4f918ae2013-01-16 23:08:36 +0000816 return __builtin_shufflevector(__a, __b, 0, 1, 4, 5);
Anders Carlsson50099cb2008-12-22 05:20:34 +0000817}
818
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000819static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000820_mm_cvtpi16_ps(__m64 __a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000821{
David Blaikie4f918ae2013-01-16 23:08:36 +0000822 __m64 __b, __c;
823 __m128 __r;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000824
David Blaikie4f918ae2013-01-16 23:08:36 +0000825 __b = _mm_setzero_si64();
826 __b = _mm_cmpgt_pi16(__b, __a);
827 __c = _mm_unpackhi_pi16(__a, __b);
828 __r = _mm_setzero_ps();
829 __r = _mm_cvtpi32_ps(__r, __c);
830 __r = _mm_movelh_ps(__r, __r);
831 __c = _mm_unpacklo_pi16(__a, __b);
832 __r = _mm_cvtpi32_ps(__r, __c);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000833
David Blaikie4f918ae2013-01-16 23:08:36 +0000834 return __r;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000835}
836
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000837static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000838_mm_cvtpu16_ps(__m64 __a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000839{
David Blaikie4f918ae2013-01-16 23:08:36 +0000840 __m64 __b, __c;
841 __m128 __r;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000842
David Blaikie4f918ae2013-01-16 23:08:36 +0000843 __b = _mm_setzero_si64();
844 __c = _mm_unpackhi_pi16(__a, __b);
845 __r = _mm_setzero_ps();
846 __r = _mm_cvtpi32_ps(__r, __c);
847 __r = _mm_movelh_ps(__r, __r);
848 __c = _mm_unpacklo_pi16(__a, __b);
849 __r = _mm_cvtpi32_ps(__r, __c);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000850
David Blaikie4f918ae2013-01-16 23:08:36 +0000851 return __r;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000852}
853
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000854static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000855_mm_cvtpi8_ps(__m64 __a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000856{
David Blaikie4f918ae2013-01-16 23:08:36 +0000857 __m64 __b;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000858
David Blaikie4f918ae2013-01-16 23:08:36 +0000859 __b = _mm_setzero_si64();
860 __b = _mm_cmpgt_pi8(__b, __a);
861 __b = _mm_unpacklo_pi8(__a, __b);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000862
David Blaikie4f918ae2013-01-16 23:08:36 +0000863 return _mm_cvtpi16_ps(__b);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000864}
865
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000866static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000867_mm_cvtpu8_ps(__m64 __a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000868{
David Blaikie4f918ae2013-01-16 23:08:36 +0000869 __m64 __b;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000870
David Blaikie4f918ae2013-01-16 23:08:36 +0000871 __b = _mm_setzero_si64();
872 __b = _mm_unpacklo_pi8(__a, __b);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000873
David Blaikie4f918ae2013-01-16 23:08:36 +0000874 return _mm_cvtpi16_ps(__b);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000875}
876
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000877static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000878_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000879{
David Blaikie4f918ae2013-01-16 23:08:36 +0000880 __m128 __c;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000881
David Blaikie4f918ae2013-01-16 23:08:36 +0000882 __c = _mm_setzero_ps();
883 __c = _mm_cvtpi32_ps(__c, __b);
884 __c = _mm_movelh_ps(__c, __c);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000885
David Blaikie4f918ae2013-01-16 23:08:36 +0000886 return _mm_cvtpi32_ps(__c, __a);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000887}
888
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000889static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000890_mm_cvtps_pi16(__m128 __a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000891{
David Blaikie4f918ae2013-01-16 23:08:36 +0000892 __m64 __b, __c;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000893
David Blaikie4f918ae2013-01-16 23:08:36 +0000894 __b = _mm_cvtps_pi32(__a);
895 __a = _mm_movehl_ps(__a, __a);
896 __c = _mm_cvtps_pi32(__a);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000897
David Blaikie4f918ae2013-01-16 23:08:36 +0000898 return _mm_packs_pi16(__b, __c);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000899}
900
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000901static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000902_mm_cvtps_pi8(__m128 __a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000903{
David Blaikie4f918ae2013-01-16 23:08:36 +0000904 __m64 __b, __c;
Anders Carlssona6431dc2008-12-22 07:08:03 +0000905
David Blaikie4f918ae2013-01-16 23:08:36 +0000906 __b = _mm_cvtps_pi16(__a);
907 __c = _mm_setzero_si64();
Anders Carlssona6431dc2008-12-22 07:08:03 +0000908
David Blaikie4f918ae2013-01-16 23:08:36 +0000909 return _mm_packs_pi16(__b, __c);
Anders Carlssona6431dc2008-12-22 07:08:03 +0000910}
911
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000912static __inline__ int __attribute__((__always_inline__, __nodebug__))
David Blaikie4f918ae2013-01-16 23:08:36 +0000913_mm_movemask_ps(__m128 __a)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000914{
David Blaikie4f918ae2013-01-16 23:08:36 +0000915 return __builtin_ia32_movmskps(__a);
Anders Carlsson50099cb2008-12-22 05:20:34 +0000916}
917
/* Packs four 2-bit selectors into one shuffle immediate: w occupies bits 0-1,
   x bits 2-3, y bits 4-5 and z bits 6-7. */
#define _MM_SHUFFLE(z, y, x, w) \
  ((w) | ((x) << 2) | ((y) << 4) | ((z) << 6))
919
/* Bit definitions used with _mm_getcsr()/_mm_setcsr(). */

/* Exception state flags (bits 0-5) and the mask covering all of them. */
#define _MM_EXCEPT_INVALID    (0x0001)
#define _MM_EXCEPT_DENORM     (0x0002)
#define _MM_EXCEPT_DIV_ZERO   (0x0004)
#define _MM_EXCEPT_OVERFLOW   (0x0008)
#define _MM_EXCEPT_UNDERFLOW  (0x0010)
#define _MM_EXCEPT_INEXACT    (0x0020)
#define _MM_EXCEPT_MASK       (0x003f)

/* Exception mask bits (bits 7-12) and the mask covering all of them. */
#define _MM_MASK_INVALID      (0x0080)
#define _MM_MASK_DENORM       (0x0100)
#define _MM_MASK_DIV_ZERO     (0x0200)
#define _MM_MASK_OVERFLOW     (0x0400)
#define _MM_MASK_UNDERFLOW    (0x0800)
#define _MM_MASK_INEXACT      (0x1000)
#define _MM_MASK_MASK         (0x1f80)

/* Rounding mode field (bits 13-14). */
#define _MM_ROUND_NEAREST     (0x0000)
#define _MM_ROUND_DOWN        (0x2000)
#define _MM_ROUND_UP          (0x4000)
#define _MM_ROUND_TOWARD_ZERO (0x6000)
#define _MM_ROUND_MASK        (0x6000)

/* Flush-to-zero field (bit 15). */
#define _MM_FLUSH_ZERO_MASK   (0x8000)
#define _MM_FLUSH_ZERO_ON     (0x8000)
#define _MM_FLUSH_ZERO_OFF    (0x0000)
Anders Carlssonb5955092008-12-22 05:42:03 +0000945
/* Accessors for individual fields of the control/status word. Each getter
   masks the value returned by _mm_getcsr(); each setter clears the field in
   the current value, ORs in x and writes the result back with _mm_setcsr().
   The setters do not mask x, so x must only contain bits of its own field. */
#define _MM_GET_EXCEPTION_MASK() \
  (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_SET_EXCEPTION_MASK(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))

#define _MM_GET_EXCEPTION_STATE() \
  (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_SET_EXCEPTION_STATE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))

#define _MM_GET_FLUSH_ZERO_MODE() \
  (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_SET_FLUSH_ZERO_MODE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))

#define _MM_GET_ROUNDING_MODE() \
  (_mm_getcsr() & _MM_ROUND_MASK)
#define _MM_SET_ROUNDING_MODE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))
955
/* Transposes, in place, the 4x4 float matrix whose rows are the four __m128
   lvalues row0..row3. Each row argument is expanded more than once, so the
   arguments should be plain variables without side effects.
   The temporaries use reserved (double-underscore) names so they cannot
   shadow a caller's argument: with user-namespace names such as tmp0, a call
   like _MM_TRANSPOSE4_PS(tmp0, tmp1, tmp2, tmp3) would operate on the
   macro's own uninitialized locals instead of the caller's variables. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __tmp3, __tmp2, __tmp1, __tmp0; \
  __tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  __tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  __tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  __tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__tmp0, __tmp2); \
  (row1) = _mm_movehl_ps(__tmp2, __tmp0); \
  (row2) = _mm_movelh_ps(__tmp1, __tmp3); \
  (row3) = _mm_movehl_ps(__tmp3, __tmp1); \
} while (0)
968
/* Aliases for compatibility: instruction-style names mapped onto the
   corresponding _mm_* intrinsics defined in this header. */
#define _m_pextrw _mm_extract_pi16
#define _m_pinsrw _mm_insert_pi16
#define _m_pmaxsw _mm_max_pi16
#define _m_pmaxub _mm_max_pu8
#define _m_pminsw _mm_min_pi16
#define _m_pminub _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw _mm_mulhi_pu16
#define _m_pshufw _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb _mm_avg_pu8
#define _m_pavgw _mm_avg_pu16
#define _m_psadbw _mm_sad_pu8
/* NOTE(review): this definition appeared twice; the identical duplicate was
   dropped. `_mm_` names nothing visible in this part of the header, so the
   remaining definition looks like a leftover - confirm and remove if unused. */
#define _m_ _mm_
985
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000986/* Ugly hack for backwards-compatibility (compatible with gcc) */
987#ifdef __SSE2__
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000988#include <emmintrin.h>
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000989#endif
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000990
Anders Carlsson566d8da2008-12-22 00:01:20 +0000991#endif /* __SSE__ */
992
993#endif /* __XMMINTRIN_H */