blob: 51ce5a135ce73503600e8245eef4bc24fb73225b [file] [log] [blame]
/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __XMMINTRIN_H
25#define __XMMINTRIN_H
26
27#ifndef __SSE__
Anders Carlsson4fd3e632008-12-26 00:57:11 +000028#error "SSE instruction set not enabled"
Anders Carlsson566d8da2008-12-22 00:01:20 +000029#else
30
Anders Carlsson4fcc3132008-12-22 00:48:30 +000031#include <mmintrin.h>
32
/* 16-byte vector types used throughout this header. */
typedef float __m128 __attribute__((__vector_size__(16)));
/* Same layout as __m128, spelled as an explicit float vector. */
typedef float __v4sf __attribute__((__vector_size__(16)));
/* Integer view of a 16-byte vector; used for the bitwise operations. */
typedef int __v4si __attribute__((__vector_size__(16)));
36
// <mm_malloc.h> should only be included in a hosted environment, as it depends
// on a standard library to provide allocation routines.
39#if __STDC_HOSTED__
Anders Carlsson398082e2008-12-22 17:42:23 +000040#include <mm_malloc.h>
Chandler Carruth7acb9532010-11-22 08:06:31 +000041#endif
Anders Carlsson398082e2008-12-22 17:42:23 +000042
Chris Lattner1bddbcb2010-03-22 18:14:12 +000043static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000044_mm_add_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000045{
Eli Friedman80c80042009-06-06 02:13:04 +000046 a[0] += b[0];
47 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000048}
49
Chris Lattner1bddbcb2010-03-22 18:14:12 +000050static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000051_mm_add_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000052{
53 return a + b;
54}
55
Chris Lattner1bddbcb2010-03-22 18:14:12 +000056static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000057_mm_sub_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000058{
Eli Friedman80c80042009-06-06 02:13:04 +000059 a[0] -= b[0];
60 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000061}
62
Chris Lattner1bddbcb2010-03-22 18:14:12 +000063static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000064_mm_sub_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000065{
66 return a - b;
67}
68
Chris Lattner1bddbcb2010-03-22 18:14:12 +000069static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000070_mm_mul_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000071{
Eli Friedman80c80042009-06-06 02:13:04 +000072 a[0] *= b[0];
73 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000074}
75
Chris Lattner1bddbcb2010-03-22 18:14:12 +000076static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000077_mm_mul_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000078{
79 return a * b;
80}
81
Chris Lattner1bddbcb2010-03-22 18:14:12 +000082static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000083_mm_div_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000084{
Eli Friedman80c80042009-06-06 02:13:04 +000085 a[0] /= b[0];
86 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000087}
88
Chris Lattner1bddbcb2010-03-22 18:14:12 +000089static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000090_mm_div_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000091{
92 return a / b;
93}
94
Chris Lattner1bddbcb2010-03-22 18:14:12 +000095static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000096_mm_sqrt_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +000097{
98 return __builtin_ia32_sqrtss(a);
99}
100
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000101static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000102_mm_sqrt_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000103{
104 return __builtin_ia32_sqrtps(a);
105}
106
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000107static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000108_mm_rcp_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000109{
110 return __builtin_ia32_rcpss(a);
111}
112
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000113static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000114_mm_rcp_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000115{
116 return __builtin_ia32_rcpps(a);
117}
118
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000119static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000120_mm_rsqrt_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000121{
122 return __builtin_ia32_rsqrtss(a);
123}
124
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000125static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000126_mm_rsqrt_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000127{
128 return __builtin_ia32_rsqrtps(a);
129}
130
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000131static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000132_mm_min_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000133{
134 return __builtin_ia32_minss(a, b);
135}
136
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000137static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000138_mm_min_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000139{
140 return __builtin_ia32_minps(a, b);
141}
142
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000143static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000144_mm_max_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000145{
146 return __builtin_ia32_maxss(a, b);
147}
148
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000149static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000150_mm_max_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000151{
152 return __builtin_ia32_maxps(a, b);
153}
154
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000155static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000156_mm_and_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000157{
Eli Friedman80c80042009-06-06 02:13:04 +0000158 return (__m128)((__v4si)a & (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000159}
160
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000161static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000162_mm_andnot_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000163{
Eli Friedman80c80042009-06-06 02:13:04 +0000164 return (__m128)(~(__v4si)a & (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000165}
166
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000167static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000168_mm_or_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000169{
Eli Friedman80c80042009-06-06 02:13:04 +0000170 return (__m128)((__v4si)a | (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000171}
172
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000173static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000174_mm_xor_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000175{
Chris Lattner2c483452010-01-07 00:36:41 +0000176 return (__m128)((__v4si)a ^ (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000177}
178
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000179static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000180_mm_cmpeq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000181{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000182 return (__m128)__builtin_ia32_cmpss(a, b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000183}
184
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000185static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000186_mm_cmpeq_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000187{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000188 return (__m128)__builtin_ia32_cmpps(a, b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000189}
190
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000191static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000192_mm_cmplt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000193{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000194 return (__m128)__builtin_ia32_cmpss(a, b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000195}
196
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000197static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000198_mm_cmplt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000199{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000200 return (__m128)__builtin_ia32_cmpps(a, b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000201}
202
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000203static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000204_mm_cmple_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000205{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000206 return (__m128)__builtin_ia32_cmpss(a, b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000207}
208
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000209static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000210_mm_cmple_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000211{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000212 return (__m128)__builtin_ia32_cmpps(a, b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000213}
214
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000215static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000216_mm_cmpgt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000217{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000218 return (__m128)__builtin_ia32_cmpss(b, a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000219}
220
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000221static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000222_mm_cmpgt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000223{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000224 return (__m128)__builtin_ia32_cmpps(b, a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000225}
226
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000227static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000228_mm_cmpge_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000229{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000230 return (__m128)__builtin_ia32_cmpss(b, a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000231}
232
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000233static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000234_mm_cmpge_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000235{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000236 return (__m128)__builtin_ia32_cmpps(b, a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000237}
238
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000239static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000240_mm_cmpneq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000241{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000242 return (__m128)__builtin_ia32_cmpss(a, b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000243}
244
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000245static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000246_mm_cmpneq_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000247{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000248 return (__m128)__builtin_ia32_cmpps(a, b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000249}
250
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000251static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000252_mm_cmpnlt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000253{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000254 return (__m128)__builtin_ia32_cmpss(a, b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000255}
256
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000257static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000258_mm_cmpnlt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000259{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000260 return (__m128)__builtin_ia32_cmpps(a, b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000261}
262
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000263static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000264_mm_cmpnle_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000265{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000266 return (__m128)__builtin_ia32_cmpss(a, b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000267}
268
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000269static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000270_mm_cmpnle_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000271{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000272 return (__m128)__builtin_ia32_cmpps(a, b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000273}
274
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000275static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000276_mm_cmpngt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000277{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000278 return (__m128)__builtin_ia32_cmpss(b, a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000279}
280
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000281static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000282_mm_cmpngt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000283{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000284 return (__m128)__builtin_ia32_cmpps(b, a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000285}
286
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000287static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000288_mm_cmpnge_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000289{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000290 return (__m128)__builtin_ia32_cmpss(b, a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000291}
292
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000293static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000294_mm_cmpnge_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000295{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000296 return (__m128)__builtin_ia32_cmpps(b, a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000297}
298
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000299static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000300_mm_cmpord_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000301{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000302 return (__m128)__builtin_ia32_cmpss(a, b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000303}
304
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000305static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000306_mm_cmpord_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000307{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000308 return (__m128)__builtin_ia32_cmpps(a, b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000309}
310
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000311static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000312_mm_cmpunord_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000313{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000314 return (__m128)__builtin_ia32_cmpss(a, b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000315}
316
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000317static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000318_mm_cmpunord_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000319{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000320 return (__m128)__builtin_ia32_cmpps(a, b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000321}
322
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000323static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000324_mm_comieq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000325{
326 return __builtin_ia32_comieq(a, b);
327}
328
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000329static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000330_mm_comilt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000331{
332 return __builtin_ia32_comilt(a, b);
333}
334
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000335static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000336_mm_comile_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000337{
338 return __builtin_ia32_comile(a, b);
339}
340
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000341static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000342_mm_comigt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000343{
344 return __builtin_ia32_comigt(a, b);
345}
346
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000347static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000348_mm_comige_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000349{
350 return __builtin_ia32_comige(a, b);
351}
352
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000353static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000354_mm_comineq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000355{
356 return __builtin_ia32_comineq(a, b);
357}
358
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000359static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000360_mm_ucomieq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000361{
362 return __builtin_ia32_ucomieq(a, b);
363}
364
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000365static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000366_mm_ucomilt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000367{
368 return __builtin_ia32_ucomilt(a, b);
369}
370
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000371static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000372_mm_ucomile_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000373{
374 return __builtin_ia32_ucomile(a, b);
375}
376
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000377static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000378_mm_ucomigt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000379{
380 return __builtin_ia32_ucomigt(a, b);
381}
382
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000383static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000384_mm_ucomige_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000385{
386 return __builtin_ia32_ucomige(a, b);
387}
388
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000389static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000390_mm_ucomineq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000391{
392 return __builtin_ia32_ucomineq(a, b);
393}
394
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000395static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000396_mm_cvtss_si32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000397{
398 return __builtin_ia32_cvtss2si(a);
399}
400
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000401static __inline__ int __attribute__((__always_inline__, __nodebug__))
Chris Lattneref5ebf62010-02-16 18:21:25 +0000402_mm_cvt_ss2si(__m128 a)
403{
404 return _mm_cvtss_si32(a);
405}
406
Eli Friedman80c80042009-06-06 02:13:04 +0000407#ifdef __x86_64__
408
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000409static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000410_mm_cvtss_si64(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000411{
412 return __builtin_ia32_cvtss2si64(a);
413}
414
Eli Friedman80c80042009-06-06 02:13:04 +0000415#endif
416
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000417static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000418_mm_cvtps_pi32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000419{
420 return (__m64)__builtin_ia32_cvtps2pi(a);
421}
422
Chandler Carruthfa38c812010-07-22 06:47:28 +0000423static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
424_mm_cvt_ps2pi(__m128 a)
425{
426 return _mm_cvtps_pi32(a);
427}
428
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000429static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000430_mm_cvttss_si32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000431{
Eli Friedman80c80042009-06-06 02:13:04 +0000432 return a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000433}
434
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000435static __inline__ int __attribute__((__always_inline__, __nodebug__))
Chris Lattneref5ebf62010-02-16 18:21:25 +0000436_mm_cvtt_ss2si(__m128 a)
437{
438 return _mm_cvttss_si32(a);
439}
440
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000441static __inline__ long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000442_mm_cvttss_si64(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000443{
Eli Friedman80c80042009-06-06 02:13:04 +0000444 return a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000445}
446
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000447static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000448_mm_cvttps_pi32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000449{
450 return (__m64)__builtin_ia32_cvttps2pi(a);
451}
452
Chandler Carruthfa38c812010-07-22 06:47:28 +0000453static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
454_mm_cvtt_ps2pi(__m128 a)
455{
456 return _mm_cvttps_pi32(a);
457}
458
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000459static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000460_mm_cvtsi32_ss(__m128 a, int b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000461{
Eli Friedman80c80042009-06-06 02:13:04 +0000462 a[0] = b;
463 return a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000464}
465
Chandler Carruthfa38c812010-07-22 06:47:28 +0000466static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chandler Carruthd74e3982010-08-08 08:30:05 +0000467_mm_cvt_si2ss(__m128 a, int b)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000468{
469 return _mm_cvtsi32_ss(a, b);
470}
471
Anders Carlsson1b76b802008-12-22 01:26:50 +0000472#ifdef __x86_64__
473
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000474static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000475_mm_cvtsi64_ss(__m128 a, long long b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000476{
Eli Friedman80c80042009-06-06 02:13:04 +0000477 a[0] = b;
478 return a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000479}
480
Anders Carlsson1b76b802008-12-22 01:26:50 +0000481#endif
482
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000483static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000484_mm_cvtpi32_ps(__m128 a, __m64 b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000485{
486 return __builtin_ia32_cvtpi2ps(a, (__v2si)b);
487}
488
Chandler Carruthfa38c812010-07-22 06:47:28 +0000489static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chandler Carruthd74e3982010-08-08 08:30:05 +0000490_mm_cvt_pi2ps(__m128 a, __m64 b)
Chandler Carruthfa38c812010-07-22 06:47:28 +0000491{
492 return _mm_cvtpi32_ps(a, b);
493}
494
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000495static __inline__ float __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000496_mm_cvtss_f32(__m128 a)
Anders Carlsson1b76b802008-12-22 01:26:50 +0000497{
Anders Carlssona6431dc2008-12-22 07:08:03 +0000498 return a[0];
Anders Carlsson1b76b802008-12-22 01:26:50 +0000499}
500
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000501static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000502_mm_loadh_pi(__m128 a, const __m64 *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000503{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000504 typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
505 struct __mm_loadh_pi_struct {
506 __mm_loadh_pi_v2f32 u;
507 } __attribute__((__packed__, __may_alias__));
508 __mm_loadh_pi_v2f32 b = ((struct __mm_loadh_pi_struct*)p)->u;
509 __m128 bb = __builtin_shufflevector(b, b, 0, 1, 0, 1);
510 return __builtin_shufflevector(a, bb, 0, 1, 4, 5);
Anders Carlsson97700862008-12-22 02:43:30 +0000511}
512
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000513static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000514_mm_loadl_pi(__m128 a, const __m64 *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000515{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000516 typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
517 struct __mm_loadl_pi_struct {
518 __mm_loadl_pi_v2f32 u;
519 } __attribute__((__packed__, __may_alias__));
520 __mm_loadl_pi_v2f32 b = ((struct __mm_loadl_pi_struct*)p)->u;
521 __m128 bb = __builtin_shufflevector(b, b, 0, 1, 0, 1);
522 return __builtin_shufflevector(a, bb, 4, 5, 2, 3);
Anders Carlsson97700862008-12-22 02:43:30 +0000523}
524
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000525static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000526_mm_load_ss(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000527{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000528 struct __mm_load_ss_struct {
529 float u;
530 } __attribute__((__packed__, __may_alias__));
531 float u = ((struct __mm_load_ss_struct*)p)->u;
532 return (__m128){ u, 0, 0, 0 };
Anders Carlsson97700862008-12-22 02:43:30 +0000533}
534
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000535static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000536_mm_load1_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000537{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000538 struct __mm_load1_ps_struct {
539 float u;
540 } __attribute__((__packed__, __may_alias__));
541 float u = ((struct __mm_load1_ps_struct*)p)->u;
542 return (__m128){ u, u, u, u };
Anders Carlsson97700862008-12-22 02:43:30 +0000543}
544
Eli Friedmandb7351a2009-06-02 05:55:48 +0000545#define _mm_load_ps1(p) _mm_load1_ps(p)
546
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000547static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000548_mm_load_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000549{
550 return *(__m128*)p;
551}
552
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000553static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000554_mm_loadu_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000555{
Bill Wendlingeed92a12011-05-13 00:11:39 +0000556 struct __loadu_ps {
557 __m128 v;
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000558 } __attribute__((__packed__, __may_alias__));
Bill Wendlingeed92a12011-05-13 00:11:39 +0000559 return ((struct __loadu_ps*)p)->v;
Anders Carlsson97700862008-12-22 02:43:30 +0000560}
561
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000562static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000563_mm_loadr_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000564{
565 __m128 a = _mm_load_ps(p);
566 return __builtin_shufflevector(a, a, 3, 2, 1, 0);
567}
568
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000569static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000570_mm_set_ss(float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000571{
572 return (__m128){ w, 0, 0, 0 };
573}
574
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000575static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000576_mm_set1_ps(float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000577{
578 return (__m128){ w, w, w, w };
579}
580
Anders Carlsson12868cc2008-12-27 04:26:15 +0000581// Microsoft specific.
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000582static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000583_mm_set_ps1(float w)
Anders Carlsson12868cc2008-12-27 04:26:15 +0000584{
585 return _mm_set1_ps(w);
586}
587
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000588static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000589_mm_set_ps(float z, float y, float x, float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000590{
591 return (__m128){ w, x, y, z };
592}
593
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000594static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000595_mm_setr_ps(float z, float y, float x, float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000596{
597 return (__m128){ z, y, x, w };
598}
599
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000600static __inline__ __m128 __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000601_mm_setzero_ps(void)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000602{
603 return (__m128){ 0, 0, 0, 0 };
604}
605
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000606static __inline__ void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000607_mm_storeh_pi(__m64 *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000608{
609 __builtin_ia32_storehps((__v2si *)p, a);
610}
611
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000612static __inline__ void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000613_mm_storel_pi(__m64 *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000614{
615 __builtin_ia32_storelps((__v2si *)p, a);
616}
617
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000618static __inline__ void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000619_mm_store_ss(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000620{
Eli Friedman7c06f6b2011-09-15 23:15:27 +0000621 struct __mm_store_ss_struct {
622 float u;
623 } __attribute__((__packed__, __may_alias__));
624 ((struct __mm_store_ss_struct*)p)->u = a[0];
Anders Carlsson09b93052008-12-22 03:16:40 +0000625}
626
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000627static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000628_mm_storeu_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000629{
630 __builtin_ia32_storeups(p, a);
631}
632
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000633static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000634_mm_store1_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000635{
636 a = __builtin_shufflevector(a, a, 0, 0, 0, 0);
637 _mm_storeu_ps(p, a);
638}
639
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000640static __inline__ void __attribute__((__always_inline__, __nodebug__))
Chandler Carruthfa38c812010-07-22 06:47:28 +0000641_mm_store_ps1(float *p, __m128 a)
642{
643 return _mm_store1_ps(p, a);
644}
645
646static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000647_mm_store_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000648{
649 *(__m128 *)p = a;
650}
651
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000652static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000653_mm_storer_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000654{
655 a = __builtin_shufflevector(a, a, 3, 2, 1, 0);
656 _mm_store_ps(p, a);
657}
658
/* Selector values for the `sel` argument of _mm_prefetch. */
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0

/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Issues a read prefetch (rw argument 0) for address a, passing sel (one of
 * the _MM_HINT_* values) as the locality argument of __builtin_prefetch.
 * The address is cast to const void * so const-qualified pointers can be
 * passed without discarding the qualifier. */
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), 0, (sel)))

Chris Lattner1bddbcb2010-03-22 18:14:12 +0000669static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000670_mm_stream_pi(__m64 *p, __m64 a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000671{
672 __builtin_ia32_movntq(p, a);
673}
674
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000675static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000676_mm_stream_ps(float *p, __m128 a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000677{
678 __builtin_ia32_movntps(p, a);
679}
680
/* Emits the "sfence" builtin; takes no arguments and returns nothing.
 * NOTE(review): presumably a store fence ordering earlier stores (including
 * the streaming stores above) before later ones -- confirm against the
 * instruction reference. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
}

Chris Lattner1bddbcb2010-03-22 18:14:12 +0000687static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000688_mm_extract_pi16(__m64 a, int n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000689{
Anders Carlsson62af71c2008-12-22 04:55:36 +0000690 __v4hi b = (__v4hi)a;
Eli Friedman80c80042009-06-06 02:13:04 +0000691 return (unsigned short)b[n & 3];
Anders Carlsson62af71c2008-12-22 04:55:36 +0000692}
693
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000694static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000695_mm_insert_pi16(__m64 a, int d, int n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000696{
Eli Friedman80c80042009-06-06 02:13:04 +0000697 __v4hi b = (__v4hi)a;
698 b[n & 3] = d;
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000699 return (__m64)b;
Anders Carlsson62af71c2008-12-22 04:55:36 +0000700}
701
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000702static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000703_mm_max_pi16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000704{
705 return (__m64)__builtin_ia32_pmaxsw((__v4hi)a, (__v4hi)b);
706}
707
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000708static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000709_mm_max_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000710{
711 return (__m64)__builtin_ia32_pmaxub((__v8qi)a, (__v8qi)b);
712}
713
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000714static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000715_mm_min_pi16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000716{
717 return (__m64)__builtin_ia32_pminsw((__v4hi)a, (__v4hi)b);
718}
719
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000720static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000721_mm_min_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000722{
723 return (__m64)__builtin_ia32_pminub((__v8qi)a, (__v8qi)b);
724}
725
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000726static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000727_mm_movemask_pi8(__m64 a)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000728{
729 return __builtin_ia32_pmovmskb((__v8qi)a);
730}
731
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000732static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000733_mm_mulhi_pu16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000734{
735 return (__m64)__builtin_ia32_pmulhuw((__v4hi)a, (__v4hi)b);
736}
737
/* Applies the "pshufw" builtin to a (reinterpreted as __v4hi) with selector n
 * and yields the result as __m64.
 * Written as a macro so that n reaches the builtin exactly as written; a is
 * evaluated once, through the __a temporary.
 * NOTE(review): n presumably must be an integer constant expression. */
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
  __m64 __a = (a); \
  (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); })

Chris Lattner1bddbcb2010-03-22 18:14:12 +0000742static __inline__ void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000743_mm_maskmove_si64(__m64 d, __m64 n, char *p)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000744{
745 __builtin_ia32_maskmovq((__v8qi)d, (__v8qi)n, p);
746}
747
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000748static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000749_mm_avg_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000750{
751 return (__m64)__builtin_ia32_pavgb((__v8qi)a, (__v8qi)b);
752}
753
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000754static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000755_mm_avg_pu16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000756{
757 return (__m64)__builtin_ia32_pavgw((__v4hi)a, (__v4hi)b);
758}
759
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000760static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000761_mm_sad_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000762{
763 return (__m64)__builtin_ia32_psadbw((__v8qi)a, (__v8qi)b);
764}
Anders Carlssonc1f9afd2008-12-22 05:00:07 +0000765
/* Returns the value produced by the "stmxcsr" builtin, i.e. the current
 * contents of the MXCSR control/status register that the _MM_GET_* macros
 * mask. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  return __builtin_ia32_stmxcsr();
}

/* Passes i to the "ldmxcsr" builtin, i.e. replaces the contents of the MXCSR
 * control/status register. The _MM_SET_* macros use it for read-modify-write
 * updates of individual fields. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int i)
{
  __builtin_ia32_ldmxcsr(i);
}

/* Builds a vector from elements of a and b selected by the 8-bit mask:
 *   result[0] = a[mask bits 1:0]    result[1] = a[mask bits 3:2]
 *   result[2] = b[mask bits 5:4]    result[3] = b[mask bits 7:6]
 * (the "+ 4" moves an index into the second shufflevector operand).
 * a and b are each evaluated once through temporaries; mask is expanded four
 * times and feeds __builtin_shufflevector indices, so it must be a
 * side-effect-free constant -- see _MM_SHUFFLE. */
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
  __m128 __a = (a); \
  __m128 __b = (b); \
  (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \
                                  (mask) & 0x3, ((mask) & 0xc) >> 2, \
                                  (((mask) & 0x30) >> 4) + 4, \
                                  (((mask) & 0xc0) >> 6) + 4); })

Chris Lattner1bddbcb2010-03-22 18:14:12 +0000786static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000787_mm_unpackhi_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000788{
789 return __builtin_shufflevector(a, b, 2, 6, 3, 7);
790}
791
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000792static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000793_mm_unpacklo_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000794{
795 return __builtin_shufflevector(a, b, 0, 4, 1, 5);
796}
797
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000798static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000799_mm_move_ss(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000800{
801 return __builtin_shufflevector(a, b, 4, 1, 2, 3);
802}
803
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000804static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000805_mm_movehl_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000806{
807 return __builtin_shufflevector(a, b, 6, 7, 2, 3);
808}
809
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000810static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000811_mm_movelh_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000812{
813 return __builtin_shufflevector(a, b, 0, 1, 4, 5);
814}
815
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000816static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000817_mm_cvtpi16_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000818{
819 __m64 b, c;
820 __m128 r;
821
822 b = _mm_setzero_si64();
823 b = _mm_cmpgt_pi16(b, a);
824 c = _mm_unpackhi_pi16(a, b);
825 r = _mm_setzero_ps();
826 r = _mm_cvtpi32_ps(r, c);
827 r = _mm_movelh_ps(r, r);
828 c = _mm_unpacklo_pi16(a, b);
829 r = _mm_cvtpi32_ps(r, c);
830
831 return r;
832}
833
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000834static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000835_mm_cvtpu16_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000836{
837 __m64 b, c;
838 __m128 r;
839
840 b = _mm_setzero_si64();
841 c = _mm_unpackhi_pi16(a, b);
842 r = _mm_setzero_ps();
843 r = _mm_cvtpi32_ps(r, c);
844 r = _mm_movelh_ps(r, r);
845 c = _mm_unpacklo_pi16(a, b);
846 r = _mm_cvtpi32_ps(r, c);
847
848 return r;
849}
850
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000851static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000852_mm_cvtpi8_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000853{
854 __m64 b;
855
856 b = _mm_setzero_si64();
857 b = _mm_cmpgt_pi8(b, a);
858 b = _mm_unpacklo_pi8(a, b);
859
860 return _mm_cvtpi16_ps(b);
861}
862
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000863static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000864_mm_cvtpu8_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000865{
866 __m64 b;
867
868 b = _mm_setzero_si64();
869 b = _mm_unpacklo_pi8(a, b);
870
871 return _mm_cvtpi16_ps(b);
872}
873
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000874static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000875_mm_cvtpi32x2_ps(__m64 a, __m64 b)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000876{
877 __m128 c;
878
879 c = _mm_setzero_ps();
880 c = _mm_cvtpi32_ps(c, b);
881 c = _mm_movelh_ps(c, c);
882
883 return _mm_cvtpi32_ps(c, a);
884}
885
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000886static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000887_mm_cvtps_pi16(__m128 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000888{
889 __m64 b, c;
890
891 b = _mm_cvtps_pi32(a);
892 a = _mm_movehl_ps(a, a);
893 c = _mm_cvtps_pi32(a);
894
895 return _mm_packs_pi16(b, c);
896}
897
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000898static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000899_mm_cvtps_pi8(__m128 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000900{
901 __m64 b, c;
902
903 b = _mm_cvtps_pi16(a);
904 c = _mm_setzero_si64();
905
906 return _mm_packs_pi16(b, c);
907}
908
Chris Lattner1bddbcb2010-03-22 18:14:12 +0000909static __inline__ int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000910_mm_movemask_ps(__m128 a)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000911{
912 return __builtin_ia32_movmskps(a);
913}
914
/* Packs four 2-bit selectors into one 8-bit shuffle mask: z occupies bits
 * 7:6, y bits 5:4, x bits 3:2 and w bits 1:0. This is the layout that
 * _mm_shuffle_ps decodes. */
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))

/* Bit fields of the MXCSR value read by _mm_getcsr / written by _mm_setcsr. */

/* Exception state flags (bits 0-5). */
#define _MM_EXCEPT_INVALID    (0x0001)
#define _MM_EXCEPT_DENORM     (0x0002)
#define _MM_EXCEPT_DIV_ZERO   (0x0004)
#define _MM_EXCEPT_OVERFLOW   (0x0008)
#define _MM_EXCEPT_UNDERFLOW  (0x0010)
#define _MM_EXCEPT_INEXACT    (0x0020)
#define _MM_EXCEPT_MASK       (0x003f)

/* Exception mask bits (bits 7-12). */
#define _MM_MASK_INVALID      (0x0080)
#define _MM_MASK_DENORM       (0x0100)
#define _MM_MASK_DIV_ZERO     (0x0200)
#define _MM_MASK_OVERFLOW     (0x0400)
#define _MM_MASK_UNDERFLOW    (0x0800)
#define _MM_MASK_INEXACT      (0x1000)
#define _MM_MASK_MASK         (0x1f80)

/* Rounding mode field (bits 13-14). */
#define _MM_ROUND_NEAREST     (0x0000)
#define _MM_ROUND_DOWN        (0x2000)
#define _MM_ROUND_UP          (0x4000)
#define _MM_ROUND_TOWARD_ZERO (0x6000)
#define _MM_ROUND_MASK        (0x6000)

/* Flush-to-zero bit (bit 15). OFF must be the cleared bit: with OFF equal to
 * ON, _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF) would switch the mode on. */
#define _MM_FLUSH_ZERO_MASK   (0x8000)
#define _MM_FLUSH_ZERO_ON     (0x8000)
#define _MM_FLUSH_ZERO_OFF    (0x0000)

/* Read one field of the control/status word returned by _mm_getcsr(). */
#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)

/* Replace one field: read the current word, clear the field's bits, OR in x
 * and write the result back with _mm_setcsr(). x is not masked, so it should
 * only contain bits that belong to the field being set. */
#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))

/* Transposes, in place, the 4x4 float matrix whose rows are the four __m128
 * lvalues row0..row3.
 * The temporaries use reserved (double-underscore) names so they cannot
 * capture a caller argument that happens to be called tmp0..tmp3. All four
 * unpack results are computed before any row is overwritten. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __tmp3, __tmp2, __tmp1, __tmp0; \
  __tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  __tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  __tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  __tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__tmp0, __tmp2); \
  (row1) = _mm_movehl_ps(__tmp2, __tmp0); \
  (row2) = _mm_movelh_ps(__tmp1, __tmp3); \
  (row3) = _mm_movehl_ps(__tmp3, __tmp1); \
} while (0)

/* Aliases for compatibility: each _m_* name expands to the corresponding
 * _mm_* intrinsic defined above. */
#define _m_pextrw _mm_extract_pi16
#define _m_pinsrw _mm_insert_pi16
#define _m_pmaxsw _mm_max_pi16
#define _m_pmaxub _mm_max_pu8
#define _m_pminsw _mm_min_pi16
#define _m_pminub _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw _mm_mulhi_pu16
#define _m_pshufw _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb _mm_avg_pu8
#define _m_pavgw _mm_avg_pu16
#define _m_psadbw _mm_sad_pu8
/* Defined once; the original repeated this line verbatim. */
#define _m_ _mm_

Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000983/* Ugly hack for backwards-compatibility (compatible with gcc) */
984#ifdef __SSE2__
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000985#include <emmintrin.h>
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000986#endif
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000987
Anders Carlsson566d8da2008-12-22 00:01:20 +0000988#endif /* __SSE__ */
989
990#endif /* __XMMINTRIN_H */