blob: 2f3888bebc76774fd652e1cf374320a5f7e5b1ba [file] [log] [blame]
/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __XMMINTRIN_H
25#define __XMMINTRIN_H
26
27#ifndef __SSE__
Anders Carlsson4fd3e632008-12-26 00:57:11 +000028#error "SSE instruction set not enabled"
Anders Carlsson566d8da2008-12-22 00:01:20 +000029#else
30
Anders Carlsson4fcc3132008-12-22 00:48:30 +000031#include <mmintrin.h>
32
/* 16-byte vector of floats used when calling the SSE builtins. */
typedef float __v4sf __attribute__((__vector_size__(16)));
/* 16-byte vector of floats; the operand/result type of the _mm_* functions
   defined in this header. */
typedef float __m128 __attribute__((__vector_size__(16)));
35
Anders Carlsson398082e2008-12-22 17:42:23 +000036#include <mm_malloc.h>
Anders Carlsson398082e2008-12-22 17:42:23 +000037
Anders Carlssona2f12ae2009-02-14 01:00:11 +000038static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000039_mm_add_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000040{
Eli Friedman80c80042009-06-06 02:13:04 +000041 a[0] += b[0];
42 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000043}
44
Anders Carlssona2f12ae2009-02-14 01:00:11 +000045static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000046_mm_add_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000047{
48 return a + b;
49}
50
Anders Carlssona2f12ae2009-02-14 01:00:11 +000051static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000052_mm_sub_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000053{
Eli Friedman80c80042009-06-06 02:13:04 +000054 a[0] -= b[0];
55 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000056}
57
Anders Carlssona2f12ae2009-02-14 01:00:11 +000058static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000059_mm_sub_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000060{
61 return a - b;
62}
63
Anders Carlssona2f12ae2009-02-14 01:00:11 +000064static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000065_mm_mul_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000066{
Eli Friedman80c80042009-06-06 02:13:04 +000067 a[0] *= b[0];
68 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000069}
70
Anders Carlssona2f12ae2009-02-14 01:00:11 +000071static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000072_mm_mul_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000073{
74 return a * b;
75}
76
Anders Carlssona2f12ae2009-02-14 01:00:11 +000077static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000078_mm_div_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000079{
Eli Friedman80c80042009-06-06 02:13:04 +000080 a[0] /= b[0];
81 return a;
Anders Carlsson566d8da2008-12-22 00:01:20 +000082}
83
Anders Carlssona2f12ae2009-02-14 01:00:11 +000084static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000085_mm_div_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +000086{
87 return a / b;
88}
89
Anders Carlssona2f12ae2009-02-14 01:00:11 +000090static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000091_mm_sqrt_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +000092{
93 return __builtin_ia32_sqrtss(a);
94}
95
Anders Carlssona2f12ae2009-02-14 01:00:11 +000096static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +000097_mm_sqrt_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +000098{
99 return __builtin_ia32_sqrtps(a);
100}
101
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000102static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000103_mm_rcp_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000104{
105 return __builtin_ia32_rcpss(a);
106}
107
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000108static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000109_mm_rcp_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000110{
111 return __builtin_ia32_rcpps(a);
112}
113
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000114static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000115_mm_rsqrt_ss(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000116{
117 return __builtin_ia32_rsqrtss(a);
118}
119
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000120static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000121_mm_rsqrt_ps(__m128 a)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000122{
123 return __builtin_ia32_rsqrtps(a);
124}
125
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000126static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000127_mm_min_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000128{
129 return __builtin_ia32_minss(a, b);
130}
131
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000132static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000133_mm_min_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000134{
135 return __builtin_ia32_minps(a, b);
136}
137
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000138static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000139_mm_max_ss(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000140{
141 return __builtin_ia32_maxss(a, b);
142}
143
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000144static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000145_mm_max_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000146{
147 return __builtin_ia32_maxps(a, b);
148}
149
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000150static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000151_mm_and_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000152{
Eli Friedman80c80042009-06-06 02:13:04 +0000153 typedef int __v4si __attribute__((__vector_size__(16)));
154 return (__m128)((__v4si)a & (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000155}
156
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000157static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000158_mm_andnot_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000159{
Eli Friedman80c80042009-06-06 02:13:04 +0000160 typedef int __v4si __attribute__((__vector_size__(16)));
161 return (__m128)(~(__v4si)a & (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000162}
163
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000164static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000165_mm_or_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000166{
Eli Friedman80c80042009-06-06 02:13:04 +0000167 typedef int __v4si __attribute__((__vector_size__(16)));
168 return (__m128)((__v4si)a | (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000169}
170
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000171static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000172_mm_xor_ps(__m128 a, __m128 b)
Anders Carlsson566d8da2008-12-22 00:01:20 +0000173{
Eli Friedman80c80042009-06-06 02:13:04 +0000174 typedef int __v4si __attribute__((__vector_size__(16)));
Chris Lattner2c483452010-01-07 00:36:41 +0000175 return (__m128)((__v4si)a ^ (__v4si)b);
Anders Carlsson566d8da2008-12-22 00:01:20 +0000176}
177
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000178static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000179_mm_cmpeq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000180{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000181 return (__m128)__builtin_ia32_cmpss(a, b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000182}
183
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000184static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000185_mm_cmpeq_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000186{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000187 return (__m128)__builtin_ia32_cmpps(a, b, 0);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000188}
189
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000190static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000191_mm_cmplt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000192{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000193 return (__m128)__builtin_ia32_cmpss(a, b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000194}
195
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000196static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000197_mm_cmplt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000198{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000199 return (__m128)__builtin_ia32_cmpps(a, b, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000200}
201
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000202static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000203_mm_cmple_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000204{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000205 return (__m128)__builtin_ia32_cmpss(a, b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000206}
207
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000208static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000209_mm_cmple_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000210{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000211 return (__m128)__builtin_ia32_cmpps(a, b, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000212}
213
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000214static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000215_mm_cmpgt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000216{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000217 return (__m128)__builtin_ia32_cmpss(b, a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000218}
219
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000220static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000221_mm_cmpgt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000222{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000223 return (__m128)__builtin_ia32_cmpps(b, a, 1);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000224}
225
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000226static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000227_mm_cmpge_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000228{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000229 return (__m128)__builtin_ia32_cmpss(b, a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000230}
231
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000232static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000233_mm_cmpge_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000234{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000235 return (__m128)__builtin_ia32_cmpps(b, a, 2);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000236}
237
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000238static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000239_mm_cmpneq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000240{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000241 return (__m128)__builtin_ia32_cmpss(a, b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000242}
243
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000244static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000245_mm_cmpneq_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000246{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000247 return (__m128)__builtin_ia32_cmpps(a, b, 4);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000248}
249
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000250static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000251_mm_cmpnlt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000252{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000253 return (__m128)__builtin_ia32_cmpss(a, b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000254}
255
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000256static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000257_mm_cmpnlt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000258{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000259 return (__m128)__builtin_ia32_cmpps(a, b, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000260}
261
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000262static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000263_mm_cmpnle_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000264{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000265 return (__m128)__builtin_ia32_cmpss(a, b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000266}
267
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000268static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000269_mm_cmpnle_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000270{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000271 return (__m128)__builtin_ia32_cmpps(a, b, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000272}
273
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000274static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000275_mm_cmpngt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000276{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000277 return (__m128)__builtin_ia32_cmpss(b, a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000278}
279
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000280static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000281_mm_cmpngt_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000282{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000283 return (__m128)__builtin_ia32_cmpps(b, a, 5);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000284}
285
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000286static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000287_mm_cmpnge_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000288{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000289 return (__m128)__builtin_ia32_cmpss(b, a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000290}
291
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000292static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000293_mm_cmpnge_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000294{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000295 return (__m128)__builtin_ia32_cmpps(b, a, 6);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000296}
297
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000298static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000299_mm_cmpord_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000300{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000301 return (__m128)__builtin_ia32_cmpss(a, b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000302}
303
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000304static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000305_mm_cmpord_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000306{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000307 return (__m128)__builtin_ia32_cmpps(a, b, 7);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000308}
309
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000310static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000311_mm_cmpunord_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000312{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000313 return (__m128)__builtin_ia32_cmpss(a, b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000314}
315
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000316static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000317_mm_cmpunord_ps(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000318{
Anders Carlsson79dcf5f2009-05-18 19:16:46 +0000319 return (__m128)__builtin_ia32_cmpps(a, b, 3);
Anders Carlssonf62c6812008-12-22 00:28:39 +0000320}
321
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000322static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000323_mm_comieq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000324{
325 return __builtin_ia32_comieq(a, b);
326}
327
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000328static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000329_mm_comilt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000330{
331 return __builtin_ia32_comilt(a, b);
332}
333
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000334static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000335_mm_comile_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000336{
337 return __builtin_ia32_comile(a, b);
338}
339
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000340static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000341_mm_comigt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000342{
343 return __builtin_ia32_comigt(a, b);
344}
345
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000346static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000347_mm_comige_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000348{
349 return __builtin_ia32_comige(a, b);
350}
351
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000352static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000353_mm_comineq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000354{
355 return __builtin_ia32_comineq(a, b);
356}
357
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000358static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000359_mm_ucomieq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000360{
361 return __builtin_ia32_ucomieq(a, b);
362}
363
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000364static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000365_mm_ucomilt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000366{
367 return __builtin_ia32_ucomilt(a, b);
368}
369
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000370static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000371_mm_ucomile_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000372{
373 return __builtin_ia32_ucomile(a, b);
374}
375
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000376static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000377_mm_ucomigt_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000378{
379 return __builtin_ia32_ucomigt(a, b);
380}
381
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000382static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000383_mm_ucomige_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000384{
385 return __builtin_ia32_ucomige(a, b);
386}
387
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000388static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000389_mm_ucomineq_ss(__m128 a, __m128 b)
Anders Carlssonf62c6812008-12-22 00:28:39 +0000390{
391 return __builtin_ia32_ucomineq(a, b);
392}
393
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000394static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000395_mm_cvtss_si32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000396{
397 return __builtin_ia32_cvtss2si(a);
398}
399
Chris Lattneref5ebf62010-02-16 18:21:25 +0000400static inline int __attribute__((__always_inline__, __nodebug__))
401_mm_cvt_ss2si(__m128 a)
402{
403 return _mm_cvtss_si32(a);
404}
405
Eli Friedman80c80042009-06-06 02:13:04 +0000406#ifdef __x86_64__
407
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000408static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000409_mm_cvtss_si64(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000410{
411 return __builtin_ia32_cvtss2si64(a);
412}
413
Eli Friedman80c80042009-06-06 02:13:04 +0000414#endif
415
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000416static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000417_mm_cvtps_pi32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000418{
419 return (__m64)__builtin_ia32_cvtps2pi(a);
420}
421
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000422static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000423_mm_cvttss_si32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000424{
Eli Friedman80c80042009-06-06 02:13:04 +0000425 return a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000426}
427
Chris Lattneref5ebf62010-02-16 18:21:25 +0000428static inline int __attribute__((__always_inline__, __nodebug__))
429_mm_cvtt_ss2si(__m128 a)
430{
431 return _mm_cvttss_si32(a);
432}
433
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000434static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000435_mm_cvttss_si64(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000436{
Eli Friedman80c80042009-06-06 02:13:04 +0000437 return a[0];
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000438}
439
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000440static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000441_mm_cvttps_pi32(__m128 a)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000442{
443 return (__m64)__builtin_ia32_cvttps2pi(a);
444}
445
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000446static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000447_mm_cvtsi32_ss(__m128 a, int b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000448{
Eli Friedman80c80042009-06-06 02:13:04 +0000449 a[0] = b;
450 return a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000451}
452
Anders Carlsson1b76b802008-12-22 01:26:50 +0000453#ifdef __x86_64__
454
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000455static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000456_mm_cvtsi64_ss(__m128 a, long long b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000457{
Eli Friedman80c80042009-06-06 02:13:04 +0000458 a[0] = b;
459 return a;
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000460}
461
Anders Carlsson1b76b802008-12-22 01:26:50 +0000462#endif
463
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000464static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000465_mm_cvtpi32_ps(__m128 a, __m64 b)
Anders Carlsson4fcc3132008-12-22 00:48:30 +0000466{
467 return __builtin_ia32_cvtpi2ps(a, (__v2si)b);
468}
469
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000470static inline float __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000471_mm_cvtss_f32(__m128 a)
Anders Carlsson1b76b802008-12-22 01:26:50 +0000472{
Anders Carlssona6431dc2008-12-22 07:08:03 +0000473 return a[0];
Anders Carlsson1b76b802008-12-22 01:26:50 +0000474}
475
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000476static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000477_mm_loadh_pi(__m128 a, const __m64 *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000478{
Eli Friedmane0ae8bd2009-06-07 07:12:56 +0000479 __m128 b;
480 b[0] = *(float*)p;
481 b[1] = *((float*)p+1);
482 return __builtin_shufflevector(a, b, 0, 1, 4, 5);
Anders Carlsson97700862008-12-22 02:43:30 +0000483}
484
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000485static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000486_mm_loadl_pi(__m128 a, const __m64 *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000487{
Eli Friedman80c80042009-06-06 02:13:04 +0000488 __m128 b;
489 b[0] = *(float*)p;
490 b[1] = *((float*)p+1);
Eli Friedmane0ae8bd2009-06-07 07:12:56 +0000491 return __builtin_shufflevector(a, b, 4, 5, 2, 3);
Anders Carlsson97700862008-12-22 02:43:30 +0000492}
493
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000494static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000495_mm_load_ss(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000496{
497 return (__m128){ *p, 0, 0, 0 };
498}
499
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000500static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000501_mm_load1_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000502{
503 return (__m128){ *p, *p, *p, *p };
504}
505
/* Alternate spelling of _mm_load1_ps; expands to a direct call. */
#define _mm_load_ps1(p) _mm_load1_ps(p)
507
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000508static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000509_mm_load_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000510{
511 return *(__m128*)p;
512}
513
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000514static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000515_mm_loadu_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000516{
517 return __builtin_ia32_loadups(p);
518}
519
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000520static inline __m128 __attribute__((__always_inline__, __nodebug__))
Chris Lattner21b91a32010-02-01 20:14:14 +0000521_mm_loadr_ps(const float *p)
Anders Carlsson97700862008-12-22 02:43:30 +0000522{
523 __m128 a = _mm_load_ps(p);
524 return __builtin_shufflevector(a, a, 3, 2, 1, 0);
525}
526
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000527static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000528_mm_set_ss(float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000529{
530 return (__m128){ w, 0, 0, 0 };
531}
532
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000533static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000534_mm_set1_ps(float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000535{
536 return (__m128){ w, w, w, w };
537}
538
Anders Carlsson12868cc2008-12-27 04:26:15 +0000539// Microsoft specific.
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000540static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000541_mm_set_ps1(float w)
Anders Carlsson12868cc2008-12-27 04:26:15 +0000542{
543 return _mm_set1_ps(w);
544}
545
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000546static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000547_mm_set_ps(float z, float y, float x, float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000548{
549 return (__m128){ w, x, y, z };
550}
551
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000552static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000553_mm_setr_ps(float z, float y, float x, float w)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000554{
555 return (__m128){ z, y, x, w };
556}
557
Daniel Dunbard99e31b2009-02-17 07:57:58 +0000558static inline __m128 __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000559_mm_setzero_ps(void)
Anders Carlssona6ba0012008-12-22 02:51:35 +0000560{
561 return (__m128){ 0, 0, 0, 0 };
562}
563
Daniel Dunbard99e31b2009-02-17 07:57:58 +0000564static inline void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000565_mm_storeh_pi(__m64 *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000566{
567 __builtin_ia32_storehps((__v2si *)p, a);
568}
569
Daniel Dunbard99e31b2009-02-17 07:57:58 +0000570static inline void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000571_mm_storel_pi(__m64 *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000572{
573 __builtin_ia32_storelps((__v2si *)p, a);
574}
575
Daniel Dunbard99e31b2009-02-17 07:57:58 +0000576static inline void __attribute__((__always_inline__))
Mike Stumpdae44132009-02-13 14:24:50 +0000577_mm_store_ss(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000578{
579 *p = a[0];
580}
581
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000582static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000583_mm_storeu_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000584{
585 __builtin_ia32_storeups(p, a);
586}
587
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000588static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000589_mm_store1_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000590{
591 a = __builtin_shufflevector(a, a, 0, 0, 0, 0);
592 _mm_storeu_ps(p, a);
593}
594
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000595static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000596_mm_store_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000597{
598 *(__m128 *)p = a;
599}
600
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000601static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000602_mm_storer_ps(float *p, __m128 a)
Anders Carlsson09b93052008-12-22 03:16:40 +0000603{
604 a = __builtin_shufflevector(a, a, 3, 2, 1, 0);
605 _mm_store_ps(p, a);
606}
607
/* Locality hints for _mm_prefetch.  The value is passed unchanged as the
   locality argument of __builtin_prefetch, where 3 asks for the highest
   degree of temporal locality and 0 for none.  T0 therefore has to be the
   largest value and NTA the smallest. */
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0
612
/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet.

   Both arguments are parenthesized so that an expression argument (for
   example `base + off` or `addr & ~63`) is converted to void * as a whole
   instead of having the cast bind to its first operand only. */

#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
617
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000618static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000619_mm_stream_pi(__m64 *p, __m64 a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000620{
621 __builtin_ia32_movntq(p, a);
622}
623
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000624static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000625_mm_stream_ps(float *p, __m128 a)
Anders Carlssondedad4e2008-12-22 03:50:21 +0000626{
627 __builtin_ia32_movntps(p, a);
628}
629
/* Issue a store fence by invoking __builtin_ia32_sfence; takes no arguments
   and returns nothing. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
  return;
}
635
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000636static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000637_mm_extract_pi16(__m64 a, int n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000638{
Anders Carlsson62af71c2008-12-22 04:55:36 +0000639 __v4hi b = (__v4hi)a;
Eli Friedman80c80042009-06-06 02:13:04 +0000640 return (unsigned short)b[n & 3];
Anders Carlsson62af71c2008-12-22 04:55:36 +0000641}
642
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000643static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000644_mm_insert_pi16(__m64 a, int d, int n)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000645{
Eli Friedman80c80042009-06-06 02:13:04 +0000646 __v4hi b = (__v4hi)a;
647 b[n & 3] = d;
Eli Friedman17d2e3a2009-06-06 03:45:06 +0000648 return (__m64)b;
Anders Carlsson62af71c2008-12-22 04:55:36 +0000649}
650
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000651static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000652_mm_max_pi16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000653{
654 return (__m64)__builtin_ia32_pmaxsw((__v4hi)a, (__v4hi)b);
655}
656
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000657static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000658_mm_max_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000659{
660 return (__m64)__builtin_ia32_pmaxub((__v8qi)a, (__v8qi)b);
661}
662
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000663static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000664_mm_min_pi16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000665{
666 return (__m64)__builtin_ia32_pminsw((__v4hi)a, (__v4hi)b);
667}
668
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000669static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000670_mm_min_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000671{
672 return (__m64)__builtin_ia32_pminub((__v8qi)a, (__v8qi)b);
673}
674
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000675static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000676_mm_movemask_pi8(__m64 a)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000677{
678 return __builtin_ia32_pmovmskb((__v8qi)a);
679}
680
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000681static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000682_mm_mulhi_pu16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000683{
684 return (__m64)__builtin_ia32_pmulhuw((__v4hi)a, (__v4hi)b);
685}
686
/* Permute the four elements of `a` (viewed as a __v4hi).  Each 2-bit field of
   the constant `n`, lowest field first, is the source index for result
   elements 0..3.  The second shuffle operand is a dummy and is never
   selected because every index is in 0..3. */
#define _mm_shuffle_pi16(a, n) \
  ((__m64)__builtin_shufflevector((__v4hi)(a), (__v4hi){ 0 }, \
                                  (n) & 0x3, \
                                  ((n) >> 2) & 0x3, \
                                  ((n) >> 4) & 0x3, \
                                  ((n) >> 6) & 0x3))
Anders Carlsson62af71c2008-12-22 04:55:36 +0000691
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000692static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000693_mm_maskmove_si64(__m64 d, __m64 n, char *p)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000694{
695 __builtin_ia32_maskmovq((__v8qi)d, (__v8qi)n, p);
696}
697
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000698static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000699_mm_avg_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000700{
701 return (__m64)__builtin_ia32_pavgb((__v8qi)a, (__v8qi)b);
702}
703
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000704static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000705_mm_avg_pu16(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000706{
707 return (__m64)__builtin_ia32_pavgw((__v4hi)a, (__v4hi)b);
708}
709
Chris Lattner7add5472009-04-17 17:55:23 +0000710static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000711_mm_sad_pu8(__m64 a, __m64 b)
Anders Carlsson62af71c2008-12-22 04:55:36 +0000712{
713 return (__m64)__builtin_ia32_psadbw((__v8qi)a, (__v8qi)b);
714}
Anders Carlssonc1f9afd2008-12-22 05:00:07 +0000715
/* Return the control/status word obtained from __builtin_ia32_stmxcsr. */
static inline unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  unsigned int csr = __builtin_ia32_stmxcsr();

  return csr;
}
721
/* Install `i` as the new control/status word via __builtin_ia32_ldmxcsr. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int i)
{
  unsigned int csr = i;

  __builtin_ia32_ldmxcsr(csr);
}
727
/* Shuffle two vectors under a constant `mask`.  Result lanes 0 and 1 come
   from `a`, indexed by mask bits 0-1 and 2-3.  Result lanes 2 and 3 come from
   `b`, indexed by mask bits 4-5 and 6-7; the +4 selects the second operand of
   __builtin_shufflevector. */
#define _mm_shuffle_ps(a, b, mask) \
  (__builtin_shufflevector(a, b, \
                           (mask) & 0x3, \
                           ((mask) >> 2) & 0x3, \
                           (((mask) >> 4) & 0x3) + 4, \
                           (((mask) >> 6) & 0x3) + 4))
Anders Carlsson50099cb2008-12-22 05:20:34 +0000732
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000733static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000734_mm_unpackhi_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000735{
736 return __builtin_shufflevector(a, b, 2, 6, 3, 7);
737}
738
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000739static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000740_mm_unpacklo_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000741{
742 return __builtin_shufflevector(a, b, 0, 4, 1, 5);
743}
744
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000745static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000746_mm_move_ss(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000747{
748 return __builtin_shufflevector(a, b, 4, 1, 2, 3);
749}
750
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000751static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000752_mm_movehl_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000753{
754 return __builtin_shufflevector(a, b, 6, 7, 2, 3);
755}
756
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000757static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000758_mm_movelh_ps(__m128 a, __m128 b)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000759{
760 return __builtin_shufflevector(a, b, 0, 1, 4, 5);
761}
762
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000763static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000764_mm_cvtpi16_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000765{
766 __m64 b, c;
767 __m128 r;
768
769 b = _mm_setzero_si64();
770 b = _mm_cmpgt_pi16(b, a);
771 c = _mm_unpackhi_pi16(a, b);
772 r = _mm_setzero_ps();
773 r = _mm_cvtpi32_ps(r, c);
774 r = _mm_movelh_ps(r, r);
775 c = _mm_unpacklo_pi16(a, b);
776 r = _mm_cvtpi32_ps(r, c);
777
778 return r;
779}
780
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000781static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000782_mm_cvtpu16_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000783{
784 __m64 b, c;
785 __m128 r;
786
787 b = _mm_setzero_si64();
788 c = _mm_unpackhi_pi16(a, b);
789 r = _mm_setzero_ps();
790 r = _mm_cvtpi32_ps(r, c);
791 r = _mm_movelh_ps(r, r);
792 c = _mm_unpacklo_pi16(a, b);
793 r = _mm_cvtpi32_ps(r, c);
794
795 return r;
796}
797
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000798static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000799_mm_cvtpi8_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000800{
801 __m64 b;
802
803 b = _mm_setzero_si64();
804 b = _mm_cmpgt_pi8(b, a);
805 b = _mm_unpacklo_pi8(a, b);
806
807 return _mm_cvtpi16_ps(b);
808}
809
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000810static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000811_mm_cvtpu8_ps(__m64 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000812{
813 __m64 b;
814
815 b = _mm_setzero_si64();
816 b = _mm_unpacklo_pi8(a, b);
817
818 return _mm_cvtpi16_ps(b);
819}
820
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000821static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000822_mm_cvtpi32x2_ps(__m64 a, __m64 b)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000823{
824 __m128 c;
825
826 c = _mm_setzero_ps();
827 c = _mm_cvtpi32_ps(c, b);
828 c = _mm_movelh_ps(c, c);
829
830 return _mm_cvtpi32_ps(c, a);
831}
832
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000833static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000834_mm_cvtps_pi16(__m128 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000835{
836 __m64 b, c;
837
838 b = _mm_cvtps_pi32(a);
839 a = _mm_movehl_ps(a, a);
840 c = _mm_cvtps_pi32(a);
841
842 return _mm_packs_pi16(b, c);
843}
844
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000845static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000846_mm_cvtps_pi8(__m128 a)
Anders Carlssona6431dc2008-12-22 07:08:03 +0000847{
848 __m64 b, c;
849
850 b = _mm_cvtps_pi16(a);
851 c = _mm_setzero_si64();
852
853 return _mm_packs_pi16(b, c);
854}
855
Anders Carlssona2f12ae2009-02-14 01:00:11 +0000856static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stumpdae44132009-02-13 14:24:50 +0000857_mm_movemask_ps(__m128 a)
Anders Carlsson50099cb2008-12-22 05:20:34 +0000858{
859 return __builtin_ia32_movmskps(a);
860}
861
/* Pack four 2-bit selectors into one shuffle immediate: `w` occupies bits
   0-1, `x` bits 2-3, `y` bits 4-5 and `z` bits 6-7. */
#define _MM_SHUFFLE(z, y, x, w) \
  ((w) | ((x) << 2) | ((y) << 4) | ((z) << 6))
863
/* Exception state flags: bits 0-5 of the word read by _mm_getcsr().
   _MM_EXCEPT_MASK covers all six of them. */
#define _MM_EXCEPT_INVALID   (1 << 0)
#define _MM_EXCEPT_DENORM    (1 << 1)
#define _MM_EXCEPT_DIV_ZERO  (1 << 2)
#define _MM_EXCEPT_OVERFLOW  (1 << 3)
#define _MM_EXCEPT_UNDERFLOW (1 << 4)
#define _MM_EXCEPT_INEXACT   (1 << 5)
#define _MM_EXCEPT_MASK      (0x3f)
Anders Carlsson4cc44272009-02-11 06:29:32 +0000871
/* Exception mask bits: bits 7-12 of the word read by _mm_getcsr().
   _MM_MASK_MASK covers all six of them. */
#define _MM_MASK_INVALID   (1 << 7)
#define _MM_MASK_DENORM    (1 << 8)
#define _MM_MASK_DIV_ZERO  (1 << 9)
#define _MM_MASK_OVERFLOW  (1 << 10)
#define _MM_MASK_UNDERFLOW (1 << 11)
#define _MM_MASK_INEXACT   (1 << 12)
#define _MM_MASK_MASK      (0x3f << 7)
879
/* Rounding mode: a 2-bit field at bits 13-14 of the word read by
   _mm_getcsr().  _MM_ROUND_MASK selects the whole field. */
#define _MM_ROUND_NEAREST     (0 << 13)
#define _MM_ROUND_DOWN        (1 << 13)
#define _MM_ROUND_UP          (2 << 13)
#define _MM_ROUND_TOWARD_ZERO (3 << 13)
#define _MM_ROUND_MASK        (3 << 13)
Anders Carlssonbbd1fa22009-01-21 01:49:39 +0000885
/* Flush-to-zero control: bit 15 of the word read by _mm_getcsr().
   ON sets the bit and OFF leaves it clear.  OFF has to be 0, otherwise
   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF) would switch the mode on. */
#define _MM_FLUSH_ZERO_MASK (0x8000)
#define _MM_FLUSH_ZERO_ON (0x8000)
#define _MM_FLUSH_ZERO_OFF (0x0000)
Anders Carlssonb5955092008-12-22 05:42:03 +0000889
/* Read one field of the control/status word: each macro calls _mm_getcsr()
   and keeps only the bits selected by the matching *_MASK constant. */
#define _MM_GET_EXCEPTION_MASK()  (_MM_MASK_MASK & _mm_getcsr())
#define _MM_GET_EXCEPTION_STATE() (_MM_EXCEPT_MASK & _mm_getcsr())
#define _MM_GET_FLUSH_ZERO_MODE() (_MM_FLUSH_ZERO_MASK & _mm_getcsr())
#define _MM_GET_ROUNDING_MODE()   (_MM_ROUND_MASK & _mm_getcsr())
894
/* Replace one field of the control/status word: read it with _mm_getcsr(),
   clear the bits of the matching *_MASK constant, OR in `x`, and write the
   result back with _mm_setcsr().  `x` is not masked, so the caller must pass
   a value that lies within the field. */
#define _MM_SET_EXCEPTION_MASK(x) \
  (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_MASK_MASK)))
#define _MM_SET_EXCEPTION_STATE(x) \
  (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_EXCEPT_MASK)))
#define _MM_SET_FLUSH_ZERO_MODE(x) \
  (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK)))
#define _MM_SET_ROUNDING_MODE(x) \
  (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_ROUND_MASK)))
899
/* Transpose, in place, the 4x4 float matrix whose rows are the four __m128
   lvalues row0..row3.  All four inputs are read into temporaries before any
   row is written.  The temporaries use reserved (double-underscore) names so
   that they cannot capture a caller's variable or be rewritten by a user
   macro; with plain names, a call such as _MM_TRANSPOSE4_PS(tmp0, ...) would
   read the macro's own uninitialized local instead of the caller's row. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  __m128 __tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  __m128 __tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  __m128 __tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__tmp0, __tmp2); \
  (row1) = _mm_movehl_ps(__tmp2, __tmp0); \
  (row2) = _mm_movelh_ps(__tmp1, __tmp3); \
  (row3) = _mm_movehl_ps(__tmp3, __tmp1); \
} while (0)
912
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000913/* Ugly hack for backwards-compatibility (compatible with gcc) */
914#ifdef __SSE2__
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000915#include <emmintrin.h>
Eli Friedmanc7d95dc2009-06-11 18:50:02 +0000916#endif
Daniel Dunbar3eef3e12009-06-07 08:33:23 +0000917
Anders Carlsson566d8da2008-12-22 00:01:20 +0000918#endif /* __SSE__ */
919
920#endif /* __XMMINTRIN_H */