blob: c104f6301a0c5d369a58a1fd0d96569986bdbf7e [file] [log] [blame]
/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __XMMINTRIN_H
25#define __XMMINTRIN_H
26
27#ifndef __SSE__
Anders Carlsson19ef5d42008-12-26 00:57:11 +000028#error "SSE instruction set not enabled"
Anders Carlsson2df1ce42008-12-22 00:01:20 +000029#else
30
Anders Carlsson157643c2008-12-22 00:48:30 +000031#include <mmintrin.h>
32
/* 16-byte vectors holding four floats: __v4sf is the internal spelling and
   __m128 is the type used in the intrinsic signatures below. */
typedef float __v4sf __attribute__((__vector_size__(4 * sizeof(float))));
typedef float __m128 __attribute__((__vector_size__(4 * sizeof(float))));
35
Anders Carlssonc7250232008-12-22 17:42:23 +000036#include <mm_malloc.h>
Anders Carlssonc7250232008-12-22 17:42:23 +000037
Anders Carlsson823c02e2009-02-14 01:00:11 +000038static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000039_mm_add_ss(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000040{
41 return __builtin_ia32_addss(a, b);
42}
43
Anders Carlsson823c02e2009-02-14 01:00:11 +000044static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000045_mm_add_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000046{
47 return a + b;
48}
49
Anders Carlsson823c02e2009-02-14 01:00:11 +000050static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000051_mm_sub_ss(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000052{
53 return __builtin_ia32_subss(a, b);
54}
55
Anders Carlsson823c02e2009-02-14 01:00:11 +000056static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000057_mm_sub_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000058{
59 return a - b;
60}
61
Anders Carlsson823c02e2009-02-14 01:00:11 +000062static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000063_mm_mul_ss(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000064{
65 return __builtin_ia32_mulss(a, b);
66}
67
Anders Carlsson823c02e2009-02-14 01:00:11 +000068static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000069_mm_mul_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000070{
71 return a * b;
72}
73
Anders Carlsson823c02e2009-02-14 01:00:11 +000074static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000075_mm_div_ss(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000076{
77 return __builtin_ia32_divss(a, b);
78}
79
Anders Carlsson823c02e2009-02-14 01:00:11 +000080static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000081_mm_div_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000082{
83 return a / b;
84}
85
Anders Carlsson823c02e2009-02-14 01:00:11 +000086static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000087_mm_sqrt_ss(__m128 a)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000088{
89 return __builtin_ia32_sqrtss(a);
90}
91
Anders Carlsson823c02e2009-02-14 01:00:11 +000092static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000093_mm_sqrt_ps(__m128 a)
Anders Carlsson2df1ce42008-12-22 00:01:20 +000094{
95 return __builtin_ia32_sqrtps(a);
96}
97
Anders Carlsson823c02e2009-02-14 01:00:11 +000098static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +000099_mm_rcp_ss(__m128 a)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000100{
101 return __builtin_ia32_rcpss(a);
102}
103
Anders Carlsson823c02e2009-02-14 01:00:11 +0000104static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000105_mm_rcp_ps(__m128 a)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000106{
107 return __builtin_ia32_rcpps(a);
108}
109
Anders Carlsson823c02e2009-02-14 01:00:11 +0000110static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000111_mm_rsqrt_ss(__m128 a)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000112{
113 return __builtin_ia32_rsqrtss(a);
114}
115
Anders Carlsson823c02e2009-02-14 01:00:11 +0000116static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000117_mm_rsqrt_ps(__m128 a)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000118{
119 return __builtin_ia32_rsqrtps(a);
120}
121
Anders Carlsson823c02e2009-02-14 01:00:11 +0000122static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000123_mm_min_ss(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000124{
125 return __builtin_ia32_minss(a, b);
126}
127
Anders Carlsson823c02e2009-02-14 01:00:11 +0000128static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000129_mm_min_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000130{
131 return __builtin_ia32_minps(a, b);
132}
133
Anders Carlsson823c02e2009-02-14 01:00:11 +0000134static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000135_mm_max_ss(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000136{
137 return __builtin_ia32_maxss(a, b);
138}
139
Anders Carlsson823c02e2009-02-14 01:00:11 +0000140static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000141_mm_max_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000142{
143 return __builtin_ia32_maxps(a, b);
144}
145
Anders Carlsson823c02e2009-02-14 01:00:11 +0000146static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000147_mm_and_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000148{
149 return __builtin_ia32_andps(a, b);
150}
151
Anders Carlsson823c02e2009-02-14 01:00:11 +0000152static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000153_mm_andnot_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000154{
155 return __builtin_ia32_andnps(a, b);
156}
157
Anders Carlsson823c02e2009-02-14 01:00:11 +0000158static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000159_mm_or_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000160{
161 return __builtin_ia32_orps(a, b);
162}
163
Anders Carlsson823c02e2009-02-14 01:00:11 +0000164static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000165_mm_xor_ps(__m128 a, __m128 b)
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000166{
167 return __builtin_ia32_xorps(a, b);
168}
169
Anders Carlsson823c02e2009-02-14 01:00:11 +0000170static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000171_mm_cmpeq_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000172{
Anders Carlsson20812002009-05-18 19:16:46 +0000173 return (__m128)__builtin_ia32_cmpss(a, b, 0);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000174}
175
Anders Carlsson823c02e2009-02-14 01:00:11 +0000176static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000177_mm_cmpeq_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000178{
Anders Carlsson20812002009-05-18 19:16:46 +0000179 return (__m128)__builtin_ia32_cmpps(a, b, 0);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000180}
181
Anders Carlsson823c02e2009-02-14 01:00:11 +0000182static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000183_mm_cmplt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000184{
Anders Carlsson20812002009-05-18 19:16:46 +0000185 return (__m128)__builtin_ia32_cmpss(a, b, 1);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000186}
187
Anders Carlsson823c02e2009-02-14 01:00:11 +0000188static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000189_mm_cmplt_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000190{
Anders Carlsson20812002009-05-18 19:16:46 +0000191 return (__m128)__builtin_ia32_cmpps(a, b, 1);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000192}
193
Anders Carlsson823c02e2009-02-14 01:00:11 +0000194static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000195_mm_cmple_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000196{
Anders Carlsson20812002009-05-18 19:16:46 +0000197 return (__m128)__builtin_ia32_cmpss(a, b, 2);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000198}
199
Anders Carlsson823c02e2009-02-14 01:00:11 +0000200static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000201_mm_cmple_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000202{
Anders Carlsson20812002009-05-18 19:16:46 +0000203 return (__m128)__builtin_ia32_cmpps(a, b, 2);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000204}
205
Anders Carlsson823c02e2009-02-14 01:00:11 +0000206static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000207_mm_cmpgt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000208{
Anders Carlsson20812002009-05-18 19:16:46 +0000209 return (__m128)__builtin_ia32_cmpss(b, a, 1);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000210}
211
Anders Carlsson823c02e2009-02-14 01:00:11 +0000212static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000213_mm_cmpgt_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000214{
Anders Carlsson20812002009-05-18 19:16:46 +0000215 return (__m128)__builtin_ia32_cmpps(b, a, 1);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000216}
217
Anders Carlsson823c02e2009-02-14 01:00:11 +0000218static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000219_mm_cmpge_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000220{
Anders Carlsson20812002009-05-18 19:16:46 +0000221 return (__m128)__builtin_ia32_cmpss(b, a, 2);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000222}
223
Anders Carlsson823c02e2009-02-14 01:00:11 +0000224static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000225_mm_cmpge_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000226{
Anders Carlsson20812002009-05-18 19:16:46 +0000227 return (__m128)__builtin_ia32_cmpps(b, a, 2);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000228}
229
Anders Carlsson823c02e2009-02-14 01:00:11 +0000230static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000231_mm_cmpneq_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000232{
Anders Carlsson20812002009-05-18 19:16:46 +0000233 return (__m128)__builtin_ia32_cmpss(a, b, 4);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000234}
235
Anders Carlsson823c02e2009-02-14 01:00:11 +0000236static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000237_mm_cmpneq_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000238{
Anders Carlsson20812002009-05-18 19:16:46 +0000239 return (__m128)__builtin_ia32_cmpps(a, b, 4);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000240}
241
Anders Carlsson823c02e2009-02-14 01:00:11 +0000242static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000243_mm_cmpnlt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000244{
Anders Carlsson20812002009-05-18 19:16:46 +0000245 return (__m128)__builtin_ia32_cmpss(a, b, 5);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000246}
247
Anders Carlsson823c02e2009-02-14 01:00:11 +0000248static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000249_mm_cmpnlt_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000250{
Anders Carlsson20812002009-05-18 19:16:46 +0000251 return (__m128)__builtin_ia32_cmpps(a, b, 5);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000252}
253
Anders Carlsson823c02e2009-02-14 01:00:11 +0000254static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000255_mm_cmpnle_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000256{
Anders Carlsson20812002009-05-18 19:16:46 +0000257 return (__m128)__builtin_ia32_cmpss(a, b, 6);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000258}
259
Anders Carlsson823c02e2009-02-14 01:00:11 +0000260static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000261_mm_cmpnle_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000262{
Anders Carlsson20812002009-05-18 19:16:46 +0000263 return (__m128)__builtin_ia32_cmpps(a, b, 6);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000264}
265
Anders Carlsson823c02e2009-02-14 01:00:11 +0000266static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000267_mm_cmpngt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000268{
Anders Carlsson20812002009-05-18 19:16:46 +0000269 return (__m128)__builtin_ia32_cmpss(b, a, 5);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000270}
271
Anders Carlsson823c02e2009-02-14 01:00:11 +0000272static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000273_mm_cmpngt_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000274{
Anders Carlsson20812002009-05-18 19:16:46 +0000275 return (__m128)__builtin_ia32_cmpps(b, a, 5);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000276}
277
Anders Carlsson823c02e2009-02-14 01:00:11 +0000278static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000279_mm_cmpnge_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000280{
Anders Carlsson20812002009-05-18 19:16:46 +0000281 return (__m128)__builtin_ia32_cmpss(b, a, 6);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000282}
283
Anders Carlsson823c02e2009-02-14 01:00:11 +0000284static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000285_mm_cmpnge_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000286{
Anders Carlsson20812002009-05-18 19:16:46 +0000287 return (__m128)__builtin_ia32_cmpps(b, a, 6);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000288}
289
Anders Carlsson823c02e2009-02-14 01:00:11 +0000290static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000291_mm_cmpord_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000292{
Anders Carlsson20812002009-05-18 19:16:46 +0000293 return (__m128)__builtin_ia32_cmpss(a, b, 7);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000294}
295
Anders Carlsson823c02e2009-02-14 01:00:11 +0000296static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000297_mm_cmpord_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000298{
Anders Carlsson20812002009-05-18 19:16:46 +0000299 return (__m128)__builtin_ia32_cmpps(a, b, 7);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000300}
301
Anders Carlsson823c02e2009-02-14 01:00:11 +0000302static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000303_mm_cmpunord_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000304{
Anders Carlsson20812002009-05-18 19:16:46 +0000305 return (__m128)__builtin_ia32_cmpss(a, b, 3);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000306}
307
Anders Carlsson823c02e2009-02-14 01:00:11 +0000308static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000309_mm_cmpunord_ps(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000310{
Anders Carlsson20812002009-05-18 19:16:46 +0000311 return (__m128)__builtin_ia32_cmpps(a, b, 3);
Anders Carlsson60053dd2008-12-22 00:28:39 +0000312}
313
Anders Carlsson823c02e2009-02-14 01:00:11 +0000314static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000315_mm_comieq_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000316{
317 return __builtin_ia32_comieq(a, b);
318}
319
Anders Carlsson823c02e2009-02-14 01:00:11 +0000320static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000321_mm_comilt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000322{
323 return __builtin_ia32_comilt(a, b);
324}
325
Anders Carlsson823c02e2009-02-14 01:00:11 +0000326static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000327_mm_comile_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000328{
329 return __builtin_ia32_comile(a, b);
330}
331
Anders Carlsson823c02e2009-02-14 01:00:11 +0000332static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000333_mm_comigt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000334{
335 return __builtin_ia32_comigt(a, b);
336}
337
Anders Carlsson823c02e2009-02-14 01:00:11 +0000338static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000339_mm_comige_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000340{
341 return __builtin_ia32_comige(a, b);
342}
343
Anders Carlsson823c02e2009-02-14 01:00:11 +0000344static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000345_mm_comineq_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000346{
347 return __builtin_ia32_comineq(a, b);
348}
349
Anders Carlsson823c02e2009-02-14 01:00:11 +0000350static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000351_mm_ucomieq_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000352{
353 return __builtin_ia32_ucomieq(a, b);
354}
355
Anders Carlsson823c02e2009-02-14 01:00:11 +0000356static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000357_mm_ucomilt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000358{
359 return __builtin_ia32_ucomilt(a, b);
360}
361
Anders Carlsson823c02e2009-02-14 01:00:11 +0000362static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000363_mm_ucomile_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000364{
365 return __builtin_ia32_ucomile(a, b);
366}
367
Anders Carlsson823c02e2009-02-14 01:00:11 +0000368static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000369_mm_ucomigt_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000370{
371 return __builtin_ia32_ucomigt(a, b);
372}
373
Anders Carlsson823c02e2009-02-14 01:00:11 +0000374static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000375_mm_ucomige_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000376{
377 return __builtin_ia32_ucomige(a, b);
378}
379
Anders Carlsson823c02e2009-02-14 01:00:11 +0000380static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000381_mm_ucomineq_ss(__m128 a, __m128 b)
Anders Carlsson60053dd2008-12-22 00:28:39 +0000382{
383 return __builtin_ia32_ucomineq(a, b);
384}
385
Anders Carlsson823c02e2009-02-14 01:00:11 +0000386static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000387_mm_cvtss_si32(__m128 a)
Anders Carlsson157643c2008-12-22 00:48:30 +0000388{
389 return __builtin_ia32_cvtss2si(a);
390}
391
Anders Carlsson823c02e2009-02-14 01:00:11 +0000392static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000393_mm_cvtss_si64(__m128 a)
Anders Carlsson157643c2008-12-22 00:48:30 +0000394{
395 return __builtin_ia32_cvtss2si64(a);
396}
397
Anders Carlsson823c02e2009-02-14 01:00:11 +0000398static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000399_mm_cvtps_pi32(__m128 a)
Anders Carlsson157643c2008-12-22 00:48:30 +0000400{
401 return (__m64)__builtin_ia32_cvtps2pi(a);
402}
403
Anders Carlsson823c02e2009-02-14 01:00:11 +0000404static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000405_mm_cvttss_si32(__m128 a)
Anders Carlsson157643c2008-12-22 00:48:30 +0000406{
407 return __builtin_ia32_cvttss2si(a);
408}
409
Anders Carlsson823c02e2009-02-14 01:00:11 +0000410static inline long long __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000411_mm_cvttss_si64(__m128 a)
Anders Carlsson157643c2008-12-22 00:48:30 +0000412{
413 return __builtin_ia32_cvttss2si64(a);
414}
415
Anders Carlsson823c02e2009-02-14 01:00:11 +0000416static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000417_mm_cvttps_pi32(__m128 a)
Anders Carlsson157643c2008-12-22 00:48:30 +0000418{
419 return (__m64)__builtin_ia32_cvttps2pi(a);
420}
421
Anders Carlsson823c02e2009-02-14 01:00:11 +0000422static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000423_mm_cvtsi32_ss(__m128 a, int b)
Anders Carlsson157643c2008-12-22 00:48:30 +0000424{
425 return __builtin_ia32_cvtsi2ss(a, b);
426}
427
Anders Carlsson1e4a9b72008-12-22 01:26:50 +0000428#ifdef __x86_64__
429
Anders Carlsson823c02e2009-02-14 01:00:11 +0000430static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000431_mm_cvtsi64_ss(__m128 a, long long b)
Anders Carlsson157643c2008-12-22 00:48:30 +0000432{
433 return __builtin_ia32_cvtsi642ss(a, b);
434}
435
Anders Carlsson1e4a9b72008-12-22 01:26:50 +0000436#endif
437
Anders Carlsson823c02e2009-02-14 01:00:11 +0000438static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000439_mm_cvtpi32_ps(__m128 a, __m64 b)
Anders Carlsson157643c2008-12-22 00:48:30 +0000440{
441 return __builtin_ia32_cvtpi2ps(a, (__v2si)b);
442}
443
Anders Carlsson823c02e2009-02-14 01:00:11 +0000444static inline float __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000445_mm_cvtss_f32(__m128 a)
Anders Carlsson1e4a9b72008-12-22 01:26:50 +0000446{
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000447 return a[0];
Anders Carlsson1e4a9b72008-12-22 01:26:50 +0000448}
449
Anders Carlsson823c02e2009-02-14 01:00:11 +0000450static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000451_mm_loadh_pi(__m128 a, __m64 const *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000452{
453 return __builtin_ia32_loadhps(a, (__v2si *)p);
454}
455
Anders Carlsson823c02e2009-02-14 01:00:11 +0000456static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000457_mm_loadl_pi(__m128 a, __m64 const *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000458{
459 return __builtin_ia32_loadlps(a, (__v2si *)p);
460}
461
Anders Carlsson823c02e2009-02-14 01:00:11 +0000462static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000463_mm_load_ss(float *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000464{
465 return (__m128){ *p, 0, 0, 0 };
466}
467
Anders Carlsson823c02e2009-02-14 01:00:11 +0000468static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000469_mm_load1_ps(float *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000470{
471 return (__m128){ *p, *p, *p, *p };
472}
473
Eli Friedmanf83c2582009-06-02 05:55:48 +0000474#define _mm_load_ps1(p) _mm_load1_ps(p)
475
Anders Carlsson823c02e2009-02-14 01:00:11 +0000476static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000477_mm_load_ps(float *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000478{
479 return *(__m128*)p;
480}
481
Anders Carlsson823c02e2009-02-14 01:00:11 +0000482static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000483_mm_loadu_ps(float *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000484{
485 return __builtin_ia32_loadups(p);
486}
487
Anders Carlsson823c02e2009-02-14 01:00:11 +0000488static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000489_mm_loadr_ps(float *p)
Anders Carlsson13fd3a12008-12-22 02:43:30 +0000490{
491 __m128 a = _mm_load_ps(p);
492 return __builtin_shufflevector(a, a, 3, 2, 1, 0);
493}
494
Anders Carlsson823c02e2009-02-14 01:00:11 +0000495static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000496_mm_set_ss(float w)
Anders Carlssonc7d0d8b2008-12-22 02:51:35 +0000497{
498 return (__m128){ w, 0, 0, 0 };
499}
500
Anders Carlsson823c02e2009-02-14 01:00:11 +0000501static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000502_mm_set1_ps(float w)
Anders Carlssonc7d0d8b2008-12-22 02:51:35 +0000503{
504 return (__m128){ w, w, w, w };
505}
506
Anders Carlssonf562b392008-12-27 04:26:15 +0000507// Microsoft specific.
Anders Carlsson823c02e2009-02-14 01:00:11 +0000508static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000509_mm_set_ps1(float w)
Anders Carlssonf562b392008-12-27 04:26:15 +0000510{
511 return _mm_set1_ps(w);
512}
513
Anders Carlsson823c02e2009-02-14 01:00:11 +0000514static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000515_mm_set_ps(float z, float y, float x, float w)
Anders Carlssonc7d0d8b2008-12-22 02:51:35 +0000516{
517 return (__m128){ w, x, y, z };
518}
519
Anders Carlsson823c02e2009-02-14 01:00:11 +0000520static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000521_mm_setr_ps(float z, float y, float x, float w)
Anders Carlssonc7d0d8b2008-12-22 02:51:35 +0000522{
523 return (__m128){ z, y, x, w };
524}
525
Daniel Dunbarc9feeb52009-02-17 07:57:58 +0000526static inline __m128 __attribute__((__always_inline__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000527_mm_setzero_ps(void)
Anders Carlssonc7d0d8b2008-12-22 02:51:35 +0000528{
529 return (__m128){ 0, 0, 0, 0 };
530}
531
Daniel Dunbarc9feeb52009-02-17 07:57:58 +0000532static inline void __attribute__((__always_inline__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000533_mm_storeh_pi(__m64 *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000534{
535 __builtin_ia32_storehps((__v2si *)p, a);
536}
537
Daniel Dunbarc9feeb52009-02-17 07:57:58 +0000538static inline void __attribute__((__always_inline__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000539_mm_storel_pi(__m64 *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000540{
541 __builtin_ia32_storelps((__v2si *)p, a);
542}
543
Daniel Dunbarc9feeb52009-02-17 07:57:58 +0000544static inline void __attribute__((__always_inline__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000545_mm_store_ss(float *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000546{
547 *p = a[0];
548}
549
Anders Carlsson823c02e2009-02-14 01:00:11 +0000550static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000551_mm_storeu_ps(float *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000552{
553 __builtin_ia32_storeups(p, a);
554}
555
Anders Carlsson823c02e2009-02-14 01:00:11 +0000556static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000557_mm_store1_ps(float *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000558{
559 a = __builtin_shufflevector(a, a, 0, 0, 0, 0);
560 _mm_storeu_ps(p, a);
561}
562
Anders Carlsson823c02e2009-02-14 01:00:11 +0000563static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000564_mm_store_ps(float *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000565{
566 *(__m128 *)p = a;
567}
568
Anders Carlsson823c02e2009-02-14 01:00:11 +0000569static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000570_mm_storer_ps(float *p, __m128 a)
Anders Carlsson1e719962008-12-22 03:16:40 +0000571{
572 a = __builtin_shufflevector(a, a, 3, 2, 1, 0);
573 _mm_store_ps(p, a);
574}
575
/* Locality hints for _mm_prefetch.

   The values are passed straight through as the third ("locality") argument
   of __builtin_prefetch, where 3 means "keep in all cache levels" and 0 means
   "no temporal locality".  T0 (closest cache) must therefore be 3 and T2 must
   be 1; numbering them 1..3 would swap the T0 and T2 hints. */
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0

/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Prefetch for reading (rw argument 0) the data at address a with hint sel.
   Both macro arguments are parenthesized so that expressions such as
   _mm_prefetch(p + 1, hint) cast the whole address, not just `p`. */
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), 0, (sel)))
585
Anders Carlsson823c02e2009-02-14 01:00:11 +0000586static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000587_mm_stream_pi(__m64 *p, __m64 a)
Anders Carlsson70d5f462008-12-22 03:50:21 +0000588{
589 __builtin_ia32_movntq(p, a);
590}
591
Anders Carlsson823c02e2009-02-14 01:00:11 +0000592static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000593_mm_stream_ps(float *p, __m128 a)
Anders Carlsson70d5f462008-12-22 03:50:21 +0000594{
595 __builtin_ia32_movntps(p, a);
596}
597
/* Issues the SFENCE builtin; takes no arguments and returns nothing. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  __builtin_ia32_sfence();
  return;
}
603
Anders Carlsson823c02e2009-02-14 01:00:11 +0000604static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000605_mm_extract_pi16(__m64 a, int n)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000606{
607 /* FIXME:
608 * This should force n to be an immediate.
609 * This does not use the PEXTRW instruction. From looking at the LLVM source, the
610 instruction doesn't seem to be hooked up.
611 * The code could probably be made better :)
612 */
613 __v4hi b = (__v4hi)a;
614 return b[(n == 0) ? 0 : (n == 1 ? 1 : (n == 2 ? 2 : 3))];
615}
616
Anders Carlsson27046fc2008-12-22 07:34:23 +0000617/* FIXME: Implement this. We could add a __builtin_insertelement function that's similar to
618 the already existing __builtin_shufflevector.
619*/
620/*
Anders Carlsson823c02e2009-02-14 01:00:11 +0000621static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000622_mm_insert_pi16(__m64 a, int d, int n)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000623{
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000624 return (__m64){ 0LL };
625}
Anders Carlsson27046fc2008-12-22 07:34:23 +0000626*/
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000627
Anders Carlsson823c02e2009-02-14 01:00:11 +0000628static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000629_mm_max_pi16(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000630{
631 return (__m64)__builtin_ia32_pmaxsw((__v4hi)a, (__v4hi)b);
632}
633
Anders Carlsson823c02e2009-02-14 01:00:11 +0000634static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000635_mm_max_pu8(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000636{
637 return (__m64)__builtin_ia32_pmaxub((__v8qi)a, (__v8qi)b);
638}
639
Anders Carlsson823c02e2009-02-14 01:00:11 +0000640static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000641_mm_min_pi16(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000642{
643 return (__m64)__builtin_ia32_pminsw((__v4hi)a, (__v4hi)b);
644}
645
Anders Carlsson823c02e2009-02-14 01:00:11 +0000646static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000647_mm_min_pu8(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000648{
649 return (__m64)__builtin_ia32_pminub((__v8qi)a, (__v8qi)b);
650}
651
Anders Carlsson823c02e2009-02-14 01:00:11 +0000652static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000653_mm_movemask_pi8(__m64 a)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000654{
655 return __builtin_ia32_pmovmskb((__v8qi)a);
656}
657
Anders Carlsson823c02e2009-02-14 01:00:11 +0000658static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000659_mm_mulhi_pu16(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000660{
661 return (__m64)__builtin_ia32_pmulhuw((__v4hi)a, (__v4hi)b);
662}
663
/* Shuffle the four 16-bit lanes of a according to the immediate n using the
   PSHUFW builtin.  This is a macro because n must be a compile-time constant.
   Both arguments are parenthesized so the cast applies to the whole of a. */
#define _mm_shuffle_pi16(a, n) ((__m64)__builtin_ia32_pshufw((__v4hi)(a), (n)))
665
Anders Carlsson823c02e2009-02-14 01:00:11 +0000666static inline void __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000667_mm_maskmove_si64(__m64 d, __m64 n, char *p)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000668{
669 __builtin_ia32_maskmovq((__v8qi)d, (__v8qi)n, p);
670}
671
Anders Carlsson823c02e2009-02-14 01:00:11 +0000672static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000673_mm_avg_pu8(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000674{
675 return (__m64)__builtin_ia32_pavgb((__v8qi)a, (__v8qi)b);
676}
677
Anders Carlsson823c02e2009-02-14 01:00:11 +0000678static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000679_mm_avg_pu16(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000680{
681 return (__m64)__builtin_ia32_pavgw((__v4hi)a, (__v4hi)b);
682}
683
Chris Lattnerff8a9e62009-04-17 17:55:23 +0000684static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000685_mm_sad_pu8(__m64 a, __m64 b)
Anders Carlsson4cf8ac82008-12-22 04:55:36 +0000686{
687 return (__m64)__builtin_ia32_psadbw((__v8qi)a, (__v8qi)b);
688}
Anders Carlsson53b3d4a2008-12-22 05:00:07 +0000689
/* Return the current contents of the MXCSR control/status register, read
   through the STMXCSR builtin. */
static inline unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_getcsr(void)
{
  unsigned int csr = __builtin_ia32_stmxcsr();

  return csr;
}
695
/* Load i into the MXCSR control/status register through the LDMXCSR
   builtin. */
static inline void __attribute__((__always_inline__, __nodebug__))
_mm_setcsr(unsigned int i)
{
  __builtin_ia32_ldmxcsr(i);
}
701
/* Shuffle lanes of a and b with the SHUFPS builtin.  This is a macro because
   mask must be a compile-time constant (see _MM_SHUFFLE). */
#define _mm_shuffle_ps(a, b, mask) (__builtin_ia32_shufps((a), (b), (mask)))
703
Anders Carlsson823c02e2009-02-14 01:00:11 +0000704static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000705_mm_unpackhi_ps(__m128 a, __m128 b)
Anders Carlsson8b79fc82008-12-22 05:20:34 +0000706{
707 return __builtin_shufflevector(a, b, 2, 6, 3, 7);
708}
709
Anders Carlsson823c02e2009-02-14 01:00:11 +0000710static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000711_mm_unpacklo_ps(__m128 a, __m128 b)
Anders Carlsson8b79fc82008-12-22 05:20:34 +0000712{
713 return __builtin_shufflevector(a, b, 0, 4, 1, 5);
714}
715
Anders Carlsson823c02e2009-02-14 01:00:11 +0000716static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000717_mm_move_ss(__m128 a, __m128 b)
Anders Carlsson8b79fc82008-12-22 05:20:34 +0000718{
719 return __builtin_shufflevector(a, b, 4, 1, 2, 3);
720}
721
Anders Carlsson823c02e2009-02-14 01:00:11 +0000722static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000723_mm_movehl_ps(__m128 a, __m128 b)
Anders Carlsson8b79fc82008-12-22 05:20:34 +0000724{
725 return __builtin_shufflevector(a, b, 6, 7, 2, 3);
726}
727
Anders Carlsson823c02e2009-02-14 01:00:11 +0000728static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000729_mm_movelh_ps(__m128 a, __m128 b)
Anders Carlsson8b79fc82008-12-22 05:20:34 +0000730{
731 return __builtin_shufflevector(a, b, 0, 1, 4, 5);
732}
733
Anders Carlsson823c02e2009-02-14 01:00:11 +0000734static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000735_mm_cvtpi16_ps(__m64 a)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000736{
737 __m64 b, c;
738 __m128 r;
739
740 b = _mm_setzero_si64();
741 b = _mm_cmpgt_pi16(b, a);
742 c = _mm_unpackhi_pi16(a, b);
743 r = _mm_setzero_ps();
744 r = _mm_cvtpi32_ps(r, c);
745 r = _mm_movelh_ps(r, r);
746 c = _mm_unpacklo_pi16(a, b);
747 r = _mm_cvtpi32_ps(r, c);
748
749 return r;
750}
751
Anders Carlsson823c02e2009-02-14 01:00:11 +0000752static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000753_mm_cvtpu16_ps(__m64 a)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000754{
755 __m64 b, c;
756 __m128 r;
757
758 b = _mm_setzero_si64();
759 c = _mm_unpackhi_pi16(a, b);
760 r = _mm_setzero_ps();
761 r = _mm_cvtpi32_ps(r, c);
762 r = _mm_movelh_ps(r, r);
763 c = _mm_unpacklo_pi16(a, b);
764 r = _mm_cvtpi32_ps(r, c);
765
766 return r;
767}
768
Anders Carlsson823c02e2009-02-14 01:00:11 +0000769static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000770_mm_cvtpi8_ps(__m64 a)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000771{
772 __m64 b;
773
774 b = _mm_setzero_si64();
775 b = _mm_cmpgt_pi8(b, a);
776 b = _mm_unpacklo_pi8(a, b);
777
778 return _mm_cvtpi16_ps(b);
779}
780
Anders Carlsson823c02e2009-02-14 01:00:11 +0000781static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000782_mm_cvtpu8_ps(__m64 a)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000783{
784 __m64 b;
785
786 b = _mm_setzero_si64();
787 b = _mm_unpacklo_pi8(a, b);
788
789 return _mm_cvtpi16_ps(b);
790}
791
Anders Carlsson823c02e2009-02-14 01:00:11 +0000792static inline __m128 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000793_mm_cvtpi32x2_ps(__m64 a, __m64 b)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000794{
795 __m128 c;
796
797 c = _mm_setzero_ps();
798 c = _mm_cvtpi32_ps(c, b);
799 c = _mm_movelh_ps(c, c);
800
801 return _mm_cvtpi32_ps(c, a);
802}
803
Anders Carlsson823c02e2009-02-14 01:00:11 +0000804static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000805_mm_cvtps_pi16(__m128 a)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000806{
807 __m64 b, c;
808
809 b = _mm_cvtps_pi32(a);
810 a = _mm_movehl_ps(a, a);
811 c = _mm_cvtps_pi32(a);
812
813 return _mm_packs_pi16(b, c);
814}
815
Anders Carlsson823c02e2009-02-14 01:00:11 +0000816static inline __m64 __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000817_mm_cvtps_pi8(__m128 a)
Anders Carlsson5aa0c502008-12-22 07:08:03 +0000818{
819 __m64 b, c;
820
821 b = _mm_cvtps_pi16(a);
822 c = _mm_setzero_si64();
823
824 return _mm_packs_pi16(b, c);
825}
826
Anders Carlsson823c02e2009-02-14 01:00:11 +0000827static inline int __attribute__((__always_inline__, __nodebug__))
Mike Stump5b31ed32009-02-13 14:24:50 +0000828_mm_movemask_ps(__m128 a)
Anders Carlsson8b79fc82008-12-22 05:20:34 +0000829{
830 return __builtin_ia32_movmskps(a);
831}
832
/* Pack four 2-bit lane selectors into one 8-bit shuffle immediate:
   z occupies bits 7:6, y bits 5:4, x bits 3:2 and w bits 1:0. */
#define _MM_SHUFFLE(z, y, x, w) \
  (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
834
/* MXCSR bit fields, for use with _mm_getcsr/_mm_setcsr and the
   _MM_GET_xxx/_MM_SET_xxx helper macros. */

/* Exception state flags (bits 0-5). */
#define _MM_EXCEPT_INVALID    (0x0001)
#define _MM_EXCEPT_DENORM     (0x0002)
#define _MM_EXCEPT_DIV_ZERO   (0x0004)
#define _MM_EXCEPT_OVERFLOW   (0x0008)
#define _MM_EXCEPT_UNDERFLOW  (0x0010)
#define _MM_EXCEPT_INEXACT    (0x0020)
#define _MM_EXCEPT_MASK       (0x003f)

/* Exception mask bits (bits 7-12). */
#define _MM_MASK_INVALID      (0x0080)
#define _MM_MASK_DENORM       (0x0100)
#define _MM_MASK_DIV_ZERO     (0x0200)
#define _MM_MASK_OVERFLOW     (0x0400)
#define _MM_MASK_UNDERFLOW    (0x0800)
#define _MM_MASK_INEXACT      (0x1000)
#define _MM_MASK_MASK         (0x1f80)

/* Rounding control (bits 13-14). */
#define _MM_ROUND_NEAREST     (0x0000)
#define _MM_ROUND_DOWN        (0x2000)
#define _MM_ROUND_UP          (0x4000)
#define _MM_ROUND_TOWARD_ZERO (0x6000)
#define _MM_ROUND_MASK        (0x6000)

/* Flush-to-zero control (bit 15).  OFF must be 0 so that
   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF) actually clears the bit. */
#define _MM_FLUSH_ZERO_MASK   (0x8000)
#define _MM_FLUSH_ZERO_ON     (0x8000)
#define _MM_FLUSH_ZERO_OFF    (0x0000)
Anders Carlsson25b16db2008-12-22 05:42:03 +0000860
/* Read one field of MXCSR: fetch the register with _mm_getcsr() and keep only
   the bits selected by the field's mask. */
#define _MM_GET_EXCEPTION_MASK() (_MM_MASK_MASK & _mm_getcsr())
#define _MM_GET_EXCEPTION_STATE() (_MM_EXCEPT_MASK & _mm_getcsr())
#define _MM_GET_FLUSH_ZERO_MODE() (_MM_FLUSH_ZERO_MASK & _mm_getcsr())
#define _MM_GET_ROUNDING_MODE() (_MM_ROUND_MASK & _mm_getcsr())

/* Replace one field of MXCSR: clear the field's bits in the current register
   value, OR in x, and write the result back with _mm_setcsr(). */
#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_MASK_MASK)))
#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_EXCEPT_MASK)))
#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK)))
#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((x) | (_mm_getcsr() & ~_MM_ROUND_MASK)))
870
/* Transpose, in place, the 4x4 float matrix whose rows are row0..row3.
   All four arguments must be modifiable __m128 lvalues; each is read and then
   overwritten.  With rows a, b, c, d the intermediate values are
     tmp0 = { a0, b0, a1, b1 }   tmp1 = { a2, b2, a3, b3 }
     tmp2 = { c0, d0, c1, d1 }   tmp3 = { c2, d2, c3, d3 }
   so rows 0 and 2 take the low halves (movelh) and rows 1 and 3 take the high
   halves (movehl) of the matching tmp pair. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 tmp3, tmp2, tmp1, tmp0; \
  tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(tmp0, tmp2); \
  (row1) = _mm_movehl_ps(tmp2, tmp0); \
  (row2) = _mm_movelh_ps(tmp1, tmp3); \
  (row3) = _mm_movehl_ps(tmp3, tmp1); \
} while (0)
883
Anders Carlsson19ef5d42008-12-26 00:57:11 +0000884#include <emmintrin.h>
885
Anders Carlsson2df1ce42008-12-22 00:01:20 +0000886#endif /* __SSE__ */
887
888#endif /* __XMMINTRIN_H */