blob: 09386ab95f088eeaa64cb7276f6e9b3daa7dfc98 [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics -------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* Internal helper view: a 16-byte vector of ints, used when casting
 * arguments for integer-lane builtins. */
typedef int __v4si __attribute__((__vector_size__(16)));

/* Public 16-byte vector of doubles. */
typedef double __m128d __attribute__((__vector_size__(16)));

/* Public 16-byte integer vector, declared with long long lanes. */
typedef long long __m128i __attribute__((__vector_size__(16)));
37
38static inline __m128d __attribute__((__always_inline__)) _mm_add_sd(__m128d a, __m128d b)
39{
40 return __builtin_ia32_addsd(a, b);
41}
42
43static inline __m128d __attribute__((__always_inline__)) _mm_add_pd(__m128d a, __m128d b)
44{
45 return a + b;
46}
47
48static inline __m128d __attribute__((__always_inline__)) _mm_sub_sd(__m128d a, __m128d b)
49{
50 return __builtin_ia32_subsd(a, b);
51}
52
53static inline __m128d __attribute__((__always_inline__)) _mm_sub_pd(__m128d a, __m128d b)
54{
55 return a - b;
56}
57
58static inline __m128d __attribute__((__always_inline__)) _mm_mul_sd(__m128d a, __m128d b)
59{
60 return __builtin_ia32_mulsd(a, b);
61}
62
63static inline __m128d __attribute__((__always_inline__)) _mm_mul_pd(__m128d a, __m128d b)
64{
65 return a * b;
66}
67
68static inline __m128d __attribute__((__always_inline__)) _mm_div_sd(__m128d a, __m128d b)
69{
70 return __builtin_ia32_divsd(a, b);
71}
72
73static inline __m128d __attribute__((__always_inline__)) _mm_div_pd(__m128d a, __m128d b)
74{
75 return a / b;
76}
77
78static inline __m128d __attribute__((__always_inline__)) _mm_sqrt_sd(__m128d a, __m128d b)
79{
80 __m128d c = __builtin_ia32_sqrtsd(b);
81 return (__m128d) { c[0], a[1] };
82}
83
84static inline __m128d __attribute__((__always_inline__)) _mm_sqrt_pd(__m128d a)
85{
86 return __builtin_ia32_sqrtpd(a);
87}
88
89static inline __m128d __attribute__((__always_inline__)) _mm_min_sd(__m128d a, __m128d b)
90{
91 return __builtin_ia32_minsd(a, b);
92}
93
94static inline __m128d __attribute__((__always_inline__)) _mm_min_pd(__m128d a, __m128d b)
95{
96 return __builtin_ia32_minpd(a, b);
97}
98
99static inline __m128d __attribute__((__always_inline__)) _mm_max_sd(__m128d a, __m128d b)
100{
101 return __builtin_ia32_maxsd(a, b);
102}
103
104static inline __m128d __attribute__((__always_inline__)) _mm_max_pd(__m128d a, __m128d b)
105{
106 return __builtin_ia32_maxpd(a, b);
107}
108
109static inline __m128d __attribute__((__always_inline__)) _mm_and_pd(__m128d a, __m128d b)
110{
111 return __builtin_ia32_andpd(a, b);
112}
113
114static inline __m128d __attribute__((__always_inline__)) _mm_andnot_pd(__m128d a, __m128d b)
115{
116 return __builtin_ia32_andnpd(a, b);
117}
118
119static inline __m128d __attribute__((__always_inline__)) _mm_or_pd(__m128d a, __m128d b)
120{
121 return __builtin_ia32_orpd(a, b);
122}
123
124static inline __m128d __attribute__((__always_inline__)) _mm_xor_pd(__m128d a, __m128d b)
125{
126 return __builtin_ia32_xorpd(a, b);
127}
128
129static inline __m128d __attribute__((__always_inline__)) _mm_cmpeq_pd(__m128d a, __m128d b)
130{
131 return (__m128d)__builtin_ia32_cmpeqpd(a, b);
132}
133
134static inline __m128d __attribute__((__always_inline__)) _mm_cmplt_pd(__m128d a, __m128d b)
135{
136 return (__m128d)__builtin_ia32_cmpltpd(a, b);
137}
138
139static inline __m128d __attribute__((__always_inline__)) _mm_cmple_pd(__m128d a, __m128d b)
140{
141 return (__m128d)__builtin_ia32_cmplepd(a, b);
142}
143
144static inline __m128d __attribute__((__always_inline__)) _mm_cmpgt_pd(__m128d a, __m128d b)
145{
146 return (__m128d)__builtin_ia32_cmpltpd(b, a);
147}
148
149static inline __m128d __attribute__((__always_inline__)) _mm_cmpge_pd(__m128d a, __m128d b)
150{
151 return (__m128d)__builtin_ia32_cmplepd(b, a);
152}
153
154static inline __m128d __attribute__((__always_inline__)) _mm_cmpord_pd(__m128d a, __m128d b)
155{
156 return (__m128d)__builtin_ia32_cmpordpd(a, b);
157}
158
159static inline __m128d __attribute__((__always_inline__)) _mm_cmpunord_pd(__m128d a, __m128d b)
160{
161 return (__m128d)__builtin_ia32_cmpunordpd(a, b);
162}
163
164static inline __m128d __attribute__((__always_inline__)) _mm_cmpneq_pd(__m128d a, __m128d b)
165{
166 return (__m128d)__builtin_ia32_cmpneqpd(a, b);
167}
168
169static inline __m128d __attribute__((__always_inline__)) _mm_cmpnlt_pd(__m128d a, __m128d b)
170{
171 return (__m128d)__builtin_ia32_cmpnltpd(a, b);
172}
173
174static inline __m128d __attribute__((__always_inline__)) _mm_cmpnle_pd(__m128d a, __m128d b)
175{
176 return (__m128d)__builtin_ia32_cmpnlepd(a, b);
177}
178
179static inline __m128d __attribute__((__always_inline__)) _mm_cmpngt_pd(__m128d a, __m128d b)
180{
181 return (__m128d)__builtin_ia32_cmpnltpd(b, a);
182}
183
184static inline __m128d __attribute__((__always_inline__)) _mm_cmpnge_pd(__m128d a, __m128d b)
185{
186 return (__m128d)__builtin_ia32_cmpnlepd(b, a);
187}
188
189static inline __m128d __attribute__((__always_inline__)) _mm_cmpeq_sd(__m128d a, __m128d b)
190{
191 return (__m128d)__builtin_ia32_cmpeqsd(a, b);
192}
193
194static inline __m128d __attribute__((__always_inline__)) _mm_cmplt_sd(__m128d a, __m128d b)
195{
196 return (__m128d)__builtin_ia32_cmpltsd(a, b);
197}
198
199static inline __m128d __attribute__((__always_inline__)) _mm_cmple_sd(__m128d a, __m128d b)
200{
201 return (__m128d)__builtin_ia32_cmplesd(a, b);
202}
203
204static inline __m128d __attribute__((__always_inline__)) _mm_cmpgt_sd(__m128d a, __m128d b)
205{
206 return (__m128d)__builtin_ia32_cmpltsd(b, a);
207}
208
209static inline __m128d __attribute__((__always_inline__)) _mm_cmpge_sd(__m128d a, __m128d b)
210{
211 return (__m128d)__builtin_ia32_cmplesd(b, a);
212}
213
214static inline __m128d __attribute__((__always_inline__)) _mm_cmpord_sd(__m128d a, __m128d b)
215{
216 return (__m128d)__builtin_ia32_cmpordsd(a, b);
217}
218
219static inline __m128d __attribute__((__always_inline__)) _mm_cmpunord_sd(__m128d a, __m128d b)
220{
221 return (__m128d)__builtin_ia32_cmpunordsd(a, b);
222}
223
224static inline __m128d __attribute__((__always_inline__)) _mm_cmpneq_sd(__m128d a, __m128d b)
225{
226 return (__m128d)__builtin_ia32_cmpneqsd(a, b);
227}
228
229static inline __m128d __attribute__((__always_inline__)) _mm_cmpnlt_sd(__m128d a, __m128d b)
230{
231 return (__m128d)__builtin_ia32_cmpnltsd(a, b);
232}
233
234static inline __m128d __attribute__((__always_inline__)) _mm_cmpnle_sd(__m128d a, __m128d b)
235{
236 return (__m128d)__builtin_ia32_cmpnlesd(a, b);
237}
238
239static inline __m128d __attribute__((__always_inline__)) _mm_cmpngt_sd(__m128d a, __m128d b)
240{
241 return (__m128d)__builtin_ia32_cmpnltsd(b, a);
242}
243
244static inline __m128d __attribute__((__always_inline__)) _mm_cmpnge_sd(__m128d a, __m128d b)
245{
246 return (__m128d)__builtin_ia32_cmpnlesd(b, a);
247}
248
249static inline int __attribute__((__always_inline__)) _mm_comieq_sd(__m128d a, __m128d b)
250{
251 return __builtin_ia32_comisdeq(a, b);
252}
253
254static inline int __attribute__((__always_inline__)) _mm_comilt_sd(__m128d a, __m128d b)
255{
256 return __builtin_ia32_comisdlt(a, b);
257}
258
259static inline int __attribute__((__always_inline__)) _mm_comile_sd(__m128d a, __m128d b)
260{
261 return __builtin_ia32_comisdle(a, b);
262}
263
264static inline int __attribute__((__always_inline__)) _mm_comigt_sd(__m128d a, __m128d b)
265{
266 return __builtin_ia32_comisdgt(a, b);
267}
268
269static inline int __attribute__((__always_inline__)) _mm_comineq_sd(__m128d a, __m128d b)
270{
271 return __builtin_ia32_comisdneq(a, b);
272}
273
274static inline int __attribute__((__always_inline__)) _mm_ucomieq_sd(__m128d a, __m128d b)
275{
276 return __builtin_ia32_ucomisdeq(a, b);
277}
278
279static inline int __attribute__((__always_inline__)) _mm_ucomilt_sd(__m128d a, __m128d b)
280{
281 return __builtin_ia32_ucomisdlt(a, b);
282}
283
284static inline int __attribute__((__always_inline__)) _mm_ucomile_sd(__m128d a, __m128d b)
285{
286 return __builtin_ia32_ucomisdle(a, b);
287}
288
289static inline int __attribute__((__always_inline__)) _mm_ucomigt_sd(__m128d a, __m128d b)
290{
291 return __builtin_ia32_ucomisdgt(a, b);
292}
293
294static inline int __attribute__((__always_inline__)) _mm_ucomineq_sd(__m128d a, __m128d b)
295{
296 return __builtin_ia32_ucomisdneq(a, b);
297}
298
299static inline __m128 __attribute__((__always_inline__)) _mm_cvtpd_ps(__m128d a)
300{
301 return __builtin_ia32_cvtpd2ps(a);
302}
303
304static inline __m128d __attribute__((__always_inline__)) _mm_cvtps_pd(__m128 a)
305{
306 return __builtin_ia32_cvtps2pd(a);
307}
308
309static inline __m128d __attribute__((__always_inline__)) _mm_cvtepi32_pd(__m128i a)
310{
311 return __builtin_ia32_cvtdq2pd((__v4si)a);
312}
313
314static inline __m128i __attribute__((__always_inline__)) _mm_cvtpd_epi32(__m128d a)
315{
316 return __builtin_ia32_cvtpd2dq(a);
317}
318
319static inline int __attribute__((__always_inline__)) _mm_cvtsd_si32(__m128d a)
320{
321 return __builtin_ia32_cvtsd2si(a);
322}
323
324static inline __m128 __attribute__((__always_inline__)) _mm_cvtsd_ss(__m128 a, __m128d b)
325{
326 return __builtin_ia32_cvtsd2ss(a, b);
327}
328
329static inline __m128d __attribute__((__always_inline__)) _mm_cvtsi32_sd(__m128d a, int b)
330{
331 return __builtin_ia32_cvtsi2sd(a, b);
332}
333
334static inline __m128d __attribute__((__always_inline__)) _mm_cvtss_sd(__m128d a, __m128 b)
335{
336 return __builtin_ia32_cvtss2sd(a, b);
337}
338
339static inline __m128i __attribute__((__always_inline__)) _mm_cvttpd_epi32(__m128d a)
340{
341 return (__m128i)__builtin_ia32_cvttpd2dq(a);
342}
343
344static inline int __attribute__((__always_inline__)) _mm_cvttsd_si32(__m128d a)
345{
346 return __builtin_ia32_cvttsd2si(a);
347}
348
349static inline __m64 __attribute__((__always_inline__)) _mm_cvtpd_pi32(__m128d a)
350{
351 return (__m64)__builtin_ia32_cvtpd2pi(a);
352}
353
354static inline __m64 __attribute__((__always_inline__)) _mm_cvttpd_pi32(__m128d a)
355{
356 return (__m64)__builtin_ia32_cvttpd2pi(a);
357}
358
359static inline __m128d __attribute__((__always_inline__)) _mm_cvtpi32_pd(__m64 a)
360{
361 return __builtin_ia32_cvtpi2pd((__v2si)a);
362}
363
364static inline double __attribute__((__always_inline__)) _mm_cvtsd_f64(__m128d a)
365{
366 return a[0];
367}
368
369#endif /* __SSE2__ */
370
371#endif /* __EMMINTRIN_H */