blob: d10c7bb7b65c17b66b7496a5dd862673de5ef35f [file] [log] [blame]
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __EMMINTRIN_H
25#define __EMMINTRIN_H
26
27#ifndef __SSE2__
28#error "SSE2 instruction set not enabled"
29#else
30
31#include <xmmintrin.h>
32
/* 16-byte vector of double; the operand/result type of the *_pd and *_sd
 * intrinsics below. */
typedef double __m128d __attribute__((__vector_size__(16)));
/* 16-byte vector declared with long long elements. */
typedef long long __m128i __attribute__((__vector_size__(16)));

/* 16-byte vector of int; used below to cast the argument handed to the
 * cvtdq2pd builtin. */
typedef int __v4si __attribute__((__vector_size__(16)));
/* 16-byte vector of char. */
typedef char __v16qi __attribute__((__vector_size__(16)));
Anders Carlssonc7fee2c2008-12-24 01:45:22 +000038
39static inline __m128d __attribute__((__always_inline__)) _mm_add_sd(__m128d a, __m128d b)
40{
41 return __builtin_ia32_addsd(a, b);
42}
43
44static inline __m128d __attribute__((__always_inline__)) _mm_add_pd(__m128d a, __m128d b)
45{
46 return a + b;
47}
48
49static inline __m128d __attribute__((__always_inline__)) _mm_sub_sd(__m128d a, __m128d b)
50{
51 return __builtin_ia32_subsd(a, b);
52}
53
54static inline __m128d __attribute__((__always_inline__)) _mm_sub_pd(__m128d a, __m128d b)
55{
56 return a - b;
57}
58
59static inline __m128d __attribute__((__always_inline__)) _mm_mul_sd(__m128d a, __m128d b)
60{
61 return __builtin_ia32_mulsd(a, b);
62}
63
64static inline __m128d __attribute__((__always_inline__)) _mm_mul_pd(__m128d a, __m128d b)
65{
66 return a * b;
67}
68
69static inline __m128d __attribute__((__always_inline__)) _mm_div_sd(__m128d a, __m128d b)
70{
71 return __builtin_ia32_divsd(a, b);
72}
73
74static inline __m128d __attribute__((__always_inline__)) _mm_div_pd(__m128d a, __m128d b)
75{
76 return a / b;
77}
78
79static inline __m128d __attribute__((__always_inline__)) _mm_sqrt_sd(__m128d a, __m128d b)
80{
81 __m128d c = __builtin_ia32_sqrtsd(b);
82 return (__m128d) { c[0], a[1] };
83}
84
85static inline __m128d __attribute__((__always_inline__)) _mm_sqrt_pd(__m128d a)
86{
87 return __builtin_ia32_sqrtpd(a);
88}
89
90static inline __m128d __attribute__((__always_inline__)) _mm_min_sd(__m128d a, __m128d b)
91{
92 return __builtin_ia32_minsd(a, b);
93}
94
95static inline __m128d __attribute__((__always_inline__)) _mm_min_pd(__m128d a, __m128d b)
96{
97 return __builtin_ia32_minpd(a, b);
98}
99
100static inline __m128d __attribute__((__always_inline__)) _mm_max_sd(__m128d a, __m128d b)
101{
102 return __builtin_ia32_maxsd(a, b);
103}
104
105static inline __m128d __attribute__((__always_inline__)) _mm_max_pd(__m128d a, __m128d b)
106{
107 return __builtin_ia32_maxpd(a, b);
108}
109
110static inline __m128d __attribute__((__always_inline__)) _mm_and_pd(__m128d a, __m128d b)
111{
112 return __builtin_ia32_andpd(a, b);
113}
114
115static inline __m128d __attribute__((__always_inline__)) _mm_andnot_pd(__m128d a, __m128d b)
116{
117 return __builtin_ia32_andnpd(a, b);
118}
119
120static inline __m128d __attribute__((__always_inline__)) _mm_or_pd(__m128d a, __m128d b)
121{
122 return __builtin_ia32_orpd(a, b);
123}
124
125static inline __m128d __attribute__((__always_inline__)) _mm_xor_pd(__m128d a, __m128d b)
126{
127 return __builtin_ia32_xorpd(a, b);
128}
129
130static inline __m128d __attribute__((__always_inline__)) _mm_cmpeq_pd(__m128d a, __m128d b)
131{
132 return (__m128d)__builtin_ia32_cmpeqpd(a, b);
133}
134
135static inline __m128d __attribute__((__always_inline__)) _mm_cmplt_pd(__m128d a, __m128d b)
136{
137 return (__m128d)__builtin_ia32_cmpltpd(a, b);
138}
139
140static inline __m128d __attribute__((__always_inline__)) _mm_cmple_pd(__m128d a, __m128d b)
141{
142 return (__m128d)__builtin_ia32_cmplepd(a, b);
143}
144
145static inline __m128d __attribute__((__always_inline__)) _mm_cmpgt_pd(__m128d a, __m128d b)
146{
147 return (__m128d)__builtin_ia32_cmpltpd(b, a);
148}
149
150static inline __m128d __attribute__((__always_inline__)) _mm_cmpge_pd(__m128d a, __m128d b)
151{
152 return (__m128d)__builtin_ia32_cmplepd(b, a);
153}
154
155static inline __m128d __attribute__((__always_inline__)) _mm_cmpord_pd(__m128d a, __m128d b)
156{
157 return (__m128d)__builtin_ia32_cmpordpd(a, b);
158}
159
160static inline __m128d __attribute__((__always_inline__)) _mm_cmpunord_pd(__m128d a, __m128d b)
161{
162 return (__m128d)__builtin_ia32_cmpunordpd(a, b);
163}
164
165static inline __m128d __attribute__((__always_inline__)) _mm_cmpneq_pd(__m128d a, __m128d b)
166{
167 return (__m128d)__builtin_ia32_cmpneqpd(a, b);
168}
169
170static inline __m128d __attribute__((__always_inline__)) _mm_cmpnlt_pd(__m128d a, __m128d b)
171{
172 return (__m128d)__builtin_ia32_cmpnltpd(a, b);
173}
174
175static inline __m128d __attribute__((__always_inline__)) _mm_cmpnle_pd(__m128d a, __m128d b)
176{
177 return (__m128d)__builtin_ia32_cmpnlepd(a, b);
178}
179
180static inline __m128d __attribute__((__always_inline__)) _mm_cmpngt_pd(__m128d a, __m128d b)
181{
182 return (__m128d)__builtin_ia32_cmpnltpd(b, a);
183}
184
185static inline __m128d __attribute__((__always_inline__)) _mm_cmpnge_pd(__m128d a, __m128d b)
186{
187 return (__m128d)__builtin_ia32_cmpnlepd(b, a);
188}
189
190static inline __m128d __attribute__((__always_inline__)) _mm_cmpeq_sd(__m128d a, __m128d b)
191{
192 return (__m128d)__builtin_ia32_cmpeqsd(a, b);
193}
194
195static inline __m128d __attribute__((__always_inline__)) _mm_cmplt_sd(__m128d a, __m128d b)
196{
197 return (__m128d)__builtin_ia32_cmpltsd(a, b);
198}
199
200static inline __m128d __attribute__((__always_inline__)) _mm_cmple_sd(__m128d a, __m128d b)
201{
202 return (__m128d)__builtin_ia32_cmplesd(a, b);
203}
204
205static inline __m128d __attribute__((__always_inline__)) _mm_cmpgt_sd(__m128d a, __m128d b)
206{
207 return (__m128d)__builtin_ia32_cmpltsd(b, a);
208}
209
210static inline __m128d __attribute__((__always_inline__)) _mm_cmpge_sd(__m128d a, __m128d b)
211{
212 return (__m128d)__builtin_ia32_cmplesd(b, a);
213}
214
215static inline __m128d __attribute__((__always_inline__)) _mm_cmpord_sd(__m128d a, __m128d b)
216{
217 return (__m128d)__builtin_ia32_cmpordsd(a, b);
218}
219
220static inline __m128d __attribute__((__always_inline__)) _mm_cmpunord_sd(__m128d a, __m128d b)
221{
222 return (__m128d)__builtin_ia32_cmpunordsd(a, b);
223}
224
225static inline __m128d __attribute__((__always_inline__)) _mm_cmpneq_sd(__m128d a, __m128d b)
226{
227 return (__m128d)__builtin_ia32_cmpneqsd(a, b);
228}
229
230static inline __m128d __attribute__((__always_inline__)) _mm_cmpnlt_sd(__m128d a, __m128d b)
231{
232 return (__m128d)__builtin_ia32_cmpnltsd(a, b);
233}
234
235static inline __m128d __attribute__((__always_inline__)) _mm_cmpnle_sd(__m128d a, __m128d b)
236{
237 return (__m128d)__builtin_ia32_cmpnlesd(a, b);
238}
239
240static inline __m128d __attribute__((__always_inline__)) _mm_cmpngt_sd(__m128d a, __m128d b)
241{
242 return (__m128d)__builtin_ia32_cmpnltsd(b, a);
243}
244
245static inline __m128d __attribute__((__always_inline__)) _mm_cmpnge_sd(__m128d a, __m128d b)
246{
247 return (__m128d)__builtin_ia32_cmpnlesd(b, a);
248}
249
250static inline int __attribute__((__always_inline__)) _mm_comieq_sd(__m128d a, __m128d b)
251{
252 return __builtin_ia32_comisdeq(a, b);
253}
254
255static inline int __attribute__((__always_inline__)) _mm_comilt_sd(__m128d a, __m128d b)
256{
257 return __builtin_ia32_comisdlt(a, b);
258}
259
260static inline int __attribute__((__always_inline__)) _mm_comile_sd(__m128d a, __m128d b)
261{
262 return __builtin_ia32_comisdle(a, b);
263}
264
265static inline int __attribute__((__always_inline__)) _mm_comigt_sd(__m128d a, __m128d b)
266{
267 return __builtin_ia32_comisdgt(a, b);
268}
269
270static inline int __attribute__((__always_inline__)) _mm_comineq_sd(__m128d a, __m128d b)
271{
272 return __builtin_ia32_comisdneq(a, b);
273}
274
275static inline int __attribute__((__always_inline__)) _mm_ucomieq_sd(__m128d a, __m128d b)
276{
277 return __builtin_ia32_ucomisdeq(a, b);
278}
279
280static inline int __attribute__((__always_inline__)) _mm_ucomilt_sd(__m128d a, __m128d b)
281{
282 return __builtin_ia32_ucomisdlt(a, b);
283}
284
285static inline int __attribute__((__always_inline__)) _mm_ucomile_sd(__m128d a, __m128d b)
286{
287 return __builtin_ia32_ucomisdle(a, b);
288}
289
290static inline int __attribute__((__always_inline__)) _mm_ucomigt_sd(__m128d a, __m128d b)
291{
292 return __builtin_ia32_ucomisdgt(a, b);
293}
294
295static inline int __attribute__((__always_inline__)) _mm_ucomineq_sd(__m128d a, __m128d b)
296{
297 return __builtin_ia32_ucomisdneq(a, b);
298}
299
300static inline __m128 __attribute__((__always_inline__)) _mm_cvtpd_ps(__m128d a)
301{
302 return __builtin_ia32_cvtpd2ps(a);
303}
304
305static inline __m128d __attribute__((__always_inline__)) _mm_cvtps_pd(__m128 a)
306{
307 return __builtin_ia32_cvtps2pd(a);
308}
309
310static inline __m128d __attribute__((__always_inline__)) _mm_cvtepi32_pd(__m128i a)
311{
312 return __builtin_ia32_cvtdq2pd((__v4si)a);
313}
314
315static inline __m128i __attribute__((__always_inline__)) _mm_cvtpd_epi32(__m128d a)
316{
317 return __builtin_ia32_cvtpd2dq(a);
318}
319
320static inline int __attribute__((__always_inline__)) _mm_cvtsd_si32(__m128d a)
321{
322 return __builtin_ia32_cvtsd2si(a);
323}
324
325static inline __m128 __attribute__((__always_inline__)) _mm_cvtsd_ss(__m128 a, __m128d b)
326{
327 return __builtin_ia32_cvtsd2ss(a, b);
328}
329
330static inline __m128d __attribute__((__always_inline__)) _mm_cvtsi32_sd(__m128d a, int b)
331{
332 return __builtin_ia32_cvtsi2sd(a, b);
333}
334
335static inline __m128d __attribute__((__always_inline__)) _mm_cvtss_sd(__m128d a, __m128 b)
336{
337 return __builtin_ia32_cvtss2sd(a, b);
338}
339
340static inline __m128i __attribute__((__always_inline__)) _mm_cvttpd_epi32(__m128d a)
341{
342 return (__m128i)__builtin_ia32_cvttpd2dq(a);
343}
344
345static inline int __attribute__((__always_inline__)) _mm_cvttsd_si32(__m128d a)
346{
347 return __builtin_ia32_cvttsd2si(a);
348}
349
350static inline __m64 __attribute__((__always_inline__)) _mm_cvtpd_pi32(__m128d a)
351{
352 return (__m64)__builtin_ia32_cvtpd2pi(a);
353}
354
355static inline __m64 __attribute__((__always_inline__)) _mm_cvttpd_pi32(__m128d a)
356{
357 return (__m64)__builtin_ia32_cvttpd2pi(a);
358}
359
360static inline __m128d __attribute__((__always_inline__)) _mm_cvtpi32_pd(__m64 a)
361{
362 return __builtin_ia32_cvtpi2pd((__v2si)a);
363}
364
365static inline double __attribute__((__always_inline__)) _mm_cvtsd_f64(__m128d a)
366{
367 return a[0];
368}
369
Anders Carlssonbf5b2c82008-12-24 02:11:54 +0000370static inline __m128d __attribute__((__always_inline__)) _mm_load_pd(double const *dp)
371{
372 return *(__m128d*)dp;
373}
374
375static inline __m128d __attribute__((__always_inline__)) _mm_load1_pd(double const *dp)
376{
377 return (__m128d){ dp[0], dp[0] };
378}
379
380static inline __m128d __attribute__((__always_inline__)) _mm_loadr_pd(double const *dp)
381{
382 return (__m128d){ dp[1], dp[0] };
383}
384
385static inline __m128d __attribute__((__always_inline__)) _mm_loadu_pd(double const *dp)
386{
387 return __builtin_ia32_loadupd(dp);
388}
389
390static inline __m128d __attribute__((__always_inline__)) _mm_load_sd(double const *dp)
391{
392 return (__m128d){ *dp, 0.0 };
393}
394
395static inline __m128d __attribute__((__always_inline__)) _mm_loadh_pd(__m128d a, double const *dp)
396{
397 return __builtin_shufflevector(a, *(__m128d *)dp, 0, 2);
398}
399
400static inline __m128d __attribute__((__always_inline__)) _mm_loadl_pd(__m128d a, double const *dp)
401{
402 return __builtin_shufflevector(a, *(__m128d *)dp, 2, 1);
403}
404
405static inline __m128d __attribute__((__always_inline__)) _mm_set_sd(double w)
406{
407 return (__m128d){ w, 0 };
408}
409
410static inline __m128d __attribute__((__always_inline__)) _mm_set1_pd(double w)
411{
412 return (__m128d){ w, w };
413}
414
415static inline __m128d __attribute__((__always_inline__)) _mm_set_pd(double w, double x)
416{
417 return (__m128d){ w, x };
418}
419
420static inline __m128d __attribute__((__always_inline__)) _mm_setr_pd(double w, double x)
421{
422 return (__m128d){ x, w };
423}
424
425static inline __m128d __attribute__((__always_inline__)) _mm_setzero_pd(void)
426{
427 return (__m128d){ 0, 0 };
428}
429
430static inline __m128d __attribute__((__always_inline__)) _mm_move_sd(__m128d a, __m128d b)
431{
432 return (__m128d){ b[0], a[1] };
433}
434
435static inline void __attribute__((__always_inline__)) _mm_store_sd(double *dp, __m128d a)
436{
437 dp[0] = a[0];
438}
439
440static inline void __attribute__((__always_inline__)) _mm_store1_pd(double *dp, __m128d a)
441{
442 dp[0] = a[0];
443 dp[1] = a[0];
444}
445
446static inline void __attribute__((__always_inline__)) _mm_store_pd(double *dp, __m128d a)
447{
448 *(__m128d *)dp = a;
449}
450
451static inline void __attribute__((__always_inline__)) _mm_storeu_pd(double *dp, __m128d a)
452{
453 __builtin_ia32_storeupd(dp, a);
454}
455
456static inline void __attribute__((__always_inline__)) _mm_storer_pd(double *dp, __m128d a)
457{
458 dp[0] = a[1];
459 dp[1] = a[0];
460}
461
462static inline void __attribute__((__always_inline__)) _mm_storeh_pd(double *dp, __m128d a)
463{
464 dp[0] = a[1];
465}
466
467static inline void __attribute__((__always_inline__)) _mm_storel_pd(double *dp, __m128d a)
468{
469 dp[0] = a[0];
470}
471
Anders Carlssonc7fee2c2008-12-24 01:45:22 +0000472#endif /* __SSE2__ */
473
474#endif /* __EMMINTRIN_H */