blob: 292f10d9e93565f7d665b8b99a74636fb313fc71 [file] [log] [blame]
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001/*===---- avxintrin.h - AVX intrinsics -------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
Benjamin Kramer6f35f3c2010-08-20 23:00:03 +000024#ifndef __IMMINTRIN_H
25#error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
26#endif
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000027
Richard Smith49e56442013-07-14 05:41:45 +000028#ifndef __AVXINTRIN_H
29#define __AVXINTRIN_H
30
/* Internal 256-bit vector views, one per element type.  These are used for
 * casts inside this header only. */
typedef double    __v4df  __attribute__((__vector_size__(32)));
typedef float     __v8sf  __attribute__((__vector_size__(32)));
typedef long long __v4di  __attribute__((__vector_size__(32)));
typedef int       __v8si  __attribute__((__vector_size__(32)));
typedef short     __v16hi __attribute__((__vector_size__(32)));
typedef char      __v32qi __attribute__((__vector_size__(32)));

/* Plain char may be signed or unsigned, so an explicitly signed byte vector
 * is provided as well.  It is not meant to appear in the public interface. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));

/* Public 256-bit vector types: packed float, packed double, integer. */
typedef float     __m256  __attribute__((__vector_size__(32)));
typedef double    __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));
45
/* Attribute set applied to every intrinsic function defined in this file:
 * always inline, no debug info, and compile the body for the "avx" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx")))
Eric Christopher4d1851682015-06-17 07:09:20 +000048
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000049/* Arithmetic */
Michael Kupersteine45af542015-06-30 13:36:19 +000050static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000051_mm256_add_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000052{
David Blaikie3302f2b2013-01-16 23:08:36 +000053 return __a+__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000054}
55
Michael Kupersteine45af542015-06-30 13:36:19 +000056static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000057_mm256_add_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000058{
David Blaikie3302f2b2013-01-16 23:08:36 +000059 return __a+__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000060}
61
Michael Kupersteine45af542015-06-30 13:36:19 +000062static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000063_mm256_sub_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000064{
David Blaikie3302f2b2013-01-16 23:08:36 +000065 return __a-__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000066}
67
Michael Kupersteine45af542015-06-30 13:36:19 +000068static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000069_mm256_sub_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000070{
David Blaikie3302f2b2013-01-16 23:08:36 +000071 return __a-__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000072}
73
Michael Kupersteine45af542015-06-30 13:36:19 +000074static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000075_mm256_addsub_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000076{
David Blaikie3302f2b2013-01-16 23:08:36 +000077 return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000078}
79
Michael Kupersteine45af542015-06-30 13:36:19 +000080static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000081_mm256_addsub_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000082{
David Blaikie3302f2b2013-01-16 23:08:36 +000083 return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000084}
85
Michael Kupersteine45af542015-06-30 13:36:19 +000086static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000087_mm256_div_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000088{
David Blaikie3302f2b2013-01-16 23:08:36 +000089 return __a / __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000090}
91
Michael Kupersteine45af542015-06-30 13:36:19 +000092static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000093_mm256_div_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000094{
David Blaikie3302f2b2013-01-16 23:08:36 +000095 return __a / __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000096}
97
Michael Kupersteine45af542015-06-30 13:36:19 +000098static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000099_mm256_max_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000100{
David Blaikie3302f2b2013-01-16 23:08:36 +0000101 return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000102}
103
Michael Kupersteine45af542015-06-30 13:36:19 +0000104static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000105_mm256_max_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000106{
David Blaikie3302f2b2013-01-16 23:08:36 +0000107 return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000108}
109
Michael Kupersteine45af542015-06-30 13:36:19 +0000110static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000111_mm256_min_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000112{
David Blaikie3302f2b2013-01-16 23:08:36 +0000113 return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000114}
115
Michael Kupersteine45af542015-06-30 13:36:19 +0000116static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000117_mm256_min_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000118{
David Blaikie3302f2b2013-01-16 23:08:36 +0000119 return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000120}
121
Michael Kupersteine45af542015-06-30 13:36:19 +0000122static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000123_mm256_mul_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000124{
David Blaikie3302f2b2013-01-16 23:08:36 +0000125 return __a * __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000126}
127
Michael Kupersteine45af542015-06-30 13:36:19 +0000128static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000129_mm256_mul_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000130{
David Blaikie3302f2b2013-01-16 23:08:36 +0000131 return __a * __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000132}
133
Michael Kupersteine45af542015-06-30 13:36:19 +0000134static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000135_mm256_sqrt_pd(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000136{
David Blaikie3302f2b2013-01-16 23:08:36 +0000137 return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000138}
139
Michael Kupersteine45af542015-06-30 13:36:19 +0000140static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000141_mm256_sqrt_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000142{
David Blaikie3302f2b2013-01-16 23:08:36 +0000143 return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000144}
145
Michael Kupersteine45af542015-06-30 13:36:19 +0000146static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000147_mm256_rsqrt_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000148{
David Blaikie3302f2b2013-01-16 23:08:36 +0000149 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000150}
151
Michael Kupersteine45af542015-06-30 13:36:19 +0000152static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000153_mm256_rcp_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000154{
David Blaikie3302f2b2013-01-16 23:08:36 +0000155 return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000156}
157
/* Rounds packed doubles V through the roundpd256 builtin; M is forwarded
 * unchanged as the builtin's second operand. */
#define _mm256_round_pd(V, M) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000160
/* Rounds packed floats V through the roundps256 builtin; M is forwarded
 * unchanged as the builtin's second operand. */
#define _mm256_round_ps(V, M) \
  __extension__ ({ \
    (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000163
/* Ceiling and floor are the round macros with a fixed _MM_FROUND_* mode. */
#define _mm256_ceil_pd(V)  _mm256_round_pd((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)  _mm256_round_ps((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
168
169/* Logical */
Michael Kupersteine45af542015-06-30 13:36:19 +0000170static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000171_mm256_and_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000172{
David Blaikie3302f2b2013-01-16 23:08:36 +0000173 return (__m256d)((__v4di)__a & (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000174}
175
Michael Kupersteine45af542015-06-30 13:36:19 +0000176static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000177_mm256_and_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000178{
David Blaikie3302f2b2013-01-16 23:08:36 +0000179 return (__m256)((__v8si)__a & (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000180}
181
Michael Kupersteine45af542015-06-30 13:36:19 +0000182static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000183_mm256_andnot_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000184{
David Blaikie3302f2b2013-01-16 23:08:36 +0000185 return (__m256d)(~(__v4di)__a & (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000186}
187
Michael Kupersteine45af542015-06-30 13:36:19 +0000188static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000189_mm256_andnot_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000190{
David Blaikie3302f2b2013-01-16 23:08:36 +0000191 return (__m256)(~(__v8si)__a & (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000192}
193
Michael Kupersteine45af542015-06-30 13:36:19 +0000194static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000195_mm256_or_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000196{
David Blaikie3302f2b2013-01-16 23:08:36 +0000197 return (__m256d)((__v4di)__a | (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000198}
199
Michael Kupersteine45af542015-06-30 13:36:19 +0000200static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000201_mm256_or_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000202{
David Blaikie3302f2b2013-01-16 23:08:36 +0000203 return (__m256)((__v8si)__a | (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000204}
205
Michael Kupersteine45af542015-06-30 13:36:19 +0000206static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000207_mm256_xor_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000208{
David Blaikie3302f2b2013-01-16 23:08:36 +0000209 return (__m256d)((__v4di)__a ^ (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000210}
211
Michael Kupersteine45af542015-06-30 13:36:19 +0000212static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000213_mm256_xor_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000214{
David Blaikie3302f2b2013-01-16 23:08:36 +0000215 return (__m256)((__v8si)__a ^ (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000216}
217
218/* Horizontal arithmetic */
Michael Kupersteine45af542015-06-30 13:36:19 +0000219static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000220_mm256_hadd_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000221{
David Blaikie3302f2b2013-01-16 23:08:36 +0000222 return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000223}
224
Michael Kupersteine45af542015-06-30 13:36:19 +0000225static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000226_mm256_hadd_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000227{
David Blaikie3302f2b2013-01-16 23:08:36 +0000228 return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000229}
230
Michael Kupersteine45af542015-06-30 13:36:19 +0000231static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000232_mm256_hsub_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000233{
David Blaikie3302f2b2013-01-16 23:08:36 +0000234 return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000235}
236
Michael Kupersteine45af542015-06-30 13:36:19 +0000237static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000238_mm256_hsub_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000239{
David Blaikie3302f2b2013-01-16 23:08:36 +0000240 return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000241}
242
243/* Vector permutations */
Michael Kupersteine45af542015-06-30 13:36:19 +0000244static __inline __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000245_mm_permutevar_pd(__m128d __a, __m128i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000246{
David Blaikie3302f2b2013-01-16 23:08:36 +0000247 return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000248}
249
Michael Kupersteine45af542015-06-30 13:36:19 +0000250static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000251_mm256_permutevar_pd(__m256d __a, __m256i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000252{
David Blaikie3302f2b2013-01-16 23:08:36 +0000253 return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000254}
255
Michael Kupersteine45af542015-06-30 13:36:19 +0000256static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000257_mm_permutevar_ps(__m128 __a, __m128i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000258{
David Blaikie3302f2b2013-01-16 23:08:36 +0000259 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000260}
261
Michael Kupersteine45af542015-06-30 13:36:19 +0000262static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000263_mm256_permutevar_ps(__m256 __a, __m256i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000264{
Craig Topper9fee8ab2015-01-31 06:33:59 +0000265 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000266}
267
/* Permutes the two doubles of A.  Bit i of C picks the source element for
 * result element i.  The zero vector is only a placeholder second operand;
 * every index stays in the range 0..1 and so selects from A. */
#define _mm_permute_pd(A, C) \
  __extension__ ({ \
    (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
                                     (__v2df)_mm_setzero_pd(), \
                                     ((C) >> 0) & 0x1, \
                                     ((C) >> 1) & 0x1); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000272
/* Permutes doubles within each 128-bit half of A.  Bits 0-1 of C choose the
 * elements for the low half (indices 0..1); bits 2-3 choose for the high
 * half (indices 2..3).  The zero vector is a placeholder second operand that
 * no index refers to. */
#define _mm256_permute_pd(A, C) \
  __extension__ ({ \
    (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                     (__v4df)_mm256_setzero_pd(), \
                                     ((C) >> 0) & 0x1, \
                                     ((C) >> 1) & 0x1, \
                                     2 + (((C) >> 2) & 0x1), \
                                     2 + (((C) >> 3) & 0x1)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000279
/* Permutes the four floats of A.  Each 2-bit field of C (lowest field first)
 * picks the source element for the matching result element.  The zero vector
 * is a placeholder second operand that no index refers to. */
#define _mm_permute_ps(A, C) \
  __extension__ ({ \
    (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
                                    (__v4sf)_mm_setzero_ps(), \
                                    ((C) >> 0) & 0x3, \
                                    ((C) >> 2) & 0x3, \
                                    ((C) >> 4) & 0x3, \
                                    ((C) >> 6) & 0x3); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000285
/* Permutes floats within each 128-bit half of A.  The same four 2-bit fields
 * of C are applied to both halves: indices 0..3 for the low half and 4..7 for
 * the high half.  The zero vector is a placeholder second operand that no
 * index refers to. */
#define _mm256_permute_ps(A, C) \
  __extension__ ({ \
    (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                    (__v8sf)_mm256_setzero_ps(), \
                                    ((C) >> 0) & 0x3, \
                                    ((C) >> 2) & 0x3, \
                                    ((C) >> 4) & 0x3, \
                                    ((C) >> 6) & 0x3, \
                                    4 + (((C) >> 0) & 0x3), \
                                    4 + (((C) >> 2) & 0x3), \
                                    4 + (((C) >> 4) & 0x3), \
                                    4 + (((C) >> 6) & 0x3)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000295
/* Passes V1, V2 (as packed doubles) and the control value M to the
 * vperm2f128_pd256 builtin. */
#define _mm256_permute2f128_pd(V1, V2, M) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
                                             (__v4df)(__m256d)(V2), \
                                             (M)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000299
/* Passes V1, V2 (as packed floats) and the control value M to the
 * vperm2f128_ps256 builtin. */
#define _mm256_permute2f128_ps(V1, V2, M) \
  __extension__ ({ \
    (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
                                            (__v8sf)(__m256)(V2), \
                                            (M)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000303
/* Passes V1, V2 (as packed 32-bit ints) and the control value M to the
 * vperm2f128_si256 builtin. */
#define _mm256_permute2f128_si256(V1, V2, M) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
                                             (__v8si)(__m256i)(V2), \
                                             (M)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000307
308/* Vector Blend */
/* Blends packed doubles.  Result element i comes from V2 when bit i of M is
 * set and from V1 otherwise.  In the shuffle, V1 occupies indices 0..3 and V2
 * occupies 4..7, so each index is i plus 4 times the mask bit. */
#define _mm256_blend_pd(V1, V2, M) \
  __extension__ ({ \
    (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
                                     (__v4df)(__m256d)(V2), \
                                     0 + 4 * (((M) >> 0) & 0x1), \
                                     1 + 4 * (((M) >> 1) & 0x1), \
                                     2 + 4 * (((M) >> 2) & 0x1), \
                                     3 + 4 * (((M) >> 3) & 0x1)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000316
/* Blends packed floats.  Result element i comes from V2 when bit i of M is
 * set and from V1 otherwise.  In the shuffle, V1 occupies indices 0..7 and V2
 * occupies 8..15, so each index is i plus 8 times the mask bit. */
#define _mm256_blend_ps(V1, V2, M) \
  __extension__ ({ \
    (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
                                    (__v8sf)(__m256)(V2), \
                                    0 + 8 * (((M) >> 0) & 0x1), \
                                    1 + 8 * (((M) >> 1) & 0x1), \
                                    2 + 8 * (((M) >> 2) & 0x1), \
                                    3 + 8 * (((M) >> 3) & 0x1), \
                                    4 + 8 * (((M) >> 4) & 0x1), \
                                    5 + 8 * (((M) >> 5) & 0x1), \
                                    6 + 8 * (((M) >> 6) & 0x1), \
                                    7 + 8 * (((M) >> 7) & 0x1)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000328
Michael Kupersteine45af542015-06-30 13:36:19 +0000329static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000330_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000331{
David Blaikie3302f2b2013-01-16 23:08:36 +0000332 return (__m256d)__builtin_ia32_blendvpd256(
333 (__v4df)__a, (__v4df)__b, (__v4df)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000334}
335
Michael Kupersteine45af542015-06-30 13:36:19 +0000336static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000337_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000338{
David Blaikie5bb70032013-01-16 23:13:42 +0000339 return (__m256)__builtin_ia32_blendvps256(
David Blaikie3302f2b2013-01-16 23:08:36 +0000340 (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000341}
342
343/* Vector Dot Product */
/* Packed-float dot product: V1, V2 and the control value M are passed to the
 * dpps256 builtin. */
#define _mm256_dp_ps(V1, V2, M) \
  __extension__ ({ \
    (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
                                   (__v8sf)(__m256)(V2), \
                                   (M)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000347
348/* Vector shuffle */
/* Shuffles packed floats per 128-bit half using the four 2-bit fields of
 * mask.  In the shuffle, a occupies indices 0..7 and b occupies 8..15.
 * Within each half, the first two result elements are taken from a and the
 * last two from b; the same mask fields drive both halves. */
#define _mm256_shuffle_ps(a, b, mask) \
  __extension__ ({ \
    (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
                                    (__v8sf)(__m256)(b), \
                                    0  + (((mask) >> 0) & 0x3), \
                                    0  + (((mask) >> 2) & 0x3), \
                                    8  + (((mask) >> 4) & 0x3), \
                                    8  + (((mask) >> 6) & 0x3), \
                                    4  + (((mask) >> 0) & 0x3), \
                                    4  + (((mask) >> 2) & 0x3), \
                                    12 + (((mask) >> 4) & 0x3), \
                                    12 + (((mask) >> 6) & 0x3)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000360
/* Shuffles packed doubles using one mask bit per result element.  In the
 * shuffle, a occupies indices 0..3 and b occupies 4..7.  Even result elements
 * are taken from a and odd ones from b, each within its own 128-bit half. */
#define _mm256_shuffle_pd(a, b, mask) \
  __extension__ ({ \
    (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
                                     (__v4df)(__m256d)(b), \
                                     0 + (((mask) >> 0) & 0x1), \
                                     4 + (((mask) >> 1) & 0x1), \
                                     2 + (((mask) >> 2) & 0x1), \
                                     6 + (((mask) >> 3) & 0x1)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000368
369/* Compare */
/* Comparison predicates accepted as the third argument of the *_cmp_*
 * macros in this file. */
#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling) */
#define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */
#define _CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling) */
#define _CMP_GT_OS    0x0e /* Greater-than (ordered, signaling) */
#define _CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling) */
#define _CMP_EQ_OS    0x10 /* Equal (ordered, signaling) */
#define _CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling) */
#define _CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling) */
#define _CMP_UNORD_S  0x13 /* Unordered (signaling) */
#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */
#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */
#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
#define _CMP_ORD_S    0x17 /* Ordered (signaling) */
#define _CMP_EQ_US    0x18 /* Equal (unordered, signaling) */
#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */
#define _CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling) */
#define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US  0x1f /* True (unordered, signaling) */
402
/* Compares packed doubles of two 128-bit vectors with predicate c, through
 * the cmppd builtin. */
#define _mm_cmp_pd(a, b, c) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
                                  (__v2df)(__m128d)(b), \
                                  (c)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000406
/* Compares packed floats of two 128-bit vectors with predicate c, through
 * the cmpps builtin. */
#define _mm_cmp_ps(a, b, c) \
  __extension__ ({ \
    (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
                                 (__v4sf)(__m128)(b), \
                                 (c)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000410
/* Compares packed doubles of two 256-bit vectors with predicate c, through
 * the cmppd256 builtin. */
#define _mm256_cmp_pd(a, b, c) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
                                     (__v4df)(__m256d)(b), \
                                     (c)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000414
/* Compares packed floats of two 256-bit vectors with predicate c, through
 * the cmpps256 builtin. */
#define _mm256_cmp_ps(a, b, c) \
  __extension__ ({ \
    (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                    (__v8sf)(__m256)(b), \
                                    (c)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000418
/* Scalar-double compare with predicate c, through the cmpsd builtin. */
#define _mm_cmp_sd(a, b, c) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
                                  (__v2df)(__m128d)(b), \
                                  (c)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000422
/* Scalar-float compare with predicate c, through the cmpss builtin. */
#define _mm_cmp_ss(a, b, c) \
  __extension__ ({ \
    (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
                                 (__v4sf)(__m128)(b), \
                                 (c)); \
  })
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000426
Michael Kupersteine45af542015-06-30 13:36:19 +0000427static __inline int __DEFAULT_FN_ATTRS
Craig Topper459554f2015-01-31 06:31:30 +0000428_mm256_extract_epi32(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000429{
David Blaikie3302f2b2013-01-16 23:08:36 +0000430 __v8si __b = (__v8si)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000431 return __b[__imm & 7];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000432}
433
Michael Kupersteine45af542015-06-30 13:36:19 +0000434static __inline int __DEFAULT_FN_ATTRS
Craig Topper459554f2015-01-31 06:31:30 +0000435_mm256_extract_epi16(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000436{
David Blaikie3302f2b2013-01-16 23:08:36 +0000437 __v16hi __b = (__v16hi)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000438 return __b[__imm & 15];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000439}
440
Michael Kupersteine45af542015-06-30 13:36:19 +0000441static __inline int __DEFAULT_FN_ATTRS
Craig Topper459554f2015-01-31 06:31:30 +0000442_mm256_extract_epi8(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000443{
David Blaikie3302f2b2013-01-16 23:08:36 +0000444 __v32qi __b = (__v32qi)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000445 return __b[__imm & 31];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000446}
447
448#ifdef __x86_64__
Michael Kupersteine45af542015-06-30 13:36:19 +0000449static __inline long long __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000450_mm256_extract_epi64(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000451{
David Blaikie3302f2b2013-01-16 23:08:36 +0000452 __v4di __b = (__v4di)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000453 return __b[__imm & 3];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000454}
455#endif
456
Michael Kupersteine45af542015-06-30 13:36:19 +0000457static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000458_mm256_insert_epi32(__m256i __a, int __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000459{
David Blaikie3302f2b2013-01-16 23:08:36 +0000460 __v8si __c = (__v8si)__a;
461 __c[__imm & 7] = __b;
462 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000463}
464
Michael Kupersteine45af542015-06-30 13:36:19 +0000465static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000466_mm256_insert_epi16(__m256i __a, int __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000467{
David Blaikie3302f2b2013-01-16 23:08:36 +0000468 __v16hi __c = (__v16hi)__a;
469 __c[__imm & 15] = __b;
470 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000471}
472
Michael Kupersteine45af542015-06-30 13:36:19 +0000473static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000474_mm256_insert_epi8(__m256i __a, int __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000475{
David Blaikie3302f2b2013-01-16 23:08:36 +0000476 __v32qi __c = (__v32qi)__a;
477 __c[__imm & 31] = __b;
478 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000479}
480
481#ifdef __x86_64__
Michael Kupersteine45af542015-06-30 13:36:19 +0000482static __inline __m256i __DEFAULT_FN_ATTRS
Filipe Cabecinhasd7400292015-02-19 19:00:33 +0000483_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000484{
David Blaikie3302f2b2013-01-16 23:08:36 +0000485 __v4di __c = (__v4di)__a;
486 __c[__imm & 3] = __b;
487 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000488}
489#endif
490
491/* Conversion */
Michael Kupersteine45af542015-06-30 13:36:19 +0000492static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000493_mm256_cvtepi32_pd(__m128i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000494{
David Blaikie3302f2b2013-01-16 23:08:36 +0000495 return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000496}
497
Michael Kupersteine45af542015-06-30 13:36:19 +0000498static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000499_mm256_cvtepi32_ps(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000500{
David Blaikie3302f2b2013-01-16 23:08:36 +0000501 return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000502}
503
Michael Kupersteine45af542015-06-30 13:36:19 +0000504static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000505_mm256_cvtpd_ps(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000506{
David Blaikie3302f2b2013-01-16 23:08:36 +0000507 return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000508}
509
Michael Kupersteine45af542015-06-30 13:36:19 +0000510static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000511_mm256_cvtps_epi32(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000512{
David Blaikie3302f2b2013-01-16 23:08:36 +0000513 return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000514}
515
Michael Kupersteine45af542015-06-30 13:36:19 +0000516static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000517_mm256_cvtps_pd(__m128 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000518{
David Blaikie3302f2b2013-01-16 23:08:36 +0000519 return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000520}
521
Michael Kupersteine45af542015-06-30 13:36:19 +0000522static __inline __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000523_mm256_cvttpd_epi32(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000524{
David Blaikie3302f2b2013-01-16 23:08:36 +0000525 return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000526}
527
Michael Kupersteine45af542015-06-30 13:36:19 +0000528static __inline __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000529_mm256_cvtpd_epi32(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000530{
David Blaikie3302f2b2013-01-16 23:08:36 +0000531 return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000532}
533
Michael Kupersteine45af542015-06-30 13:36:19 +0000534static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000535_mm256_cvttps_epi32(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000536{
David Blaikie3302f2b2013-01-16 23:08:36 +0000537 return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000538}
539
540/* Vector replicate */
Michael Kupersteine45af542015-06-30 13:36:19 +0000541static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000542_mm256_movehdup_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000543{
David Blaikie3302f2b2013-01-16 23:08:36 +0000544 return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000545}
546
Michael Kupersteine45af542015-06-30 13:36:19 +0000547static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000548_mm256_moveldup_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000549{
David Blaikie3302f2b2013-01-16 23:08:36 +0000550 return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000551}
552
Michael Kupersteine45af542015-06-30 13:36:19 +0000553static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000554_mm256_movedup_pd(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000555{
David Blaikie3302f2b2013-01-16 23:08:36 +0000556 return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000557}
558
559/* Unpack and Interleave */
Michael Kupersteine45af542015-06-30 13:36:19 +0000560static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000561_mm256_unpackhi_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000562{
David Blaikie3302f2b2013-01-16 23:08:36 +0000563 return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000564}
565
Michael Kupersteine45af542015-06-30 13:36:19 +0000566static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000567_mm256_unpacklo_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000568{
David Blaikie3302f2b2013-01-16 23:08:36 +0000569 return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000570}
571
Michael Kupersteine45af542015-06-30 13:36:19 +0000572static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000573_mm256_unpackhi_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000574{
David Blaikie3302f2b2013-01-16 23:08:36 +0000575 return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000576}
577
Michael Kupersteine45af542015-06-30 13:36:19 +0000578static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000579_mm256_unpacklo_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000580{
David Blaikie3302f2b2013-01-16 23:08:36 +0000581 return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000582}
583
584/* Bit Test */
Michael Kupersteine45af542015-06-30 13:36:19 +0000585static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000586_mm_testz_pd(__m128d __a, __m128d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000587{
David Blaikie3302f2b2013-01-16 23:08:36 +0000588 return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000589}
590
Michael Kupersteine45af542015-06-30 13:36:19 +0000591static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000592_mm_testc_pd(__m128d __a, __m128d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000593{
David Blaikie3302f2b2013-01-16 23:08:36 +0000594 return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000595}
596
Michael Kupersteine45af542015-06-30 13:36:19 +0000597static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000598_mm_testnzc_pd(__m128d __a, __m128d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000599{
David Blaikie3302f2b2013-01-16 23:08:36 +0000600 return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000601}
602
Michael Kupersteine45af542015-06-30 13:36:19 +0000603static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000604_mm_testz_ps(__m128 __a, __m128 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000605{
David Blaikie3302f2b2013-01-16 23:08:36 +0000606 return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000607}
608
Michael Kupersteine45af542015-06-30 13:36:19 +0000609static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000610_mm_testc_ps(__m128 __a, __m128 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000611{
David Blaikie3302f2b2013-01-16 23:08:36 +0000612 return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000613}
614
Michael Kupersteine45af542015-06-30 13:36:19 +0000615static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000616_mm_testnzc_ps(__m128 __a, __m128 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000617{
David Blaikie3302f2b2013-01-16 23:08:36 +0000618 return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000619}
620
Michael Kupersteine45af542015-06-30 13:36:19 +0000621static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000622_mm256_testz_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000623{
David Blaikie3302f2b2013-01-16 23:08:36 +0000624 return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000625}
626
Michael Kupersteine45af542015-06-30 13:36:19 +0000627static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000628_mm256_testc_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000629{
David Blaikie3302f2b2013-01-16 23:08:36 +0000630 return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000631}
632
Michael Kupersteine45af542015-06-30 13:36:19 +0000633static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000634_mm256_testnzc_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000635{
David Blaikie3302f2b2013-01-16 23:08:36 +0000636 return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000637}
638
Michael Kupersteine45af542015-06-30 13:36:19 +0000639static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000640_mm256_testz_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000641{
David Blaikie3302f2b2013-01-16 23:08:36 +0000642 return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000643}
644
Michael Kupersteine45af542015-06-30 13:36:19 +0000645static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000646_mm256_testc_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000647{
David Blaikie3302f2b2013-01-16 23:08:36 +0000648 return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000649}
650
Michael Kupersteine45af542015-06-30 13:36:19 +0000651static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000652_mm256_testnzc_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000653{
David Blaikie3302f2b2013-01-16 23:08:36 +0000654 return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000655}
656
Michael Kupersteine45af542015-06-30 13:36:19 +0000657static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000658_mm256_testz_si256(__m256i __a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000659{
David Blaikie3302f2b2013-01-16 23:08:36 +0000660 return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000661}
662
Michael Kupersteine45af542015-06-30 13:36:19 +0000663static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000664_mm256_testc_si256(__m256i __a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000665{
David Blaikie3302f2b2013-01-16 23:08:36 +0000666 return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000667}
668
Michael Kupersteine45af542015-06-30 13:36:19 +0000669static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000670_mm256_testnzc_si256(__m256i __a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000671{
David Blaikie3302f2b2013-01-16 23:08:36 +0000672 return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000673}
674
675/* Vector extract sign mask */
Michael Kupersteine45af542015-06-30 13:36:19 +0000676static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000677_mm256_movemask_pd(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000678{
David Blaikie3302f2b2013-01-16 23:08:36 +0000679 return __builtin_ia32_movmskpd256((__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000680}
681
Michael Kupersteine45af542015-06-30 13:36:19 +0000682static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000683_mm256_movemask_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000684{
David Blaikie3302f2b2013-01-16 23:08:36 +0000685 return __builtin_ia32_movmskps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000686}
687
/* Vector zero */
Michael Kupersteine45af542015-06-30 13:36:19 +0000689static __inline void __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000690_mm256_zeroall(void)
691{
692 __builtin_ia32_vzeroall();
693}
694
Michael Kupersteine45af542015-06-30 13:36:19 +0000695static __inline void __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000696_mm256_zeroupper(void)
697{
698 __builtin_ia32_vzeroupper();
699}
700
701/* Vector load with broadcast */
Michael Kupersteine45af542015-06-30 13:36:19 +0000702static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000703_mm_broadcast_ss(float const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000704{
Adam Nemet286ae082014-05-29 20:47:29 +0000705 float __f = *__a;
706 return (__m128)(__v4sf){ __f, __f, __f, __f };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000707}
708
Michael Kupersteine45af542015-06-30 13:36:19 +0000709static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000710_mm256_broadcast_sd(double const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000711{
Adam Nemet286ae082014-05-29 20:47:29 +0000712 double __d = *__a;
713 return (__m256d)(__v4df){ __d, __d, __d, __d };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000714}
715
Michael Kupersteine45af542015-06-30 13:36:19 +0000716static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000717_mm256_broadcast_ss(float const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000718{
Adam Nemet286ae082014-05-29 20:47:29 +0000719 float __f = *__a;
720 return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000721}
722
Michael Kupersteine45af542015-06-30 13:36:19 +0000723static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000724_mm256_broadcast_pd(__m128d const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000725{
David Blaikie3302f2b2013-01-16 23:08:36 +0000726 return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000727}
728
Michael Kupersteine45af542015-06-30 13:36:19 +0000729static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000730_mm256_broadcast_ps(__m128 const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000731{
David Blaikie3302f2b2013-01-16 23:08:36 +0000732 return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000733}
734
735/* SIMD load ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000736static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000737_mm256_load_pd(double const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000738{
David Blaikie3302f2b2013-01-16 23:08:36 +0000739 return *(__m256d *)__p;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000740}
741
Michael Kupersteine45af542015-06-30 13:36:19 +0000742static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000743_mm256_load_ps(float const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000744{
David Blaikie3302f2b2013-01-16 23:08:36 +0000745 return *(__m256 *)__p;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000746}
747
Michael Kupersteine45af542015-06-30 13:36:19 +0000748static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000749_mm256_loadu_pd(double const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000750{
Craig Topper9e9301a2012-01-25 04:26:17 +0000751 struct __loadu_pd {
David Blaikie3302f2b2013-01-16 23:08:36 +0000752 __m256d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +0000753 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +0000754 return ((struct __loadu_pd*)__p)->__v;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000755}
756
Michael Kupersteine45af542015-06-30 13:36:19 +0000757static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000758_mm256_loadu_ps(float const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000759{
Craig Topper9e9301a2012-01-25 04:26:17 +0000760 struct __loadu_ps {
David Blaikie3302f2b2013-01-16 23:08:36 +0000761 __m256 __v;
David Majnemer1cf22e62015-02-04 00:26:10 +0000762 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +0000763 return ((struct __loadu_ps*)__p)->__v;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000764}
765
Michael Kupersteine45af542015-06-30 13:36:19 +0000766static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000767_mm256_load_si256(__m256i const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000768{
David Blaikie3302f2b2013-01-16 23:08:36 +0000769 return *__p;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000770}
771
Michael Kupersteine45af542015-06-30 13:36:19 +0000772static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000773_mm256_loadu_si256(__m256i const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000774{
Craig Topper9e9301a2012-01-25 04:26:17 +0000775 struct __loadu_si256 {
David Blaikie3302f2b2013-01-16 23:08:36 +0000776 __m256i __v;
David Majnemer1cf22e62015-02-04 00:26:10 +0000777 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +0000778 return ((struct __loadu_si256*)__p)->__v;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000779}
780
Michael Kupersteine45af542015-06-30 13:36:19 +0000781static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000782_mm256_lddqu_si256(__m256i const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000783{
David Blaikie3302f2b2013-01-16 23:08:36 +0000784 return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000785}
786
787/* SIMD store ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000788static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000789_mm256_store_pd(double *__p, __m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000790{
David Blaikie3302f2b2013-01-16 23:08:36 +0000791 *(__m256d *)__p = __a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000792}
793
Michael Kupersteine45af542015-06-30 13:36:19 +0000794static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000795_mm256_store_ps(float *__p, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000796{
David Blaikie3302f2b2013-01-16 23:08:36 +0000797 *(__m256 *)__p = __a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000798}
799
Michael Kupersteine45af542015-06-30 13:36:19 +0000800static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000801_mm256_storeu_pd(double *__p, __m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000802{
David Blaikie3302f2b2013-01-16 23:08:36 +0000803 __builtin_ia32_storeupd256(__p, (__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000804}
805
Michael Kupersteine45af542015-06-30 13:36:19 +0000806static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000807_mm256_storeu_ps(float *__p, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000808{
David Blaikie3302f2b2013-01-16 23:08:36 +0000809 __builtin_ia32_storeups256(__p, (__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000810}
811
Michael Kupersteine45af542015-06-30 13:36:19 +0000812static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000813_mm256_store_si256(__m256i *__p, __m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000814{
David Blaikie3302f2b2013-01-16 23:08:36 +0000815 *__p = __a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000816}
817
Michael Kupersteine45af542015-06-30 13:36:19 +0000818static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000819_mm256_storeu_si256(__m256i *__p, __m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000820{
David Blaikie3302f2b2013-01-16 23:08:36 +0000821 __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000822}
823
824/* Conditional load ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000825static __inline __m128d __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000826_mm_maskload_pd(double const *__p, __m128i __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000827{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000828 return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000829}
830
Michael Kupersteine45af542015-06-30 13:36:19 +0000831static __inline __m256d __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000832_mm256_maskload_pd(double const *__p, __m256i __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000833{
David Blaikie3302f2b2013-01-16 23:08:36 +0000834 return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000835 (__v4di)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000836}
837
Michael Kupersteine45af542015-06-30 13:36:19 +0000838static __inline __m128 __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000839_mm_maskload_ps(float const *__p, __m128i __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000840{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000841 return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000842}
843
Michael Kupersteine45af542015-06-30 13:36:19 +0000844static __inline __m256 __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000845_mm256_maskload_ps(float const *__p, __m256i __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000846{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000847 return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000848}
849
850/* Conditional store ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000851static __inline void __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000852_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000853{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000854 __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000855}
856
Michael Kupersteine45af542015-06-30 13:36:19 +0000857static __inline void __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000858_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000859{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000860 __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000861}
862
Michael Kupersteine45af542015-06-30 13:36:19 +0000863static __inline void __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000864_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000865{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000866 __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000867}
868
Michael Kupersteine45af542015-06-30 13:36:19 +0000869static __inline void __DEFAULT_FN_ATTRS
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000870_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000871{
Andrea Di Biagio8bb12d02015-10-20 11:19:54 +0000872 __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000873}
874
875/* Cacheability support ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000876static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000877_mm256_stream_si256(__m256i *__a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000878{
David Blaikie3302f2b2013-01-16 23:08:36 +0000879 __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000880}
881
Michael Kupersteine45af542015-06-30 13:36:19 +0000882static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000883_mm256_stream_pd(double *__a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000884{
David Blaikie3302f2b2013-01-16 23:08:36 +0000885 __builtin_ia32_movntpd256(__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000886}
887
Michael Kupersteine45af542015-06-30 13:36:19 +0000888static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000889_mm256_stream_ps(float *__p, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000890{
David Blaikie3302f2b2013-01-16 23:08:36 +0000891 __builtin_ia32_movntps256(__p, (__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000892}
893
894/* Create vectors */
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000895static __inline__ __m256d __DEFAULT_FN_ATTRS
896_mm256_undefined_pd()
897{
898 return (__m256d)__builtin_ia32_undef256();
899}
900
901static __inline__ __m256 __DEFAULT_FN_ATTRS
902_mm256_undefined_ps()
903{
904 return (__m256)__builtin_ia32_undef256();
905}
906
907static __inline__ __m256i __DEFAULT_FN_ATTRS
908_mm256_undefined_si256()
909{
910 return (__m256i)__builtin_ia32_undef256();
911}
912
Michael Kupersteine45af542015-06-30 13:36:19 +0000913static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000914_mm256_set_pd(double __a, double __b, double __c, double __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000915{
David Blaikie3302f2b2013-01-16 23:08:36 +0000916 return (__m256d){ __d, __c, __b, __a };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000917}
918
Michael Kupersteine45af542015-06-30 13:36:19 +0000919static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000920_mm256_set_ps(float __a, float __b, float __c, float __d,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000921 float __e, float __f, float __g, float __h)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000922{
David Blaikie3302f2b2013-01-16 23:08:36 +0000923 return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000924}
925
Michael Kupersteine45af542015-06-30 13:36:19 +0000926static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000927_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000928 int __i4, int __i5, int __i6, int __i7)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000929{
David Blaikie3302f2b2013-01-16 23:08:36 +0000930 return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000931}
932
Michael Kupersteine45af542015-06-30 13:36:19 +0000933static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000934_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000935 short __w11, short __w10, short __w09, short __w08,
936 short __w07, short __w06, short __w05, short __w04,
937 short __w03, short __w02, short __w01, short __w00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000938{
David Blaikie3302f2b2013-01-16 23:08:36 +0000939 return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
940 __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000941}
942
Michael Kupersteine45af542015-06-30 13:36:19 +0000943static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000944_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000945 char __b27, char __b26, char __b25, char __b24,
946 char __b23, char __b22, char __b21, char __b20,
947 char __b19, char __b18, char __b17, char __b16,
948 char __b15, char __b14, char __b13, char __b12,
949 char __b11, char __b10, char __b09, char __b08,
950 char __b07, char __b06, char __b05, char __b04,
951 char __b03, char __b02, char __b01, char __b00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000952{
953 return (__m256i)(__v32qi){
David Blaikie3302f2b2013-01-16 23:08:36 +0000954 __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
955 __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
956 __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
957 __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000958 };
959}
960
Michael Kupersteine45af542015-06-30 13:36:19 +0000961static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000962_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000963{
David Blaikie3302f2b2013-01-16 23:08:36 +0000964 return (__m256i)(__v4di){ __d, __c, __b, __a };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000965}
966
967/* Create vectors with elements in reverse order */
Michael Kupersteine45af542015-06-30 13:36:19 +0000968static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000969_mm256_setr_pd(double __a, double __b, double __c, double __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000970{
David Blaikie3302f2b2013-01-16 23:08:36 +0000971 return (__m256d){ __a, __b, __c, __d };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000972}
973
Michael Kupersteine45af542015-06-30 13:36:19 +0000974static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000975_mm256_setr_ps(float __a, float __b, float __c, float __d,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000976 float __e, float __f, float __g, float __h)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000977{
David Blaikie3302f2b2013-01-16 23:08:36 +0000978 return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000979}
980
Michael Kupersteine45af542015-06-30 13:36:19 +0000981static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000982_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000983 int __i4, int __i5, int __i6, int __i7)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000984{
David Blaikie3302f2b2013-01-16 23:08:36 +0000985 return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000986}
987
Michael Kupersteine45af542015-06-30 13:36:19 +0000988static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000989_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000990 short __w11, short __w10, short __w09, short __w08,
991 short __w07, short __w06, short __w05, short __w04,
992 short __w03, short __w02, short __w01, short __w00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000993{
David Blaikie3302f2b2013-01-16 23:08:36 +0000994 return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
995 __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000996}
997
Michael Kupersteine45af542015-06-30 13:36:19 +0000998static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000999_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
Craig Topper9fee8ab2015-01-31 06:33:59 +00001000 char __b27, char __b26, char __b25, char __b24,
1001 char __b23, char __b22, char __b21, char __b20,
1002 char __b19, char __b18, char __b17, char __b16,
1003 char __b15, char __b14, char __b13, char __b12,
1004 char __b11, char __b10, char __b09, char __b08,
1005 char __b07, char __b06, char __b05, char __b04,
1006 char __b03, char __b02, char __b01, char __b00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001007{
1008 return (__m256i)(__v32qi){
David Blaikie3302f2b2013-01-16 23:08:36 +00001009 __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
Craig Topper9fee8ab2015-01-31 06:33:59 +00001010 __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
1011 __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
1012 __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001013}
1014
Michael Kupersteine45af542015-06-30 13:36:19 +00001015static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001016_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001017{
David Blaikie3302f2b2013-01-16 23:08:36 +00001018 return (__m256i)(__v4di){ __a, __b, __c, __d };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001019}
1020
1021/* Create vectors with repeated elements */
Michael Kupersteine45af542015-06-30 13:36:19 +00001022static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001023_mm256_set1_pd(double __w)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001024{
David Blaikie3302f2b2013-01-16 23:08:36 +00001025 return (__m256d){ __w, __w, __w, __w };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001026}
1027
Michael Kupersteine45af542015-06-30 13:36:19 +00001028static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001029_mm256_set1_ps(float __w)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001030{
David Blaikie3302f2b2013-01-16 23:08:36 +00001031 return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001032}
1033
Michael Kupersteine45af542015-06-30 13:36:19 +00001034static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001035_mm256_set1_epi32(int __i)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001036{
David Blaikie3302f2b2013-01-16 23:08:36 +00001037 return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001038}
1039
Michael Kupersteine45af542015-06-30 13:36:19 +00001040static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001041_mm256_set1_epi16(short __w)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001042{
David Blaikie3302f2b2013-01-16 23:08:36 +00001043 return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
1044 __w, __w, __w, __w, __w, __w };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001045}
1046
Michael Kupersteine45af542015-06-30 13:36:19 +00001047static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001048_mm256_set1_epi8(char __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001049{
David Blaikie3302f2b2013-01-16 23:08:36 +00001050 return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1051 __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1052 __b, __b, __b, __b, __b, __b, __b };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001053}
1054
Michael Kupersteine45af542015-06-30 13:36:19 +00001055static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001056_mm256_set1_epi64x(long long __q)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001057{
David Blaikie3302f2b2013-01-16 23:08:36 +00001058 return (__m256i)(__v4di){ __q, __q, __q, __q };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001059}
1060
/* Create zeroed vectors */
Michael Kupersteine45af542015-06-30 13:36:19 +00001062static __inline __m256d __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001063_mm256_setzero_pd(void)
1064{
1065 return (__m256d){ 0, 0, 0, 0 };
1066}
1067
Michael Kupersteine45af542015-06-30 13:36:19 +00001068static __inline __m256 __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001069_mm256_setzero_ps(void)
1070{
1071 return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
1072}
1073
Michael Kupersteine45af542015-06-30 13:36:19 +00001074static __inline __m256i __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001075_mm256_setzero_si256(void)
1076{
1077 return (__m256i){ 0LL, 0LL, 0LL, 0LL };
1078}
1079
1080/* Cast between vector types */
Michael Kupersteine45af542015-06-30 13:36:19 +00001081static __inline __m256 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001082_mm256_castpd_ps(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001083{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001084 return (__m256)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001085}
1086
Michael Kupersteine45af542015-06-30 13:36:19 +00001087static __inline __m256i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001088_mm256_castpd_si256(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001089{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001090 return (__m256i)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001091}
1092
Michael Kupersteine45af542015-06-30 13:36:19 +00001093static __inline __m256d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001094_mm256_castps_pd(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001095{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001096 return (__m256d)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001097}
1098
Michael Kupersteine45af542015-06-30 13:36:19 +00001099static __inline __m256i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001100_mm256_castps_si256(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001101{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001102 return (__m256i)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001103}
1104
Michael Kupersteine45af542015-06-30 13:36:19 +00001105static __inline __m256 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001106_mm256_castsi256_ps(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001107{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001108 return (__m256)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001109}
1110
Michael Kupersteine45af542015-06-30 13:36:19 +00001111static __inline __m256d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001112_mm256_castsi256_pd(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001113{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001114 return (__m256d)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001115}
1116
Michael Kupersteine45af542015-06-30 13:36:19 +00001117static __inline __m128d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001118_mm256_castpd256_pd128(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001119{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001120 return __builtin_shufflevector(__a, __a, 0, 1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001121}
1122
Michael Kupersteine45af542015-06-30 13:36:19 +00001123static __inline __m128 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001124_mm256_castps256_ps128(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001125{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001126 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001127}
1128
Michael Kupersteine45af542015-06-30 13:36:19 +00001129static __inline __m128i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001130_mm256_castsi256_si128(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001131{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001132 return __builtin_shufflevector(__a, __a, 0, 1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001133}
1134
Michael Kupersteine45af542015-06-30 13:36:19 +00001135static __inline __m256d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001136_mm256_castpd128_pd256(__m128d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001137{
Craig Topperc5244512013-08-05 06:17:21 +00001138 return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001139}
1140
Michael Kupersteine45af542015-06-30 13:36:19 +00001141static __inline __m256 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001142_mm256_castps128_ps256(__m128 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001143{
Craig Topperc5244512013-08-05 06:17:21 +00001144 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001145}
1146
Michael Kupersteine45af542015-06-30 13:36:19 +00001147static __inline __m256i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001148_mm256_castsi128_si256(__m128i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001149{
Craig Topperc5244512013-08-05 06:17:21 +00001150 return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001151}
Chad Rosierf8df4f42012-03-20 16:40:00 +00001152
Sean Silvae4c37602015-09-12 02:55:19 +00001153/*
Sanjay Patel7f6aa522015-03-10 15:19:26 +00001154 Vector insert.
1155 We use macros rather than inlines because we only want to accept
1156 invocations where the immediate M is a constant expression.
1157*/
/* Expands to V1 with one four-float half replaced by V2. Only bit 0 of M is
   used: when set, V2 becomes elements 4-7; when clear, V2 becomes elements
   0-3. Shuffle indices 0-7 select from V1, 8-11 select from the widened V2. */
#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_shufflevector( \
    (__v8sf)(V1), \
    (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
    (((M) & 1) ?  0 :  8), (((M) & 1) ?  1 :  9), \
    (((M) & 1) ?  2 : 10), (((M) & 1) ?  3 : 11), \
    (((M) & 1) ?  8 :  4), (((M) & 1) ?  9 :  5), \
    (((M) & 1) ? 10 :  6), (((M) & 1) ? 11 :  7) );})
1170
/* Expands to V1 with one two-double half replaced by V2. Only bit 0 of M is
   used: when set, V2 becomes elements 2-3; when clear, V2 becomes elements
   0-1. Shuffle indices 0-3 select from V1, 4-5 select from the widened V2. */
#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
    (__v4df)(V1), \
    (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
    (((M) & 1) ? 0 : 4), (((M) & 1) ? 1 : 5), \
    (((M) & 1) ? 4 : 2), (((M) & 1) ? 5 : 3) );})
1179
/* Expands to V1 with one two-element (64-bit lanes) half replaced by V2. Only
   bit 0 of M is used: when set, V2 becomes elements 2-3; when clear, V2
   becomes elements 0-1. Shuffle indices 0-3 select from V1, 4-5 select from
   the widened V2. */
#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
    (__v4di)(V1), \
    (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
    (((M) & 1) ? 0 : 4), (((M) & 1) ? 1 : 5), \
    (((M) & 1) ? 4 : 2), (((M) & 1) ? 5 : 3) );})
1188
Sean Silvae4c37602015-09-12 02:55:19 +00001189/*
Sanjay Patel0c351ab2015-03-12 15:50:36 +00001190 Vector extract.
1191 We use macros rather than inlines because we only want to accept
1192 invocations where the immediate M is a constant expression.
1193*/
/* Expands to one four-float half of V. Only bit 0 of M is used: when set the
   result is elements 4-7 of V; when clear it is elements 0-3. The zero vector
   is only the shuffle's second operand; none of the indices select from it. */
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
    (__v8sf)(V), \
    (__v8sf)(_mm256_setzero_ps()), \
    (((M) & 1) ? 4 : 0), (((M) & 1) ? 5 : 1), \
    (((M) & 1) ? 6 : 2), (((M) & 1) ? 7 : 3) );})
1202
/* Expands to one two-double half of V. Only bit 0 of M is used: when set the
   result is elements 2-3 of V; when clear it is elements 0-1. The zero vector
   is only the shuffle's second operand; none of the indices select from it. */
#define _mm256_extractf128_pd(V, M) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
    (__v4df)(V), \
    (__v4df)(_mm256_setzero_pd()), \
    (((M) & 1) ? 2 : 0), (((M) & 1) ? 3 : 1) );})
1209
/* Expands to one two-element (64-bit lanes) half of V. Only bit 0 of M is
   used: when set the result is elements 2-3 of V; when clear it is elements
   0-1. The zero vector is only the shuffle's second operand; none of the
   indices select from it. */
#define _mm256_extractf128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
    (__v4di)(V), \
    (__v4di)(_mm256_setzero_si256()), \
    (((M) & 1) ? 2 : 0), (((M) & 1) ? 3 : 1) );})
1216
Chad Rosierf8df4f42012-03-20 16:40:00 +00001217/* SIMD load ops (unaligned) */
Michael Kupersteine45af542015-06-30 13:36:19 +00001218static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001219_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001220{
1221 struct __loadu_ps {
David Blaikie3302f2b2013-01-16 23:08:36 +00001222 __m128 __v;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001223 } __attribute__((__packed__, __may_alias__));
1224
David Blaikie3302f2b2013-01-16 23:08:36 +00001225 __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
1226 return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001227}
1228
Michael Kupersteine45af542015-06-30 13:36:19 +00001229static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001230_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001231{
1232 struct __loadu_pd {
David Blaikie3302f2b2013-01-16 23:08:36 +00001233 __m128d __v;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001234 } __attribute__((__packed__, __may_alias__));
Sean Silvae4c37602015-09-12 02:55:19 +00001235
David Blaikie3302f2b2013-01-16 23:08:36 +00001236 __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
1237 return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001238}
1239
Michael Kupersteine45af542015-06-30 13:36:19 +00001240static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001241_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001242{
1243 struct __loadu_si128 {
David Blaikie3302f2b2013-01-16 23:08:36 +00001244 __m128i __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00001245 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001246 __m256i __v256 = _mm256_castsi128_si256(
1247 ((struct __loadu_si128*)__addr_lo)->__v);
1248 return _mm256_insertf128_si256(__v256,
1249 ((struct __loadu_si128*)__addr_hi)->__v, 1);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001250}
1251
1252/* SIMD store ops (unaligned) */
Michael Kupersteine45af542015-06-30 13:36:19 +00001253static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001254_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001255{
David Blaikie3302f2b2013-01-16 23:08:36 +00001256 __m128 __v128;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001257
David Blaikie3302f2b2013-01-16 23:08:36 +00001258 __v128 = _mm256_castps256_ps128(__a);
1259 __builtin_ia32_storeups(__addr_lo, __v128);
1260 __v128 = _mm256_extractf128_ps(__a, 1);
1261 __builtin_ia32_storeups(__addr_hi, __v128);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001262}
1263
Michael Kupersteine45af542015-06-30 13:36:19 +00001264static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001265_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001266{
David Blaikie3302f2b2013-01-16 23:08:36 +00001267 __m128d __v128;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001268
David Blaikie3302f2b2013-01-16 23:08:36 +00001269 __v128 = _mm256_castpd256_pd128(__a);
1270 __builtin_ia32_storeupd(__addr_lo, __v128);
1271 __v128 = _mm256_extractf128_pd(__a, 1);
1272 __builtin_ia32_storeupd(__addr_hi, __v128);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001273}
1274
Michael Kupersteine45af542015-06-30 13:36:19 +00001275static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001276_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001277{
David Blaikie3302f2b2013-01-16 23:08:36 +00001278 __m128i __v128;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001279
David Blaikie3302f2b2013-01-16 23:08:36 +00001280 __v128 = _mm256_castsi256_si128(__a);
1281 __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128);
1282 __v128 = _mm256_extractf128_si256(__a, 1);
1283 __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001284}
Richard Smith49e56442013-07-14 05:41:45 +00001285
Michael Kupersteine45af542015-06-30 13:36:19 +00001286static __inline __m256 __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001287_mm256_set_m128 (__m128 __hi, __m128 __lo) {
1288 return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
1289}
1290
Michael Kupersteine45af542015-06-30 13:36:19 +00001291static __inline __m256d __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001292_mm256_set_m128d (__m128d __hi, __m128d __lo) {
1293 return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1294}
1295
Michael Kupersteine45af542015-06-30 13:36:19 +00001296static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001297_mm256_set_m128i (__m128i __hi, __m128i __lo) {
1298 return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1299}
1300
Michael Kupersteine45af542015-06-30 13:36:19 +00001301static __inline __m256 __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001302_mm256_setr_m128 (__m128 __lo, __m128 __hi) {
1303 return _mm256_set_m128(__hi, __lo);
1304}
1305
Michael Kupersteine45af542015-06-30 13:36:19 +00001306static __inline __m256d __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001307_mm256_setr_m128d (__m128d __lo, __m128d __hi) {
1308 return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1309}
1310
Michael Kupersteine45af542015-06-30 13:36:19 +00001311static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001312_mm256_setr_m128i (__m128i __lo, __m128i __hi) {
1313 return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1314}
1315
Michael Kupersteine45af542015-06-30 13:36:19 +00001316#undef __DEFAULT_FN_ATTRS
Eric Christopher4d1851682015-06-17 07:09:20 +00001317
Richard Smith49e56442013-07-14 05:41:45 +00001318#endif /* __AVXINTRIN_H */