/*===---- avxintrin.h - AVX intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
Benjamin Kramer6f35f3c2010-08-20 23:00:03 +000024#ifndef __IMMINTRIN_H
25#error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
26#endif
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000027
Richard Smith49e56442013-07-14 05:41:45 +000028#ifndef __AVXINTRIN_H
29#define __AVXINTRIN_H
30
/* Internal 256-bit (32-byte) vector views, one per element type. */
typedef double    __v4df  __attribute__((__vector_size__(32)));
typedef float     __v8sf  __attribute__((__vector_size__(32)));
typedef long long __v4di  __attribute__((__vector_size__(32)));
typedef int       __v8si  __attribute__((__vector_size__(32)));
typedef short     __v16hi __attribute__((__vector_size__(32)));
typedef char      __v32qi __attribute__((__vector_size__(32)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));

/* Public 256-bit vector types: packed float, packed double, and integer. */
typedef float     __m256  __attribute__((__vector_size__(32)));
typedef double    __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));

/* Define the default attributes for the functions in this file: always
 * inlined, no debug info, and compiled with the "avx" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx")))
Eric Christopher4d1851682015-06-17 07:09:20 +000048
/* Arithmetic */
Michael Kupersteine45af542015-06-30 13:36:19 +000050static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000051_mm256_add_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000052{
David Blaikie3302f2b2013-01-16 23:08:36 +000053 return __a+__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000054}
55
Michael Kupersteine45af542015-06-30 13:36:19 +000056static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000057_mm256_add_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000058{
David Blaikie3302f2b2013-01-16 23:08:36 +000059 return __a+__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000060}
61
Michael Kupersteine45af542015-06-30 13:36:19 +000062static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000063_mm256_sub_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000064{
David Blaikie3302f2b2013-01-16 23:08:36 +000065 return __a-__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000066}
67
Michael Kupersteine45af542015-06-30 13:36:19 +000068static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000069_mm256_sub_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000070{
David Blaikie3302f2b2013-01-16 23:08:36 +000071 return __a-__b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000072}
73
Michael Kupersteine45af542015-06-30 13:36:19 +000074static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000075_mm256_addsub_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000076{
David Blaikie3302f2b2013-01-16 23:08:36 +000077 return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000078}
79
Michael Kupersteine45af542015-06-30 13:36:19 +000080static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000081_mm256_addsub_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000082{
David Blaikie3302f2b2013-01-16 23:08:36 +000083 return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000084}
85
Michael Kupersteine45af542015-06-30 13:36:19 +000086static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000087_mm256_div_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000088{
David Blaikie3302f2b2013-01-16 23:08:36 +000089 return __a / __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000090}
91
Michael Kupersteine45af542015-06-30 13:36:19 +000092static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000093_mm256_div_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000094{
David Blaikie3302f2b2013-01-16 23:08:36 +000095 return __a / __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +000096}
97
Michael Kupersteine45af542015-06-30 13:36:19 +000098static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +000099_mm256_max_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000100{
David Blaikie3302f2b2013-01-16 23:08:36 +0000101 return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000102}
103
Michael Kupersteine45af542015-06-30 13:36:19 +0000104static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000105_mm256_max_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000106{
David Blaikie3302f2b2013-01-16 23:08:36 +0000107 return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000108}
109
Michael Kupersteine45af542015-06-30 13:36:19 +0000110static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000111_mm256_min_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000112{
David Blaikie3302f2b2013-01-16 23:08:36 +0000113 return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000114}
115
Michael Kupersteine45af542015-06-30 13:36:19 +0000116static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000117_mm256_min_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000118{
David Blaikie3302f2b2013-01-16 23:08:36 +0000119 return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000120}
121
Michael Kupersteine45af542015-06-30 13:36:19 +0000122static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000123_mm256_mul_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000124{
David Blaikie3302f2b2013-01-16 23:08:36 +0000125 return __a * __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000126}
127
Michael Kupersteine45af542015-06-30 13:36:19 +0000128static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000129_mm256_mul_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000130{
David Blaikie3302f2b2013-01-16 23:08:36 +0000131 return __a * __b;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000132}
133
Michael Kupersteine45af542015-06-30 13:36:19 +0000134static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000135_mm256_sqrt_pd(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000136{
David Blaikie3302f2b2013-01-16 23:08:36 +0000137 return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000138}
139
Michael Kupersteine45af542015-06-30 13:36:19 +0000140static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000141_mm256_sqrt_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000142{
David Blaikie3302f2b2013-01-16 23:08:36 +0000143 return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000144}
145
Michael Kupersteine45af542015-06-30 13:36:19 +0000146static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000147_mm256_rsqrt_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000148{
David Blaikie3302f2b2013-01-16 23:08:36 +0000149 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000150}
151
Michael Kupersteine45af542015-06-30 13:36:19 +0000152static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000153_mm256_rcp_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000154{
David Blaikie3302f2b2013-01-16 23:08:36 +0000155 return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000156}
157
/* Applies the roundpd256 builtin to V (a __m256d) with rounding-control
 * operand M and yields a __m256d.
 * V is cast in place rather than copied into a block-scope temporary, so an
 * argument expression that itself mentions a name such as __V is not
 * captured by a macro-local variable. V is still evaluated exactly once. */
#define _mm256_round_pd(V, M) \
  ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000161
/* Applies the roundps256 builtin to V (a __m256) with rounding-control
 * operand M and yields a __m256.
 * V is cast in place rather than copied into a block-scope temporary, so an
 * argument expression that itself mentions a name such as __V is not
 * captured by a macro-local variable. V is still evaluated exactly once. */
#define _mm256_round_ps(V, M) \
  ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000165
/* Ceiling / floor shorthands: each expands to the matching _mm256_round_*
 * macro with a fixed _MM_FROUND_CEIL or _MM_FROUND_FLOOR rounding operand. */
#define _mm256_ceil_pd(V)   _mm256_round_pd((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V)  _mm256_round_pd((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)   _mm256_round_ps((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V)  _mm256_round_ps((V), _MM_FROUND_FLOOR)
170
/* Logical */
Michael Kupersteine45af542015-06-30 13:36:19 +0000172static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000173_mm256_and_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000174{
David Blaikie3302f2b2013-01-16 23:08:36 +0000175 return (__m256d)((__v4di)__a & (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000176}
177
Michael Kupersteine45af542015-06-30 13:36:19 +0000178static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000179_mm256_and_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000180{
David Blaikie3302f2b2013-01-16 23:08:36 +0000181 return (__m256)((__v8si)__a & (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000182}
183
Michael Kupersteine45af542015-06-30 13:36:19 +0000184static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000185_mm256_andnot_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000186{
David Blaikie3302f2b2013-01-16 23:08:36 +0000187 return (__m256d)(~(__v4di)__a & (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000188}
189
Michael Kupersteine45af542015-06-30 13:36:19 +0000190static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000191_mm256_andnot_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000192{
David Blaikie3302f2b2013-01-16 23:08:36 +0000193 return (__m256)(~(__v8si)__a & (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000194}
195
Michael Kupersteine45af542015-06-30 13:36:19 +0000196static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000197_mm256_or_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000198{
David Blaikie3302f2b2013-01-16 23:08:36 +0000199 return (__m256d)((__v4di)__a | (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000200}
201
Michael Kupersteine45af542015-06-30 13:36:19 +0000202static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000203_mm256_or_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000204{
David Blaikie3302f2b2013-01-16 23:08:36 +0000205 return (__m256)((__v8si)__a | (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000206}
207
Michael Kupersteine45af542015-06-30 13:36:19 +0000208static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000209_mm256_xor_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000210{
David Blaikie3302f2b2013-01-16 23:08:36 +0000211 return (__m256d)((__v4di)__a ^ (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000212}
213
Michael Kupersteine45af542015-06-30 13:36:19 +0000214static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000215_mm256_xor_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000216{
David Blaikie3302f2b2013-01-16 23:08:36 +0000217 return (__m256)((__v8si)__a ^ (__v8si)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000218}
219
/* Horizontal arithmetic */
Michael Kupersteine45af542015-06-30 13:36:19 +0000221static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000222_mm256_hadd_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000223{
David Blaikie3302f2b2013-01-16 23:08:36 +0000224 return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000225}
226
Michael Kupersteine45af542015-06-30 13:36:19 +0000227static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000228_mm256_hadd_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000229{
David Blaikie3302f2b2013-01-16 23:08:36 +0000230 return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000231}
232
Michael Kupersteine45af542015-06-30 13:36:19 +0000233static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000234_mm256_hsub_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000235{
David Blaikie3302f2b2013-01-16 23:08:36 +0000236 return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000237}
238
Michael Kupersteine45af542015-06-30 13:36:19 +0000239static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000240_mm256_hsub_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000241{
David Blaikie3302f2b2013-01-16 23:08:36 +0000242 return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000243}
244
/* Vector permutations */
Michael Kupersteine45af542015-06-30 13:36:19 +0000246static __inline __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000247_mm_permutevar_pd(__m128d __a, __m128i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000248{
David Blaikie3302f2b2013-01-16 23:08:36 +0000249 return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000250}
251
Michael Kupersteine45af542015-06-30 13:36:19 +0000252static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000253_mm256_permutevar_pd(__m256d __a, __m256i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000254{
David Blaikie3302f2b2013-01-16 23:08:36 +0000255 return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000256}
257
Michael Kupersteine45af542015-06-30 13:36:19 +0000258static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000259_mm_permutevar_ps(__m128 __a, __m128i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000260{
David Blaikie3302f2b2013-01-16 23:08:36 +0000261 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000262}
263
Michael Kupersteine45af542015-06-30 13:36:19 +0000264static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000265_mm256_permutevar_ps(__m256 __a, __m256i __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000266{
Craig Topper9fee8ab2015-01-31 06:33:59 +0000267 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000268}
269
/* Permutes the two doubles of A (a __m128d) under constant control C:
 * result element 0 is A[bit 0 of C], result element 1 is A[bit 1 of C].
 * A is cast in place instead of being copied into a block-scope temporary,
 * so an argument expression that mentions a name such as __A is not
 * captured by a macro-local variable. A is still evaluated exactly once;
 * C appears several times and must be a side-effect-free constant. */
#define _mm_permute_pd(A, C) \
  ((__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
                                    (__v2df)_mm_setzero_pd(), \
                                    (C) & 0x1, ((C) & 0x2) >> 1))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000274
/* Permutes the doubles of A (a __m256d) within each 128-bit half under
 * constant control C: bits 0 and 1 of C pick A[0] or A[1] for result
 * elements 0 and 1; bits 2 and 3 pick A[2] or A[3] for result elements 2
 * and 3.
 * A is cast in place instead of being copied into a block-scope temporary,
 * so an argument expression that mentions a name such as __A is not
 * captured by a macro-local variable. A is still evaluated exactly once;
 * C appears several times and must be a side-effect-free constant. */
#define _mm256_permute_pd(A, C) \
  ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                    (__v4df)_mm256_setzero_pd(), \
                                    (C) & 0x1, ((C) & 0x2) >> 1, \
                                    2 + (((C) & 0x4) >> 2), \
                                    2 + (((C) & 0x8) >> 3)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000281
/* Permutes the four floats of A (a __m128) under constant control C, which
 * packs four 2-bit selectors: bits 1:0, 3:2, 5:4 and 7:6 choose the source
 * element for result elements 0, 1, 2 and 3 respectively.
 * A is cast in place instead of being copied into a block-scope temporary,
 * so an argument expression that mentions a name such as __A is not
 * captured by a macro-local variable. A is still evaluated exactly once;
 * C appears several times and must be a side-effect-free constant. */
#define _mm_permute_ps(A, C) \
  ((__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
                                   (__v4sf)_mm_setzero_ps(), \
                                   (C) & 0x3, ((C) & 0xc) >> 2, \
                                   ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000287
/* Permutes the floats of A (a __m256) within each 128-bit half under
 * constant control C. C packs four 2-bit selectors (bits 1:0, 3:2, 5:4,
 * 7:6); selector k picks the source for result element k from A[0..3] and,
 * offset by 4, the source for result element 4+k from A[4..7].
 * A is cast in place instead of being copied into a block-scope temporary,
 * so an argument expression that mentions a name such as __A is not
 * captured by a macro-local variable. A is still evaluated exactly once;
 * C appears several times and must be a side-effect-free constant. */
#define _mm256_permute_ps(A, C) \
  ((__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                   (__v8sf)_mm256_setzero_ps(), \
                                   (C) & 0x3, ((C) & 0xc) >> 2, \
                                   ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
                                   4 + (((C) & 0x03) >> 0), \
                                   4 + (((C) & 0x0c) >> 2), \
                                   4 + (((C) & 0x30) >> 4), \
                                   4 + (((C) & 0xc0) >> 6)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000297
/* Applies the vperm2f128_pd256 builtin to V1 and V2 (both __m256d) with
 * control operand M and yields a __m256d.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries: with temporaries, an argument that mentions __V1 or __V2
 * could be initialised from the macro's own locals rather than from the
 * caller's variables. Each operand is still evaluated exactly once. */
#define _mm256_permute2f128_pd(V1, V2, M) \
  ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
                                            (__v4df)(__m256d)(V2), (M)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000302
/* Applies the vperm2f128_ps256 builtin to V1 and V2 (both __m256) with
 * control operand M and yields a __m256.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries: with temporaries, an argument that mentions __V1 or __V2
 * could be initialised from the macro's own locals rather than from the
 * caller's variables. Each operand is still evaluated exactly once. */
#define _mm256_permute2f128_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
                                           (__v8sf)(__m256)(V2), (M)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000307
/* Applies the vperm2f128_si256 builtin to V1 and V2 (both __m256i, viewed
 * as __v8si) with control operand M and yields a __m256i.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries: with temporaries, an argument that mentions __V1 or __V2
 * could be initialised from the macro's own locals rather than from the
 * caller's variables. Each operand is still evaluated exactly once. */
#define _mm256_permute2f128_si256(V1, V2, M) \
  ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
                                            (__v8si)(__m256i)(V2), (M)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000312
/* Vector Blend */
/* Blends V1 and V2 (both __m256d) under constant mask M: for k = 0..3,
 * result element k is V2[k] when bit k of M is set and V1[k] otherwise.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries: with temporaries, an argument that mentions __V1 or __V2
 * could be initialised from the macro's own locals rather than from the
 * caller's variables. V1 and V2 are still evaluated exactly once; M appears
 * several times and must be a side-effect-free constant. */
#define _mm256_blend_pd(V1, V2, M) \
  ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
                                    (__v4df)(__m256d)(V2), \
                                    (((M) & 0x01) ? 4 : 0), \
                                    (((M) & 0x02) ? 5 : 1), \
                                    (((M) & 0x04) ? 6 : 2), \
                                    (((M) & 0x08) ? 7 : 3)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000322
/* Blends V1 and V2 (both __m256) under constant mask M: for k = 0..7,
 * result element k is V2[k] when bit k of M is set and V1[k] otherwise.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries: with temporaries, an argument that mentions __V1 or __V2
 * could be initialised from the macro's own locals rather than from the
 * caller's variables. V1 and V2 are still evaluated exactly once; M appears
 * several times and must be a side-effect-free constant. */
#define _mm256_blend_ps(V1, V2, M) \
  ((__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
                                   (__v8sf)(__m256)(V2), \
                                   (((M) & 0x01) ?  8 : 0), \
                                   (((M) & 0x02) ?  9 : 1), \
                                   (((M) & 0x04) ? 10 : 2), \
                                   (((M) & 0x08) ? 11 : 3), \
                                   (((M) & 0x10) ? 12 : 4), \
                                   (((M) & 0x20) ? 13 : 5), \
                                   (((M) & 0x40) ? 14 : 6), \
                                   (((M) & 0x80) ? 15 : 7)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000335
Michael Kupersteine45af542015-06-30 13:36:19 +0000336static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000337_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000338{
David Blaikie3302f2b2013-01-16 23:08:36 +0000339 return (__m256d)__builtin_ia32_blendvpd256(
340 (__v4df)__a, (__v4df)__b, (__v4df)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000341}
342
Michael Kupersteine45af542015-06-30 13:36:19 +0000343static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000344_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000345{
David Blaikie5bb70032013-01-16 23:13:42 +0000346 return (__m256)__builtin_ia32_blendvps256(
David Blaikie3302f2b2013-01-16 23:08:36 +0000347 (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000348}
349
/* Vector Dot Product */
/* Applies the dpps256 builtin to V1 and V2 (both __m256) with control
 * operand M and yields a __m256.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries: with temporaries, an argument that mentions __V1 or __V2
 * could be initialised from the macro's own locals rather than from the
 * caller's variables. Each operand is still evaluated exactly once. */
#define _mm256_dp_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
                                  (__v8sf)(__m256)(V2), (M)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000355
/* Vector shuffle */
/* Shuffles floats from a and b (both __m256) under constant mask, which
 * packs four 2-bit selectors. In the low half, result elements 0 and 1 come
 * from a[0..3] (selectors in bits 1:0 and 3:2) and elements 2 and 3 come
 * from b[0..3] (selectors in bits 5:4 and 7:6). The high half reuses the
 * same selectors against a[4..7] and b[4..7].
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm256_shuffle_ps(__b, __a, m) initialised __b from the macro's own __a.
 * a and b are still evaluated exactly once; mask appears several times and
 * must be a side-effect-free constant. */
#define _mm256_shuffle_ps(a, b, mask) \
  ((__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
                                   (__v8sf)(__m256)(b), \
                                   (mask) & 0x3, \
                                   ((mask) & 0xc) >> 2, \
                                   (((mask) & 0x30) >> 4) + 8, \
                                   (((mask) & 0xc0) >> 6) + 8, \
                                   ((mask) & 0x3) + 4, \
                                   (((mask) & 0xc) >> 2) + 4, \
                                   (((mask) & 0x30) >> 4) + 12, \
                                   (((mask) & 0xc0) >> 6) + 12))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000365
/* Shuffles doubles from a and b (both __m256d) under constant mask:
 *   result[0] = a[bit 0 of mask]        result[1] = b[bit 1 of mask]
 *   result[2] = a[2 + bit 2 of mask]    result[3] = b[2 + bit 3 of mask]
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm256_shuffle_pd(__b, __a, m) initialised __b from the macro's own __a.
 * a and b are still evaluated exactly once; mask appears several times and
 * must be a side-effect-free constant. */
#define _mm256_shuffle_pd(a, b, mask) \
  ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
                                    (__v4df)(__m256d)(b), \
                                    (mask) & 0x1, \
                                    (((mask) & 0x2) >> 1) + 4, \
                                    (((mask) & 0x4) >> 2) + 2, \
                                    (((mask) & 0x8) >> 3) + 6))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000374
/* Compare */
/* Comparison predicate codes, 0x00 through 0x1f. Suffix legend, as spelled
 * out per entry: O = ordered, U = unordered, Q = non-signaling,
 * S = signaling. */
#define _CMP_EQ_OQ     0x00 /* Equal (ordered, non-signaling) */
#define _CMP_LT_OS     0x01 /* Less-than (ordered, signaling) */
#define _CMP_LE_OS     0x02 /* Less-than-or-equal (ordered, signaling) */
#define _CMP_UNORD_Q   0x03 /* Unordered (non-signaling) */
#define _CMP_NEQ_UQ    0x04 /* Not-equal (unordered, non-signaling) */
#define _CMP_NLT_US    0x05 /* Not-less-than (unordered, signaling) */
#define _CMP_NLE_US    0x06 /* Not-less-than-or-equal (unordered, signaling) */
#define _CMP_ORD_Q     0x07 /* Ordered (non-signaling) */
#define _CMP_EQ_UQ     0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US    0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US    0x0a /* Not-greater-than (unordered, signaling) */
#define _CMP_FALSE_OQ  0x0b /* False (ordered, non-signaling) */
#define _CMP_NEQ_OQ    0x0c /* Not-equal (ordered, non-signaling) */
#define _CMP_GE_OS     0x0d /* Greater-than-or-equal (ordered, signaling) */
#define _CMP_GT_OS     0x0e /* Greater-than (ordered, signaling) */
#define _CMP_TRUE_UQ   0x0f /* True (unordered, non-signaling) */
#define _CMP_EQ_OS     0x10 /* Equal (ordered, signaling) */
#define _CMP_LT_OQ     0x11 /* Less-than (ordered, non-signaling) */
#define _CMP_LE_OQ     0x12 /* Less-than-or-equal (ordered, non-signaling) */
#define _CMP_UNORD_S   0x13 /* Unordered (signaling) */
#define _CMP_NEQ_US    0x14 /* Not-equal (unordered, signaling) */
#define _CMP_NLT_UQ    0x15 /* Not-less-than (unordered, non-signaling) */
#define _CMP_NLE_UQ    0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
#define _CMP_ORD_S     0x17 /* Ordered (signaling) */
#define _CMP_EQ_US     0x18 /* Equal (unordered, signaling) */
#define _CMP_NGE_UQ    0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
#define _CMP_NGT_UQ    0x1a /* Not-greater-than (unordered, non-signaling) */
#define _CMP_FALSE_OS  0x1b /* False (ordered, signaling) */
#define _CMP_NEQ_OS    0x1c /* Not-equal (ordered, signaling) */
#define _CMP_GE_OQ     0x1d /* Greater-than-or-equal (ordered, non-signaling) */
#define _CMP_GT_OQ     0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US   0x1f /* True (unordered, signaling) */
408
/* Applies the cmppd builtin to a and b (both __m128d) with predicate
 * operand c and yields a __m128d.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm_cmp_pd(__b, __a, c) initialised __b from the macro's own __a.
 * Each operand is still evaluated exactly once. */
#define _mm_cmp_pd(a, b, c) \
  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
                                 (__v2df)(__m128d)(b), (c)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000413
/* Applies the cmpps builtin to a and b (both __m128) with predicate
 * operand c and yields a __m128.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm_cmp_ps(__b, __a, c) initialised __b from the macro's own __a.
 * Each operand is still evaluated exactly once. */
#define _mm_cmp_ps(a, b, c) \
  ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
                                (__v4sf)(__m128)(b), (c)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000418
/* Applies the cmppd256 builtin to a and b (both __m256d) with predicate
 * operand c and yields a __m256d.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm256_cmp_pd(__b, __a, c) initialised __b from the macro's own __a.
 * Each operand is still evaluated exactly once. */
#define _mm256_cmp_pd(a, b, c) \
  ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
                                    (__v4df)(__m256d)(b), (c)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000423
/* Applies the cmpps256 builtin to a and b (both __m256) with predicate
 * operand c and yields a __m256.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm256_cmp_ps(__b, __a, c) initialised __b from the macro's own __a.
 * Each operand is still evaluated exactly once. */
#define _mm256_cmp_ps(a, b, c) \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                   (__v8sf)(__m256)(b), (c)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000428
/* Applies the cmpsd builtin to a and b (both __m128d) with predicate
 * operand c and yields a __m128d.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm_cmp_sd(__b, __a, c) initialised __b from the macro's own __a.
 * Each operand is still evaluated exactly once. */
#define _mm_cmp_sd(a, b, c) \
  ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
                                 (__v2df)(__m128d)(b), (c)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000433
/* Applies the cmpss builtin to a and b (both __m128) with predicate
 * operand c and yields a __m128.
 * Operands are cast in place instead of being copied into block-scope
 * temporaries named __a/__b: with temporaries, a call such as
 * _mm_cmp_ss(__b, __a, c) initialised __b from the macro's own __a.
 * Each operand is still evaluated exactly once. */
#define _mm_cmp_ss(a, b, c) \
  ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
                                (__v4sf)(__m128)(b), (c)))
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000438
Michael Kupersteine45af542015-06-30 13:36:19 +0000439static __inline int __DEFAULT_FN_ATTRS
Craig Topper459554f2015-01-31 06:31:30 +0000440_mm256_extract_epi32(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000441{
David Blaikie3302f2b2013-01-16 23:08:36 +0000442 __v8si __b = (__v8si)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000443 return __b[__imm & 7];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000444}
445
Michael Kupersteine45af542015-06-30 13:36:19 +0000446static __inline int __DEFAULT_FN_ATTRS
Craig Topper459554f2015-01-31 06:31:30 +0000447_mm256_extract_epi16(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000448{
David Blaikie3302f2b2013-01-16 23:08:36 +0000449 __v16hi __b = (__v16hi)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000450 return __b[__imm & 15];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000451}
452
Michael Kupersteine45af542015-06-30 13:36:19 +0000453static __inline int __DEFAULT_FN_ATTRS
Craig Topper459554f2015-01-31 06:31:30 +0000454_mm256_extract_epi8(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000455{
David Blaikie3302f2b2013-01-16 23:08:36 +0000456 __v32qi __b = (__v32qi)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000457 return __b[__imm & 31];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000458}
459
460#ifdef __x86_64__
Michael Kupersteine45af542015-06-30 13:36:19 +0000461static __inline long long __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000462_mm256_extract_epi64(__m256i __a, const int __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000463{
David Blaikie3302f2b2013-01-16 23:08:36 +0000464 __v4di __b = (__v4di)__a;
Manman Renc94122e2013-10-23 20:33:14 +0000465 return __b[__imm & 3];
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000466}
467#endif
468
Michael Kupersteine45af542015-06-30 13:36:19 +0000469static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000470_mm256_insert_epi32(__m256i __a, int __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000471{
David Blaikie3302f2b2013-01-16 23:08:36 +0000472 __v8si __c = (__v8si)__a;
473 __c[__imm & 7] = __b;
474 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000475}
476
Michael Kupersteine45af542015-06-30 13:36:19 +0000477static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000478_mm256_insert_epi16(__m256i __a, int __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000479{
David Blaikie3302f2b2013-01-16 23:08:36 +0000480 __v16hi __c = (__v16hi)__a;
481 __c[__imm & 15] = __b;
482 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000483}
484
Michael Kupersteine45af542015-06-30 13:36:19 +0000485static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000486_mm256_insert_epi8(__m256i __a, int __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000487{
David Blaikie3302f2b2013-01-16 23:08:36 +0000488 __v32qi __c = (__v32qi)__a;
489 __c[__imm & 31] = __b;
490 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000491}
492
493#ifdef __x86_64__
Michael Kupersteine45af542015-06-30 13:36:19 +0000494static __inline __m256i __DEFAULT_FN_ATTRS
Filipe Cabecinhasd7400292015-02-19 19:00:33 +0000495_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000496{
David Blaikie3302f2b2013-01-16 23:08:36 +0000497 __v4di __c = (__v4di)__a;
498 __c[__imm & 3] = __b;
499 return (__m256i)__c;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000500}
501#endif
502
/* Conversion */
Michael Kupersteine45af542015-06-30 13:36:19 +0000504static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000505_mm256_cvtepi32_pd(__m128i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000506{
David Blaikie3302f2b2013-01-16 23:08:36 +0000507 return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000508}
509
Michael Kupersteine45af542015-06-30 13:36:19 +0000510static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000511_mm256_cvtepi32_ps(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000512{
David Blaikie3302f2b2013-01-16 23:08:36 +0000513 return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000514}
515
Michael Kupersteine45af542015-06-30 13:36:19 +0000516static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000517_mm256_cvtpd_ps(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000518{
David Blaikie3302f2b2013-01-16 23:08:36 +0000519 return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000520}
521
Michael Kupersteine45af542015-06-30 13:36:19 +0000522static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000523_mm256_cvtps_epi32(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000524{
David Blaikie3302f2b2013-01-16 23:08:36 +0000525 return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000526}
527
Michael Kupersteine45af542015-06-30 13:36:19 +0000528static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000529_mm256_cvtps_pd(__m128 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000530{
David Blaikie3302f2b2013-01-16 23:08:36 +0000531 return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000532}
533
Michael Kupersteine45af542015-06-30 13:36:19 +0000534static __inline __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000535_mm256_cvttpd_epi32(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000536{
David Blaikie3302f2b2013-01-16 23:08:36 +0000537 return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000538}
539
Michael Kupersteine45af542015-06-30 13:36:19 +0000540static __inline __m128i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000541_mm256_cvtpd_epi32(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000542{
David Blaikie3302f2b2013-01-16 23:08:36 +0000543 return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000544}
545
Michael Kupersteine45af542015-06-30 13:36:19 +0000546static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000547_mm256_cvttps_epi32(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000548{
David Blaikie3302f2b2013-01-16 23:08:36 +0000549 return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000550}
551
552/* Vector replicate */
Michael Kupersteine45af542015-06-30 13:36:19 +0000553static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000554_mm256_movehdup_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000555{
David Blaikie3302f2b2013-01-16 23:08:36 +0000556 return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000557}
558
Michael Kupersteine45af542015-06-30 13:36:19 +0000559static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000560_mm256_moveldup_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000561{
David Blaikie3302f2b2013-01-16 23:08:36 +0000562 return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000563}
564
Michael Kupersteine45af542015-06-30 13:36:19 +0000565static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000566_mm256_movedup_pd(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000567{
David Blaikie3302f2b2013-01-16 23:08:36 +0000568 return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000569}
570
571/* Unpack and Interleave */
Michael Kupersteine45af542015-06-30 13:36:19 +0000572static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000573_mm256_unpackhi_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000574{
David Blaikie3302f2b2013-01-16 23:08:36 +0000575 return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000576}
577
Michael Kupersteine45af542015-06-30 13:36:19 +0000578static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000579_mm256_unpacklo_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000580{
David Blaikie3302f2b2013-01-16 23:08:36 +0000581 return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000582}
583
Michael Kupersteine45af542015-06-30 13:36:19 +0000584static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000585_mm256_unpackhi_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000586{
David Blaikie3302f2b2013-01-16 23:08:36 +0000587 return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000588}
589
Michael Kupersteine45af542015-06-30 13:36:19 +0000590static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000591_mm256_unpacklo_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000592{
David Blaikie3302f2b2013-01-16 23:08:36 +0000593 return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000594}
595
596/* Bit Test */
Michael Kupersteine45af542015-06-30 13:36:19 +0000597static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000598_mm_testz_pd(__m128d __a, __m128d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000599{
David Blaikie3302f2b2013-01-16 23:08:36 +0000600 return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000601}
602
Michael Kupersteine45af542015-06-30 13:36:19 +0000603static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000604_mm_testc_pd(__m128d __a, __m128d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000605{
David Blaikie3302f2b2013-01-16 23:08:36 +0000606 return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000607}
608
Michael Kupersteine45af542015-06-30 13:36:19 +0000609static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000610_mm_testnzc_pd(__m128d __a, __m128d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000611{
David Blaikie3302f2b2013-01-16 23:08:36 +0000612 return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000613}
614
Michael Kupersteine45af542015-06-30 13:36:19 +0000615static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000616_mm_testz_ps(__m128 __a, __m128 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000617{
David Blaikie3302f2b2013-01-16 23:08:36 +0000618 return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000619}
620
Michael Kupersteine45af542015-06-30 13:36:19 +0000621static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000622_mm_testc_ps(__m128 __a, __m128 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000623{
David Blaikie3302f2b2013-01-16 23:08:36 +0000624 return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000625}
626
Michael Kupersteine45af542015-06-30 13:36:19 +0000627static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000628_mm_testnzc_ps(__m128 __a, __m128 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000629{
David Blaikie3302f2b2013-01-16 23:08:36 +0000630 return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000631}
632
Michael Kupersteine45af542015-06-30 13:36:19 +0000633static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000634_mm256_testz_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000635{
David Blaikie3302f2b2013-01-16 23:08:36 +0000636 return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000637}
638
Michael Kupersteine45af542015-06-30 13:36:19 +0000639static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000640_mm256_testc_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000641{
David Blaikie3302f2b2013-01-16 23:08:36 +0000642 return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000643}
644
Michael Kupersteine45af542015-06-30 13:36:19 +0000645static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000646_mm256_testnzc_pd(__m256d __a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000647{
David Blaikie3302f2b2013-01-16 23:08:36 +0000648 return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000649}
650
Michael Kupersteine45af542015-06-30 13:36:19 +0000651static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000652_mm256_testz_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000653{
David Blaikie3302f2b2013-01-16 23:08:36 +0000654 return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000655}
656
Michael Kupersteine45af542015-06-30 13:36:19 +0000657static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000658_mm256_testc_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000659{
David Blaikie3302f2b2013-01-16 23:08:36 +0000660 return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000661}
662
Michael Kupersteine45af542015-06-30 13:36:19 +0000663static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000664_mm256_testnzc_ps(__m256 __a, __m256 __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000665{
David Blaikie3302f2b2013-01-16 23:08:36 +0000666 return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000667}
668
Michael Kupersteine45af542015-06-30 13:36:19 +0000669static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000670_mm256_testz_si256(__m256i __a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000671{
David Blaikie3302f2b2013-01-16 23:08:36 +0000672 return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000673}
674
Michael Kupersteine45af542015-06-30 13:36:19 +0000675static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000676_mm256_testc_si256(__m256i __a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000677{
David Blaikie3302f2b2013-01-16 23:08:36 +0000678 return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000679}
680
Michael Kupersteine45af542015-06-30 13:36:19 +0000681static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000682_mm256_testnzc_si256(__m256i __a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000683{
David Blaikie3302f2b2013-01-16 23:08:36 +0000684 return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000685}
686
687/* Vector extract sign mask */
Michael Kupersteine45af542015-06-30 13:36:19 +0000688static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000689_mm256_movemask_pd(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000690{
David Blaikie3302f2b2013-01-16 23:08:36 +0000691 return __builtin_ia32_movmskpd256((__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000692}
693
Michael Kupersteine45af542015-06-30 13:36:19 +0000694static __inline int __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000695_mm256_movemask_ps(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000696{
David Blaikie3302f2b2013-01-16 23:08:36 +0000697 return __builtin_ia32_movmskps256((__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000698}
699
/* Vector zero */
Michael Kupersteine45af542015-06-30 13:36:19 +0000701static __inline void __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000702_mm256_zeroall(void)
703{
704 __builtin_ia32_vzeroall();
705}
706
Michael Kupersteine45af542015-06-30 13:36:19 +0000707static __inline void __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000708_mm256_zeroupper(void)
709{
710 __builtin_ia32_vzeroupper();
711}
712
713/* Vector load with broadcast */
Michael Kupersteine45af542015-06-30 13:36:19 +0000714static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000715_mm_broadcast_ss(float const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000716{
Adam Nemet286ae082014-05-29 20:47:29 +0000717 float __f = *__a;
718 return (__m128)(__v4sf){ __f, __f, __f, __f };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000719}
720
Michael Kupersteine45af542015-06-30 13:36:19 +0000721static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000722_mm256_broadcast_sd(double const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000723{
Adam Nemet286ae082014-05-29 20:47:29 +0000724 double __d = *__a;
725 return (__m256d)(__v4df){ __d, __d, __d, __d };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000726}
727
Michael Kupersteine45af542015-06-30 13:36:19 +0000728static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000729_mm256_broadcast_ss(float const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000730{
Adam Nemet286ae082014-05-29 20:47:29 +0000731 float __f = *__a;
732 return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000733}
734
Michael Kupersteine45af542015-06-30 13:36:19 +0000735static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000736_mm256_broadcast_pd(__m128d const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000737{
David Blaikie3302f2b2013-01-16 23:08:36 +0000738 return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000739}
740
Michael Kupersteine45af542015-06-30 13:36:19 +0000741static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000742_mm256_broadcast_ps(__m128 const *__a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000743{
David Blaikie3302f2b2013-01-16 23:08:36 +0000744 return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000745}
746
747/* SIMD load ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000748static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000749_mm256_load_pd(double const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000750{
David Blaikie3302f2b2013-01-16 23:08:36 +0000751 return *(__m256d *)__p;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000752}
753
Michael Kupersteine45af542015-06-30 13:36:19 +0000754static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000755_mm256_load_ps(float const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000756{
David Blaikie3302f2b2013-01-16 23:08:36 +0000757 return *(__m256 *)__p;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000758}
759
Michael Kupersteine45af542015-06-30 13:36:19 +0000760static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000761_mm256_loadu_pd(double const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000762{
Craig Topper9e9301a2012-01-25 04:26:17 +0000763 struct __loadu_pd {
David Blaikie3302f2b2013-01-16 23:08:36 +0000764 __m256d __v;
David Majnemer1cf22e62015-02-04 00:26:10 +0000765 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +0000766 return ((struct __loadu_pd*)__p)->__v;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000767}
768
Michael Kupersteine45af542015-06-30 13:36:19 +0000769static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000770_mm256_loadu_ps(float const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000771{
Craig Topper9e9301a2012-01-25 04:26:17 +0000772 struct __loadu_ps {
David Blaikie3302f2b2013-01-16 23:08:36 +0000773 __m256 __v;
David Majnemer1cf22e62015-02-04 00:26:10 +0000774 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +0000775 return ((struct __loadu_ps*)__p)->__v;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000776}
777
Michael Kupersteine45af542015-06-30 13:36:19 +0000778static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000779_mm256_load_si256(__m256i const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000780{
David Blaikie3302f2b2013-01-16 23:08:36 +0000781 return *__p;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000782}
783
Michael Kupersteine45af542015-06-30 13:36:19 +0000784static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000785_mm256_loadu_si256(__m256i const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000786{
Craig Topper9e9301a2012-01-25 04:26:17 +0000787 struct __loadu_si256 {
David Blaikie3302f2b2013-01-16 23:08:36 +0000788 __m256i __v;
David Majnemer1cf22e62015-02-04 00:26:10 +0000789 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +0000790 return ((struct __loadu_si256*)__p)->__v;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000791}
792
Michael Kupersteine45af542015-06-30 13:36:19 +0000793static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000794_mm256_lddqu_si256(__m256i const *__p)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000795{
David Blaikie3302f2b2013-01-16 23:08:36 +0000796 return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000797}
798
799/* SIMD store ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000800static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000801_mm256_store_pd(double *__p, __m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000802{
David Blaikie3302f2b2013-01-16 23:08:36 +0000803 *(__m256d *)__p = __a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000804}
805
Michael Kupersteine45af542015-06-30 13:36:19 +0000806static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000807_mm256_store_ps(float *__p, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000808{
David Blaikie3302f2b2013-01-16 23:08:36 +0000809 *(__m256 *)__p = __a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000810}
811
Michael Kupersteine45af542015-06-30 13:36:19 +0000812static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000813_mm256_storeu_pd(double *__p, __m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000814{
David Blaikie3302f2b2013-01-16 23:08:36 +0000815 __builtin_ia32_storeupd256(__p, (__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000816}
817
Michael Kupersteine45af542015-06-30 13:36:19 +0000818static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000819_mm256_storeu_ps(float *__p, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000820{
David Blaikie3302f2b2013-01-16 23:08:36 +0000821 __builtin_ia32_storeups256(__p, (__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000822}
823
Michael Kupersteine45af542015-06-30 13:36:19 +0000824static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000825_mm256_store_si256(__m256i *__p, __m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000826{
David Blaikie3302f2b2013-01-16 23:08:36 +0000827 *__p = __a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000828}
829
Michael Kupersteine45af542015-06-30 13:36:19 +0000830static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000831_mm256_storeu_si256(__m256i *__p, __m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000832{
David Blaikie3302f2b2013-01-16 23:08:36 +0000833 __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000834}
835
836/* Conditional load ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000837static __inline __m128d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000838_mm_maskload_pd(double const *__p, __m128d __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000839{
David Blaikie3302f2b2013-01-16 23:08:36 +0000840 return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000841}
842
Michael Kupersteine45af542015-06-30 13:36:19 +0000843static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000844_mm256_maskload_pd(double const *__p, __m256d __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000845{
David Blaikie3302f2b2013-01-16 23:08:36 +0000846 return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
847 (__v4df)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000848}
849
Michael Kupersteine45af542015-06-30 13:36:19 +0000850static __inline __m128 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000851_mm_maskload_ps(float const *__p, __m128 __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000852{
David Blaikie3302f2b2013-01-16 23:08:36 +0000853 return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000854}
855
Michael Kupersteine45af542015-06-30 13:36:19 +0000856static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000857_mm256_maskload_ps(float const *__p, __m256 __m)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000858{
David Blaikie3302f2b2013-01-16 23:08:36 +0000859 return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000860}
861
862/* Conditional store ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000863static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000864_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000865{
David Blaikie3302f2b2013-01-16 23:08:36 +0000866 __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000867}
868
Michael Kupersteine45af542015-06-30 13:36:19 +0000869static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000870_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000871{
David Blaikie3302f2b2013-01-16 23:08:36 +0000872 __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000873}
874
Michael Kupersteine45af542015-06-30 13:36:19 +0000875static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000876_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000877{
David Blaikie3302f2b2013-01-16 23:08:36 +0000878 __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000879}
880
Michael Kupersteine45af542015-06-30 13:36:19 +0000881static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000882_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000883{
David Blaikie3302f2b2013-01-16 23:08:36 +0000884 __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000885}
886
887/* Cacheability support ops */
Michael Kupersteine45af542015-06-30 13:36:19 +0000888static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000889_mm256_stream_si256(__m256i *__a, __m256i __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000890{
David Blaikie3302f2b2013-01-16 23:08:36 +0000891 __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000892}
893
Michael Kupersteine45af542015-06-30 13:36:19 +0000894static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000895_mm256_stream_pd(double *__a, __m256d __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000896{
David Blaikie3302f2b2013-01-16 23:08:36 +0000897 __builtin_ia32_movntpd256(__a, (__v4df)__b);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000898}
899
Michael Kupersteine45af542015-06-30 13:36:19 +0000900static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000901_mm256_stream_ps(float *__p, __m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000902{
David Blaikie3302f2b2013-01-16 23:08:36 +0000903 __builtin_ia32_movntps256(__p, (__v8sf)__a);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000904}
905
906/* Create vectors */
Simon Pilgrim5aba9922015-08-26 21:17:12 +0000907static __inline__ __m256d __DEFAULT_FN_ATTRS
908_mm256_undefined_pd()
909{
910 return (__m256d)__builtin_ia32_undef256();
911}
912
913static __inline__ __m256 __DEFAULT_FN_ATTRS
914_mm256_undefined_ps()
915{
916 return (__m256)__builtin_ia32_undef256();
917}
918
919static __inline__ __m256i __DEFAULT_FN_ATTRS
920_mm256_undefined_si256()
921{
922 return (__m256i)__builtin_ia32_undef256();
923}
924
Michael Kupersteine45af542015-06-30 13:36:19 +0000925static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000926_mm256_set_pd(double __a, double __b, double __c, double __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000927{
David Blaikie3302f2b2013-01-16 23:08:36 +0000928 return (__m256d){ __d, __c, __b, __a };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000929}
930
Michael Kupersteine45af542015-06-30 13:36:19 +0000931static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000932_mm256_set_ps(float __a, float __b, float __c, float __d,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000933 float __e, float __f, float __g, float __h)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000934{
David Blaikie3302f2b2013-01-16 23:08:36 +0000935 return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000936}
937
Michael Kupersteine45af542015-06-30 13:36:19 +0000938static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000939_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000940 int __i4, int __i5, int __i6, int __i7)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000941{
David Blaikie3302f2b2013-01-16 23:08:36 +0000942 return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000943}
944
Michael Kupersteine45af542015-06-30 13:36:19 +0000945static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000946_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000947 short __w11, short __w10, short __w09, short __w08,
948 short __w07, short __w06, short __w05, short __w04,
949 short __w03, short __w02, short __w01, short __w00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000950{
David Blaikie3302f2b2013-01-16 23:08:36 +0000951 return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
952 __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000953}
954
Michael Kupersteine45af542015-06-30 13:36:19 +0000955static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000956_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000957 char __b27, char __b26, char __b25, char __b24,
958 char __b23, char __b22, char __b21, char __b20,
959 char __b19, char __b18, char __b17, char __b16,
960 char __b15, char __b14, char __b13, char __b12,
961 char __b11, char __b10, char __b09, char __b08,
962 char __b07, char __b06, char __b05, char __b04,
963 char __b03, char __b02, char __b01, char __b00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000964{
965 return (__m256i)(__v32qi){
David Blaikie3302f2b2013-01-16 23:08:36 +0000966 __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
967 __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
968 __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
969 __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000970 };
971}
972
Michael Kupersteine45af542015-06-30 13:36:19 +0000973static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000974_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000975{
David Blaikie3302f2b2013-01-16 23:08:36 +0000976 return (__m256i)(__v4di){ __d, __c, __b, __a };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000977}
978
979/* Create vectors with elements in reverse order */
Michael Kupersteine45af542015-06-30 13:36:19 +0000980static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000981_mm256_setr_pd(double __a, double __b, double __c, double __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000982{
David Blaikie3302f2b2013-01-16 23:08:36 +0000983 return (__m256d){ __a, __b, __c, __d };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000984}
985
Michael Kupersteine45af542015-06-30 13:36:19 +0000986static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000987_mm256_setr_ps(float __a, float __b, float __c, float __d,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000988 float __e, float __f, float __g, float __h)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000989{
David Blaikie3302f2b2013-01-16 23:08:36 +0000990 return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000991}
992
Michael Kupersteine45af542015-06-30 13:36:19 +0000993static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +0000994_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
Craig Topper9fee8ab2015-01-31 06:33:59 +0000995 int __i4, int __i5, int __i6, int __i7)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000996{
David Blaikie3302f2b2013-01-16 23:08:36 +0000997 return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +0000998}
999
Michael Kupersteine45af542015-06-30 13:36:19 +00001000static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001001_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
Craig Topper9fee8ab2015-01-31 06:33:59 +00001002 short __w11, short __w10, short __w09, short __w08,
1003 short __w07, short __w06, short __w05, short __w04,
1004 short __w03, short __w02, short __w01, short __w00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001005{
David Blaikie3302f2b2013-01-16 23:08:36 +00001006 return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
1007 __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001008}
1009
Michael Kupersteine45af542015-06-30 13:36:19 +00001010static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001011_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
Craig Topper9fee8ab2015-01-31 06:33:59 +00001012 char __b27, char __b26, char __b25, char __b24,
1013 char __b23, char __b22, char __b21, char __b20,
1014 char __b19, char __b18, char __b17, char __b16,
1015 char __b15, char __b14, char __b13, char __b12,
1016 char __b11, char __b10, char __b09, char __b08,
1017 char __b07, char __b06, char __b05, char __b04,
1018 char __b03, char __b02, char __b01, char __b00)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001019{
1020 return (__m256i)(__v32qi){
David Blaikie3302f2b2013-01-16 23:08:36 +00001021 __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
Craig Topper9fee8ab2015-01-31 06:33:59 +00001022 __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
1023 __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
1024 __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001025}
1026
Michael Kupersteine45af542015-06-30 13:36:19 +00001027static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001028_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001029{
David Blaikie3302f2b2013-01-16 23:08:36 +00001030 return (__m256i)(__v4di){ __a, __b, __c, __d };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001031}
1032
1033/* Create vectors with repeated elements */
Michael Kupersteine45af542015-06-30 13:36:19 +00001034static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001035_mm256_set1_pd(double __w)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001036{
David Blaikie3302f2b2013-01-16 23:08:36 +00001037 return (__m256d){ __w, __w, __w, __w };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001038}
1039
Michael Kupersteine45af542015-06-30 13:36:19 +00001040static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001041_mm256_set1_ps(float __w)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001042{
David Blaikie3302f2b2013-01-16 23:08:36 +00001043 return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001044}
1045
Michael Kupersteine45af542015-06-30 13:36:19 +00001046static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001047_mm256_set1_epi32(int __i)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001048{
David Blaikie3302f2b2013-01-16 23:08:36 +00001049 return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001050}
1051
Michael Kupersteine45af542015-06-30 13:36:19 +00001052static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001053_mm256_set1_epi16(short __w)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001054{
David Blaikie3302f2b2013-01-16 23:08:36 +00001055 return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
1056 __w, __w, __w, __w, __w, __w };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001057}
1058
Michael Kupersteine45af542015-06-30 13:36:19 +00001059static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001060_mm256_set1_epi8(char __b)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001061{
David Blaikie3302f2b2013-01-16 23:08:36 +00001062 return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1063 __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
1064 __b, __b, __b, __b, __b, __b, __b };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001065}
1066
Michael Kupersteine45af542015-06-30 13:36:19 +00001067static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001068_mm256_set1_epi64x(long long __q)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001069{
David Blaikie3302f2b2013-01-16 23:08:36 +00001070 return (__m256i)(__v4di){ __q, __q, __q, __q };
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001071}
1072
/* Create zeroed vectors */
Michael Kupersteine45af542015-06-30 13:36:19 +00001074static __inline __m256d __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001075_mm256_setzero_pd(void)
1076{
1077 return (__m256d){ 0, 0, 0, 0 };
1078}
1079
Michael Kupersteine45af542015-06-30 13:36:19 +00001080static __inline __m256 __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001081_mm256_setzero_ps(void)
1082{
1083 return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
1084}
1085
Michael Kupersteine45af542015-06-30 13:36:19 +00001086static __inline __m256i __DEFAULT_FN_ATTRS
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001087_mm256_setzero_si256(void)
1088{
1089 return (__m256i){ 0LL, 0LL, 0LL, 0LL };
1090}
1091
1092/* Cast between vector types */
Michael Kupersteine45af542015-06-30 13:36:19 +00001093static __inline __m256 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001094_mm256_castpd_ps(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001095{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001096 return (__m256)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001097}
1098
Michael Kupersteine45af542015-06-30 13:36:19 +00001099static __inline __m256i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001100_mm256_castpd_si256(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001101{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001102 return (__m256i)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001103}
1104
Michael Kupersteine45af542015-06-30 13:36:19 +00001105static __inline __m256d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001106_mm256_castps_pd(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001107{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001108 return (__m256d)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001109}
1110
Michael Kupersteine45af542015-06-30 13:36:19 +00001111static __inline __m256i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001112_mm256_castps_si256(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001113{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001114 return (__m256i)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001115}
1116
Michael Kupersteine45af542015-06-30 13:36:19 +00001117static __inline __m256 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001118_mm256_castsi256_ps(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001119{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001120 return (__m256)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001121}
1122
Michael Kupersteine45af542015-06-30 13:36:19 +00001123static __inline __m256d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001124_mm256_castsi256_pd(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001125{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001126 return (__m256d)__a;
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001127}
1128
Michael Kupersteine45af542015-06-30 13:36:19 +00001129static __inline __m128d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001130_mm256_castpd256_pd128(__m256d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001131{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001132 return __builtin_shufflevector(__a, __a, 0, 1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001133}
1134
Michael Kupersteine45af542015-06-30 13:36:19 +00001135static __inline __m128 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001136_mm256_castps256_ps128(__m256 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001137{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001138 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001139}
1140
Michael Kupersteine45af542015-06-30 13:36:19 +00001141static __inline __m128i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001142_mm256_castsi256_si128(__m256i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001143{
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001144 return __builtin_shufflevector(__a, __a, 0, 1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001145}
1146
Michael Kupersteine45af542015-06-30 13:36:19 +00001147static __inline __m256d __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001148_mm256_castpd128_pd256(__m128d __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001149{
Craig Topperc5244512013-08-05 06:17:21 +00001150 return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001151}
1152
Michael Kupersteine45af542015-06-30 13:36:19 +00001153static __inline __m256 __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001154_mm256_castps128_ps256(__m128 __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001155{
Craig Topperc5244512013-08-05 06:17:21 +00001156 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001157}
1158
Michael Kupersteine45af542015-06-30 13:36:19 +00001159static __inline __m256i __DEFAULT_FN_ATTRS
Reid Kleckner7ab75b32013-04-19 17:00:14 +00001160_mm256_castsi128_si256(__m128i __a)
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001161{
Craig Topperc5244512013-08-05 06:17:21 +00001162 return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
Bruno Cardoso Lopes7c4b5132010-08-04 22:03:36 +00001163}
Chad Rosierf8df4f42012-03-20 16:40:00 +00001164
/*
   Vector insert.
   These are macros rather than inline functions so that only invocations
   whose immediate M is a constant expression are accepted.

   _mm256_insertf128_ps: replace one 128-bit half of V1 with V2.  Only bit 0
   of M is examined: when it is clear, V2 becomes the low half (elements 0..3)
   and the high half of V1 is kept; when it is set, V2 becomes the high half
   (elements 4..7) and the low half of V1 is kept.  Shuffle indices 0..7 pick
   from V1, indices 8..11 pick from V2.
*/
#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_shufflevector( \
    (__v8sf)(V1), \
    (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
    ((((M) & 1) == 0) ?  8 :  0), \
    ((((M) & 1) == 0) ?  9 :  1), \
    ((((M) & 1) == 0) ? 10 :  2), \
    ((((M) & 1) == 0) ? 11 :  3), \
    ((((M) & 1) == 0) ?  4 :  8), \
    ((((M) & 1) == 0) ?  5 :  9), \
    ((((M) & 1) == 0) ?  6 : 10), \
    ((((M) & 1) == 0) ?  7 : 11) );})
1182
/* Replace one 128-bit half of V1 (four doubles) with V2 (two doubles).  Only
   bit 0 of the constant M is examined: clear -> V2 becomes elements 0..1,
   set -> V2 becomes elements 2..3.  Shuffle indices 0..3 pick from V1,
   indices 4..5 pick from V2. */
#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
    (__v4df)(V1), \
    (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
    ((((M) & 1) == 0) ? 4 : 0), \
    ((((M) & 1) == 0) ? 5 : 1), \
    ((((M) & 1) == 0) ? 2 : 4), \
    ((((M) & 1) == 0) ? 3 : 5) );})
1191
/* Replace one 128-bit half of the integer vector V1 with V2.  Only bit 0 of
   the constant M is examined: clear -> V2 becomes the low half, set -> V2
   becomes the high half.  Shuffle indices 0..3 pick 64-bit elements from V1,
   indices 4..5 pick from V2. */
#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
    (__v4di)(V1), \
    (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
    ((((M) & 1) == 0) ? 4 : 0), \
    ((((M) & 1) == 0) ? 5 : 1), \
    ((((M) & 1) == 0) ? 2 : 4), \
    ((((M) & 1) == 0) ? 3 : 5) );})
1200
/*
   Vector extract.
   These are macros rather than inline functions so that only invocations
   whose immediate M is a constant expression are accepted.

   _mm256_extractf128_ps: return one 128-bit half of V (eight floats).  Only
   bit 0 of M is examined: clear -> elements 0..3, set -> elements 4..7.  The
   zero vector is only the required second shuffle operand; none of the
   indices used here select from it.
*/
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
    (__v8sf)(V), \
    (__v8sf)(_mm256_setzero_ps()), \
    ((((M) & 1) == 0) ? 0 : 4), \
    ((((M) & 1) == 0) ? 1 : 5), \
    ((((M) & 1) == 0) ? 2 : 6), \
    ((((M) & 1) == 0) ? 3 : 7) );})
1214
/* Return one 128-bit half of V (four doubles).  Only bit 0 of the constant M
   is examined: clear -> elements 0..1, set -> elements 2..3.  The zero vector
   is only the required second shuffle operand and is never selected. */
#define _mm256_extractf128_pd(V, M) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
    (__v4df)(V), \
    (__v4df)(_mm256_setzero_pd()), \
    ((((M) & 1) == 0) ? 0 : 2), \
    ((((M) & 1) == 0) ? 1 : 3) );})
1221
/* Return one 128-bit half of the integer vector V.  Only bit 0 of the
   constant M is examined: clear -> 64-bit elements 0..1, set -> elements
   2..3.  The zero vector is only the required second shuffle operand and is
   never selected. */
#define _mm256_extractf128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
    (__v4di)(V), \
    (__v4di)(_mm256_setzero_si256()), \
    ((((M) & 1) == 0) ? 0 : 2), \
    ((((M) & 1) == 0) ? 1 : 3) );})
1228
Chad Rosierf8df4f42012-03-20 16:40:00 +00001229/* SIMD load ops (unaligned) */
Michael Kupersteine45af542015-06-30 13:36:19 +00001230static __inline __m256 __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001231_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001232{
1233 struct __loadu_ps {
David Blaikie3302f2b2013-01-16 23:08:36 +00001234 __m128 __v;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001235 } __attribute__((__packed__, __may_alias__));
1236
David Blaikie3302f2b2013-01-16 23:08:36 +00001237 __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
1238 return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001239}
1240
Michael Kupersteine45af542015-06-30 13:36:19 +00001241static __inline __m256d __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001242_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001243{
1244 struct __loadu_pd {
David Blaikie3302f2b2013-01-16 23:08:36 +00001245 __m128d __v;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001246 } __attribute__((__packed__, __may_alias__));
Sean Silvae4c37602015-09-12 02:55:19 +00001247
David Blaikie3302f2b2013-01-16 23:08:36 +00001248 __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
1249 return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001250}
1251
Michael Kupersteine45af542015-06-30 13:36:19 +00001252static __inline __m256i __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001253_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001254{
1255 struct __loadu_si128 {
David Blaikie3302f2b2013-01-16 23:08:36 +00001256 __m128i __v;
David Majnemer1cf22e62015-02-04 00:26:10 +00001257 } __attribute__((__packed__, __may_alias__));
David Blaikie3302f2b2013-01-16 23:08:36 +00001258 __m256i __v256 = _mm256_castsi128_si256(
1259 ((struct __loadu_si128*)__addr_lo)->__v);
1260 return _mm256_insertf128_si256(__v256,
1261 ((struct __loadu_si128*)__addr_hi)->__v, 1);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001262}
1263
1264/* SIMD store ops (unaligned) */
Michael Kupersteine45af542015-06-30 13:36:19 +00001265static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001266_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001267{
David Blaikie3302f2b2013-01-16 23:08:36 +00001268 __m128 __v128;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001269
David Blaikie3302f2b2013-01-16 23:08:36 +00001270 __v128 = _mm256_castps256_ps128(__a);
1271 __builtin_ia32_storeups(__addr_lo, __v128);
1272 __v128 = _mm256_extractf128_ps(__a, 1);
1273 __builtin_ia32_storeups(__addr_hi, __v128);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001274}
1275
Michael Kupersteine45af542015-06-30 13:36:19 +00001276static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001277_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001278{
David Blaikie3302f2b2013-01-16 23:08:36 +00001279 __m128d __v128;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001280
David Blaikie3302f2b2013-01-16 23:08:36 +00001281 __v128 = _mm256_castpd256_pd128(__a);
1282 __builtin_ia32_storeupd(__addr_lo, __v128);
1283 __v128 = _mm256_extractf128_pd(__a, 1);
1284 __builtin_ia32_storeupd(__addr_hi, __v128);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001285}
1286
Michael Kupersteine45af542015-06-30 13:36:19 +00001287static __inline void __DEFAULT_FN_ATTRS
David Blaikie3302f2b2013-01-16 23:08:36 +00001288_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
Chad Rosierf8df4f42012-03-20 16:40:00 +00001289{
David Blaikie3302f2b2013-01-16 23:08:36 +00001290 __m128i __v128;
Chad Rosierf8df4f42012-03-20 16:40:00 +00001291
David Blaikie3302f2b2013-01-16 23:08:36 +00001292 __v128 = _mm256_castsi256_si128(__a);
1293 __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128);
1294 __v128 = _mm256_extractf128_si256(__a, 1);
1295 __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
Chad Rosierf8df4f42012-03-20 16:40:00 +00001296}
Richard Smith49e56442013-07-14 05:41:45 +00001297
Michael Kupersteine45af542015-06-30 13:36:19 +00001298static __inline __m256 __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001299_mm256_set_m128 (__m128 __hi, __m128 __lo) {
1300 return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
1301}
1302
Michael Kupersteine45af542015-06-30 13:36:19 +00001303static __inline __m256d __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001304_mm256_set_m128d (__m128d __hi, __m128d __lo) {
1305 return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1306}
1307
Michael Kupersteine45af542015-06-30 13:36:19 +00001308static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001309_mm256_set_m128i (__m128i __hi, __m128i __lo) {
1310 return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1311}
1312
Michael Kupersteine45af542015-06-30 13:36:19 +00001313static __inline __m256 __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001314_mm256_setr_m128 (__m128 __lo, __m128 __hi) {
1315 return _mm256_set_m128(__hi, __lo);
1316}
1317
Michael Kupersteine45af542015-06-30 13:36:19 +00001318static __inline __m256d __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001319_mm256_setr_m128d (__m128d __lo, __m128d __hi) {
1320 return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1321}
1322
Michael Kupersteine45af542015-06-30 13:36:19 +00001323static __inline __m256i __DEFAULT_FN_ATTRS
Michael Kuperstein76190042015-05-20 07:46:52 +00001324_mm256_setr_m128i (__m128i __lo, __m128i __hi) {
1325 return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
1326}
1327
Michael Kupersteine45af542015-06-30 13:36:19 +00001328#undef __DEFAULT_FN_ATTRS
Eric Christopher4d1851682015-06-17 07:09:20 +00001329
Richard Smith49e56442013-07-14 05:41:45 +00001330#endif /* __AVXINTRIN_H */