blob: 323f4f0d6728e1718d6c67569bb2fe469c200386 [file] [log] [blame]
/*===---- avx512erintrin.h - AVX512ER intrinsics ----------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512ERINTRIN_H
28#define __AVX512ERINTRIN_H
29
30
31// rsqrt28
/*
 * Unmasked form: calls __builtin_ia32_rsqrt28pd_mask on A with
 * _mm512_setzero_pd() as the second operand, a (__mmask8)-1 mask, and R as
 * the final operand (presumably the rounding control -- confirm against the
 * builtin's definition).  The result is cast to __m512d.
 */
#define _mm512_rsqrt28_round_pd(A, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_rsqrt28pd_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (R)); \
  })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000036
/*
 * Masked form: calls __builtin_ia32_rsqrt28pd_mask on A, passing S as the
 * second operand and M (cast to __mmask8) as the mask.  Presumably elements
 * whose mask bit is clear are taken from S -- confirm against the builtin.
 * R is forwarded unchanged; the result is cast to __m512d.
 */
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_rsqrt28pd_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(S), \
        (__mmask8)(M), \
        (R)); \
  })
41
/*
 * Zero-operand masked form: identical to the masked call except that
 * _mm512_setzero_pd() is passed as the second operand instead of a
 * caller-supplied vector.  M is cast to __mmask8; R is forwarded unchanged.
 */
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_rsqrt28pd_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(M), \
        (R)); \
  })
46
/* Shorthand for _mm512_rsqrt28_round_pd with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_rsqrt28_pd(A) \
    _mm512_rsqrt28_round_pd( \
        (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +000049
/* Shorthand for _mm512_mask_rsqrt28_round_pd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M and A are forwarded in order. */
#define _mm512_mask_rsqrt28_pd(S, M, A) \
    _mm512_mask_rsqrt28_round_pd( \
        (S), (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +000052
/* Shorthand for _mm512_maskz_rsqrt28_round_pd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M and A are forwarded in order. */
#define _mm512_maskz_rsqrt28_pd(M, A) \
    _mm512_maskz_rsqrt28_round_pd( \
        (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +000055
/*
 * Unmasked form: calls __builtin_ia32_rsqrt28ps_mask on A with
 * _mm512_setzero_ps() as the second operand, a (__mmask16)-1 mask, and R as
 * the final operand (presumably the rounding control -- confirm against the
 * builtin's definition).  The result is cast to __m512.
 */
#define _mm512_rsqrt28_round_ps(A, R) \
  __extension__({ \
    (__m512)__builtin_ia32_rsqrt28ps_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        (R)); \
  })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +000060
/*
 * Masked form: calls __builtin_ia32_rsqrt28ps_mask on A, passing S as the
 * second operand and M (cast to __mmask16) as the mask.  Presumably elements
 * whose mask bit is clear are taken from S -- confirm against the builtin.
 * R is forwarded unchanged; the result is cast to __m512.
 */
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  __extension__({ \
    (__m512)__builtin_ia32_rsqrt28ps_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(S), \
        (__mmask16)(M), \
        (R)); \
  })
65
/*
 * Zero-operand masked form: identical to the masked call except that
 * _mm512_setzero_ps() is passed as the second operand instead of a
 * caller-supplied vector.  M is cast to __mmask16; R is forwarded unchanged.
 */
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  __extension__({ \
    (__m512)__builtin_ia32_rsqrt28ps_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(M), \
        (R)); \
  })
70
/* Shorthand for _mm512_rsqrt28_round_ps with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_rsqrt28_ps(A) \
    _mm512_rsqrt28_round_ps( \
        (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +000073
/*
 * Shorthand for _mm512_mask_rsqrt28_round_ps with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M and A are forwarded in order.
 *
 * Every argument, including A, is parenthesized before being forwarded so
 * that this wrapper matches its sibling wrappers and does not depend on the
 * callee re-parenthesizing its parameters.
 */
#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +000076
/* Shorthand for _mm512_maskz_rsqrt28_round_ps with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M and A are forwarded in order. */
#define _mm512_maskz_rsqrt28_ps(M, A) \
    _mm512_maskz_rsqrt28_round_ps( \
        (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +000079
/*
 * Unmasked two-source form: calls __builtin_ia32_rsqrt28ss_mask with A and B
 * as the first two operands, _mm_setzero_ps() as the third, a (__mmask8)-1
 * mask, and R as the final operand (presumably the rounding control --
 * confirm against the builtin's definition).  The result is cast to __m128.
 */
#define _mm_rsqrt28_round_ss(A, B, R) \
  __extension__({ \
    (__m128)__builtin_ia32_rsqrt28ss_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (R)); \
  })
85
/*
 * Masked two-source form: calls __builtin_ia32_rsqrt28ss_mask with A and B as
 * the first two operands, S as the third, and M (cast to __mmask8) as the
 * mask.  Presumably S supplies the result when the mask bit is clear --
 * confirm against the builtin.  R is forwarded unchanged.
 */
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  __extension__({ \
    (__m128)__builtin_ia32_rsqrt28ss_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(S), \
        (__mmask8)(M), \
        (R)); \
  })
91
/*
 * Zero-operand masked two-source form: same call as the masked variant but
 * with _mm_setzero_ps() as the third operand instead of a caller-supplied
 * vector.  M is cast to __mmask8; R is forwarded unchanged.
 */
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  __extension__({ \
    (__m128)__builtin_ia32_rsqrt28ss_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(M), \
        (R)); \
  })
97
/* Shorthand for _mm_rsqrt28_round_ss with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_rsqrt28_ss(A, B) \
    _mm_rsqrt28_round_ss( \
        (A), (B), _MM_FROUND_CUR_DIRECTION)
100
/* Shorthand for _mm_mask_rsqrt28_round_ss with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M, A and B are forwarded in order. */
#define _mm_mask_rsqrt28_ss(S, M, A, B) \
    _mm_mask_rsqrt28_round_ss( \
        (S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
103
/* Shorthand for _mm_maskz_rsqrt28_round_ss with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M, A and B are forwarded in order. */
#define _mm_maskz_rsqrt28_ss(M, A, B) \
    _mm_maskz_rsqrt28_round_ss( \
        (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
106
/*
 * Unmasked two-source form: calls __builtin_ia32_rsqrt28sd_mask with A and B
 * as the first two operands, _mm_setzero_pd() as the third, a (__mmask8)-1
 * mask, and R as the final operand (presumably the rounding control --
 * confirm against the builtin's definition).  The result is cast to __m128d.
 */
#define _mm_rsqrt28_round_sd(A, B, R) \
  __extension__({ \
    (__m128d)__builtin_ia32_rsqrt28sd_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, \
        (R)); \
  })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000112
/*
 * Masked two-source form: calls __builtin_ia32_rsqrt28sd_mask with A and B as
 * the first two operands, S as the third, and M (cast to __mmask8) as the
 * mask.  Presumably S supplies the result when the mask bit is clear --
 * confirm against the builtin.  R is forwarded unchanged.
 */
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  __extension__({ \
    (__m128d)__builtin_ia32_rsqrt28sd_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(S), \
        (__mmask8)(M), \
        (R)); \
  })
118
/*
 * Zero-operand masked two-source form: same call as the masked variant but
 * with _mm_setzero_pd() as the third operand instead of a caller-supplied
 * vector.  M is cast to __mmask8; R is forwarded unchanged.
 */
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  __extension__({ \
    (__m128d)__builtin_ia32_rsqrt28sd_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(M), \
        (R)); \
  })
124
/* Shorthand for _mm_rsqrt28_round_sd with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_rsqrt28_sd(A, B) \
    _mm_rsqrt28_round_sd( \
        (A), (B), _MM_FROUND_CUR_DIRECTION)
127
/* Shorthand for _mm_mask_rsqrt28_round_sd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M, A and B are forwarded in order. */
#define _mm_mask_rsqrt28_sd(S, M, A, B) \
    _mm_mask_rsqrt28_round_sd( \
        (S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
130
/*
 * Shorthand for _mm_maskz_rsqrt28_round_sd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M, A and B are forwarded in order.
 *
 * This must forward to the maskz round macro, which takes (M, A, B, R).  The
 * merge-masking _mm_mask_rsqrt28_round_sd takes an additional leading S
 * operand, so it cannot be invoked with these four arguments.
 */
#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000133
134// rcp28
/*
 * Unmasked form: calls __builtin_ia32_rcp28pd_mask on A with
 * _mm512_setzero_pd() as the second operand, a (__mmask8)-1 mask, and R as
 * the final operand (presumably the rounding control -- confirm against the
 * builtin's definition).  The result is cast to __m512d.
 */
#define _mm512_rcp28_round_pd(A, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_rcp28pd_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (R)); \
  })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000139
/*
 * Masked form: calls __builtin_ia32_rcp28pd_mask on A, passing S as the
 * second operand and M (cast to __mmask8) as the mask.  Presumably elements
 * whose mask bit is clear are taken from S -- confirm against the builtin.
 * R is forwarded unchanged; the result is cast to __m512d.
 */
#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_rcp28pd_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(S), \
        (__mmask8)(M), \
        (R)); \
  })
144
/*
 * Zero-operand masked form: identical to the masked call except that
 * _mm512_setzero_pd() is passed as the second operand instead of a
 * caller-supplied vector.  M is cast to __mmask8; R is forwarded unchanged.
 */
#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_rcp28pd_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(M), \
        (R)); \
  })
149
/* Shorthand for _mm512_rcp28_round_pd with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_rcp28_pd(A) \
    _mm512_rcp28_round_pd( \
        (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +0000152
/* Shorthand for _mm512_mask_rcp28_round_pd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M and A are forwarded in order. */
#define _mm512_mask_rcp28_pd(S, M, A) \
    _mm512_mask_rcp28_round_pd( \
        (S), (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +0000155
/* Shorthand for _mm512_maskz_rcp28_round_pd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M and A are forwarded in order. */
#define _mm512_maskz_rcp28_pd(M, A) \
    _mm512_maskz_rcp28_round_pd( \
        (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +0000158
/*
 * Unmasked form: calls __builtin_ia32_rcp28ps_mask on A with
 * _mm512_setzero_ps() as the second operand, a (__mmask16)-1 mask, and R as
 * the final operand (presumably the rounding control -- confirm against the
 * builtin's definition).  The result is cast to __m512.
 */
#define _mm512_rcp28_round_ps(A, R) \
  __extension__({ \
    (__m512)__builtin_ia32_rcp28ps_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        (R)); \
  })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000163
/*
 * Masked form: calls __builtin_ia32_rcp28ps_mask on A, passing S as the
 * second operand and M (cast to __mmask16) as the mask.  Presumably elements
 * whose mask bit is clear are taken from S -- confirm against the builtin.
 * R is forwarded unchanged; the result is cast to __m512.
 */
#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  __extension__({ \
    (__m512)__builtin_ia32_rcp28ps_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(S), \
        (__mmask16)(M), \
        (R)); \
  })
168
/*
 * Zero-operand masked form: identical to the masked call except that
 * _mm512_setzero_ps() is passed as the second operand instead of a
 * caller-supplied vector.  M is cast to __mmask16; R is forwarded unchanged.
 */
#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  __extension__({ \
    (__m512)__builtin_ia32_rcp28ps_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(M), \
        (R)); \
  })
173
/* Shorthand for _mm512_rcp28_round_ps with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_rcp28_ps(A) \
    _mm512_rcp28_round_ps( \
        (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +0000176
/* Shorthand for _mm512_mask_rcp28_round_ps with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M and A are forwarded in order. */
#define _mm512_mask_rcp28_ps(S, M, A) \
    _mm512_mask_rcp28_round_ps( \
        (S), (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +0000179
/* Shorthand for _mm512_maskz_rcp28_round_ps with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M and A are forwarded in order. */
#define _mm512_maskz_rcp28_ps(M, A) \
    _mm512_maskz_rcp28_round_ps( \
        (M), (A), _MM_FROUND_CUR_DIRECTION)
Craig Topperc4b852a2015-02-01 08:52:55 +0000182
/*
 * Unmasked two-source form: calls __builtin_ia32_rcp28ss_mask with A and B as
 * the first two operands, _mm_setzero_ps() as the third, a (__mmask8)-1 mask,
 * and R as the final operand (presumably the rounding control -- confirm
 * against the builtin's definition).  The result is cast to __m128.
 */
#define _mm_rcp28_round_ss(A, B, R) \
  __extension__({ \
    (__m128)__builtin_ia32_rcp28ss_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (R)); \
  })
188
/*
 * Masked two-source form: calls __builtin_ia32_rcp28ss_mask with A and B as
 * the first two operands, S as the third, and M (cast to __mmask8) as the
 * mask.  Presumably S supplies the result when the mask bit is clear --
 * confirm against the builtin.  R is forwarded unchanged.
 */
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  __extension__({ \
    (__m128)__builtin_ia32_rcp28ss_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(S), \
        (__mmask8)(M), \
        (R)); \
  })
194
/*
 * Zero-operand masked two-source form: same call as the masked variant but
 * with _mm_setzero_ps() as the third operand instead of a caller-supplied
 * vector.  M is cast to __mmask8; R is forwarded unchanged.
 */
#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  __extension__({ \
    (__m128)__builtin_ia32_rcp28ss_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(M), \
        (R)); \
  })
200
/* Shorthand for _mm_rcp28_round_ss with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_rcp28_ss(A, B) \
    _mm_rcp28_round_ss( \
        (A), (B), _MM_FROUND_CUR_DIRECTION)
203
/* Shorthand for _mm_mask_rcp28_round_ss with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M, A and B are forwarded in order. */
#define _mm_mask_rcp28_ss(S, M, A, B) \
    _mm_mask_rcp28_round_ss( \
        (S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
206
/* Shorthand for _mm_maskz_rcp28_round_ss with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M, A and B are forwarded in order. */
#define _mm_maskz_rcp28_ss(M, A, B) \
    _mm_maskz_rcp28_round_ss( \
        (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
Craig Topperb01fc312015-02-01 08:05:12 +0000209
/*
 * Unmasked two-source form: calls __builtin_ia32_rcp28sd_mask with A and B as
 * the first two operands, _mm_setzero_pd() as the third, a (__mmask8)-1 mask,
 * and R as the final operand (presumably the rounding control -- confirm
 * against the builtin's definition).  The result is cast to __m128d.
 */
#define _mm_rcp28_round_sd(A, B, R) \
  __extension__({ \
    (__m128d)__builtin_ia32_rcp28sd_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, \
        (R)); \
  })
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000215
/*
 * Masked two-source form: calls __builtin_ia32_rcp28sd_mask with A and B as
 * the first two operands, S as the third, and M (cast to __mmask8) as the
 * mask.  Presumably S supplies the result when the mask bit is clear --
 * confirm against the builtin.  R is forwarded unchanged.
 */
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  __extension__({ \
    (__m128d)__builtin_ia32_rcp28sd_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(S), \
        (__mmask8)(M), \
        (R)); \
  })
221
/*
 * Zero-operand masked two-source form: same call as the masked variant but
 * with _mm_setzero_pd() as the third operand instead of a caller-supplied
 * vector.  M is cast to __mmask8; R is forwarded unchanged.
 */
#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  __extension__({ \
    (__m128d)__builtin_ia32_rcp28sd_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(M), \
        (R)); \
  })
227
/* Shorthand for _mm_rcp28_round_sd with R fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_rcp28_sd(A, B) \
    _mm_rcp28_round_sd( \
        (A), (B), _MM_FROUND_CUR_DIRECTION)
230
/* Shorthand for _mm_mask_rcp28_round_sd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; S, M, A and B are forwarded in order. */
#define _mm_mask_rcp28_sd(S, M, A, B) \
    _mm_mask_rcp28_round_sd( \
        (S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
233
/* Shorthand for _mm_maskz_rcp28_round_sd with R fixed to
 * _MM_FROUND_CUR_DIRECTION; M, A and B are forwarded in order. */
#define _mm_maskz_rcp28_sd(M, A, B) \
    _mm_maskz_rcp28_round_sd( \
        (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
236
Elena Demikhovskyfcc6df32014-07-22 11:31:39 +0000237#endif // __AVX512ERINTRIN_H