/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24#ifndef __X86INTRIN_H
25#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
26#endif
27
28#ifndef __FMA4INTRIN_H
29#define __FMA4INTRIN_H
30
Craig Topperb4ceb6f2011-12-30 09:15:03 +000031#include <pmmintrin.h>
32
/*
 * Default attributes for the functions in this file.
 *
 * Both variants force inlining, suppress debug info and enable the "fma4"
 * target feature for the function they decorate; they differ only in the
 * __min_vector_width__ value (128 for the __m128/__m128d wrappers, 256 for
 * the __m256/__m256d wrappers).
 */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))

Craig Topper74c10e32018-07-09 19:00:16 +000037static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000038_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
39{
Craig Topper1aa231e2016-05-16 06:38:42 +000040 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000041}
42
Craig Topper74c10e32018-07-09 19:00:16 +000043static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000044_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
45{
Craig Topper1aa231e2016-05-16 06:38:42 +000046 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000047}
48
Craig Topper74c10e32018-07-09 19:00:16 +000049static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000050_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
51{
Craig Topper9e032ed2017-11-25 19:32:12 +000052 return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000053}
54
Craig Topper74c10e32018-07-09 19:00:16 +000055static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000056_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
57{
Craig Topper9e032ed2017-11-25 19:32:12 +000058 return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000059}
60
Craig Topper74c10e32018-07-09 19:00:16 +000061static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000062_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
63{
Craig Topperb3d44732017-11-10 05:20:32 +000064 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000065}
66
Craig Topper74c10e32018-07-09 19:00:16 +000067static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000068_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
69{
Craig Topperb3d44732017-11-10 05:20:32 +000070 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000071}
72
Craig Topper74c10e32018-07-09 19:00:16 +000073static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000074_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
75{
Craig Topper9e032ed2017-11-25 19:32:12 +000076 return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000077}
78
Craig Topper74c10e32018-07-09 19:00:16 +000079static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000080_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
81{
Craig Topper9e032ed2017-11-25 19:32:12 +000082 return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000083}
84
Craig Topper74c10e32018-07-09 19:00:16 +000085static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000086_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
87{
Craig Topperb3d44732017-11-10 05:20:32 +000088 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000089}
90
Craig Topper74c10e32018-07-09 19:00:16 +000091static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000092_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
93{
Craig Topperb3d44732017-11-10 05:20:32 +000094 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +000095}
96
Craig Topper74c10e32018-07-09 19:00:16 +000097static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +000098_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
99{
Craig Topper9e032ed2017-11-25 19:32:12 +0000100 return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000101}
102
Craig Topper74c10e32018-07-09 19:00:16 +0000103static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000104_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
105{
Craig Topper9e032ed2017-11-25 19:32:12 +0000106 return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000107}
108
Craig Topper74c10e32018-07-09 19:00:16 +0000109static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000110_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
111{
Craig Topperb3d44732017-11-10 05:20:32 +0000112 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000113}
114
Craig Topper74c10e32018-07-09 19:00:16 +0000115static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000116_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
117{
Craig Topperb3d44732017-11-10 05:20:32 +0000118 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000119}
120
Craig Topper74c10e32018-07-09 19:00:16 +0000121static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000122_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
123{
Craig Topper9e032ed2017-11-25 19:32:12 +0000124 return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000125}
126
Craig Topper74c10e32018-07-09 19:00:16 +0000127static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000128_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
129{
Craig Topper9e032ed2017-11-25 19:32:12 +0000130 return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000131}
132
Craig Topper74c10e32018-07-09 19:00:16 +0000133static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000134_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
135{
Craig Topper1aa231e2016-05-16 06:38:42 +0000136 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000137}
138
Craig Topper74c10e32018-07-09 19:00:16 +0000139static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000140_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
141{
Craig Topper1aa231e2016-05-16 06:38:42 +0000142 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000143}
144
Craig Topper74c10e32018-07-09 19:00:16 +0000145static __inline__ __m128 __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000146_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
147{
Craig Topperb3d44732017-11-10 05:20:32 +0000148 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000149}
150
Craig Topper74c10e32018-07-09 19:00:16 +0000151static __inline__ __m128d __DEFAULT_FN_ATTRS128
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000152_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
153{
Craig Topperb3d44732017-11-10 05:20:32 +0000154 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000155}
156
Craig Topper74c10e32018-07-09 19:00:16 +0000157static __inline__ __m256 __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000158_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
159{
Craig Topper1aa231e2016-05-16 06:38:42 +0000160 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000161}
162
Craig Topper74c10e32018-07-09 19:00:16 +0000163static __inline__ __m256d __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000164_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
165{
Craig Topper1aa231e2016-05-16 06:38:42 +0000166 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000167}
168
Craig Topper74c10e32018-07-09 19:00:16 +0000169static __inline__ __m256 __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000170_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
171{
Craig Topperb3d44732017-11-10 05:20:32 +0000172 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000173}
174
Craig Topper74c10e32018-07-09 19:00:16 +0000175static __inline__ __m256d __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000176_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
177{
Craig Topperb3d44732017-11-10 05:20:32 +0000178 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000179}
180
Craig Topper74c10e32018-07-09 19:00:16 +0000181static __inline__ __m256 __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000182_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
183{
Craig Topperb3d44732017-11-10 05:20:32 +0000184 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000185}
186
Craig Topper74c10e32018-07-09 19:00:16 +0000187static __inline__ __m256d __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000188_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
189{
Craig Topperb3d44732017-11-10 05:20:32 +0000190 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000191}
192
Craig Topper74c10e32018-07-09 19:00:16 +0000193static __inline__ __m256 __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000194_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
195{
Craig Topperb3d44732017-11-10 05:20:32 +0000196 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000197}
198
Craig Topper74c10e32018-07-09 19:00:16 +0000199static __inline__ __m256d __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000200_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
201{
Craig Topperb3d44732017-11-10 05:20:32 +0000202 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000203}
204
Craig Topper74c10e32018-07-09 19:00:16 +0000205static __inline__ __m256 __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000206_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
207{
Craig Topper1aa231e2016-05-16 06:38:42 +0000208 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000209}
210
Craig Topper74c10e32018-07-09 19:00:16 +0000211static __inline__ __m256d __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000212_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
213{
Craig Topper1aa231e2016-05-16 06:38:42 +0000214 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000215}
216
Craig Topper74c10e32018-07-09 19:00:16 +0000217static __inline__ __m256 __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000218_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
219{
Craig Topperb3d44732017-11-10 05:20:32 +0000220 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000221}
222
Craig Topper74c10e32018-07-09 19:00:16 +0000223static __inline__ __m256d __DEFAULT_FN_ATTRS256
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000224_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
225{
Craig Topperb3d44732017-11-10 05:20:32 +0000226 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000227}
228
Craig Topper74c10e32018-07-09 19:00:16 +0000229#undef __DEFAULT_FN_ATTRS128
230#undef __DEFAULT_FN_ATTRS256
Eric Christopher4d1851682015-06-17 07:09:20 +0000231
Craig Topperb4ceb6f2011-12-30 09:15:03 +0000232#endif /* __FMA4INTRIN_H */