/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __FMAINTRIN_H
15#define __FMAINTRIN_H
16
17/* Define the default attributes for the functions in this file. */
Logan Chien55afb0a2018-10-15 10:42:14 +080018#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
19#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
Logan Chien2833ffb2018-10-09 10:03:24 +080020
Logan Chien55afb0a2018-10-15 10:42:14 +080021static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080022_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
23{
24 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
25}
26
Logan Chien55afb0a2018-10-15 10:42:14 +080027static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080028_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
29{
30 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
31}
32
Logan Chien55afb0a2018-10-15 10:42:14 +080033static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080034_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
35{
Logan Chien55afb0a2018-10-15 10:42:14 +080036 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080037}
38
Logan Chien55afb0a2018-10-15 10:42:14 +080039static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080040_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
41{
Logan Chien55afb0a2018-10-15 10:42:14 +080042 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080043}
44
Logan Chien55afb0a2018-10-15 10:42:14 +080045static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080046_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
47{
Logan Chien55afb0a2018-10-15 10:42:14 +080048 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080049}
50
Logan Chien55afb0a2018-10-15 10:42:14 +080051static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080052_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
53{
Logan Chien55afb0a2018-10-15 10:42:14 +080054 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080055}
56
Logan Chien55afb0a2018-10-15 10:42:14 +080057static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080058_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
59{
Logan Chien55afb0a2018-10-15 10:42:14 +080060 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080061}
62
Logan Chien55afb0a2018-10-15 10:42:14 +080063static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080064_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
65{
Logan Chien55afb0a2018-10-15 10:42:14 +080066 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080067}
68
Logan Chien55afb0a2018-10-15 10:42:14 +080069static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080070_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
71{
Logan Chien55afb0a2018-10-15 10:42:14 +080072 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080073}
74
Logan Chien55afb0a2018-10-15 10:42:14 +080075static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080076_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
77{
Logan Chien55afb0a2018-10-15 10:42:14 +080078 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080079}
80
Logan Chien55afb0a2018-10-15 10:42:14 +080081static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080082_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
83{
Logan Chien55afb0a2018-10-15 10:42:14 +080084 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080085}
86
Logan Chien55afb0a2018-10-15 10:42:14 +080087static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080088_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
89{
Logan Chien55afb0a2018-10-15 10:42:14 +080090 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080091}
92
Logan Chien55afb0a2018-10-15 10:42:14 +080093static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +080094_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
95{
Logan Chien55afb0a2018-10-15 10:42:14 +080096 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +080097}
98
Logan Chien55afb0a2018-10-15 10:42:14 +080099static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800100_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
101{
Logan Chien55afb0a2018-10-15 10:42:14 +0800102 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800103}
104
Logan Chien55afb0a2018-10-15 10:42:14 +0800105static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800106_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
107{
Logan Chien55afb0a2018-10-15 10:42:14 +0800108 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800109}
110
Logan Chien55afb0a2018-10-15 10:42:14 +0800111static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800112_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
113{
Logan Chien55afb0a2018-10-15 10:42:14 +0800114 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800115}
116
Logan Chien55afb0a2018-10-15 10:42:14 +0800117static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800118_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
119{
120 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
121}
122
Logan Chien55afb0a2018-10-15 10:42:14 +0800123static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800124_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
125{
126 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
127}
128
Logan Chien55afb0a2018-10-15 10:42:14 +0800129static __inline__ __m128 __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800130_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
131{
Logan Chien55afb0a2018-10-15 10:42:14 +0800132 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800133}
134
Logan Chien55afb0a2018-10-15 10:42:14 +0800135static __inline__ __m128d __DEFAULT_FN_ATTRS128
Logan Chien2833ffb2018-10-09 10:03:24 +0800136_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
137{
Logan Chien55afb0a2018-10-15 10:42:14 +0800138 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800139}
140
Logan Chien55afb0a2018-10-15 10:42:14 +0800141static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800142_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
143{
144 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
145}
146
Logan Chien55afb0a2018-10-15 10:42:14 +0800147static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800148_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
149{
150 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
151}
152
Logan Chien55afb0a2018-10-15 10:42:14 +0800153static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800154_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
155{
Logan Chien55afb0a2018-10-15 10:42:14 +0800156 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800157}
158
Logan Chien55afb0a2018-10-15 10:42:14 +0800159static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800160_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
161{
Logan Chien55afb0a2018-10-15 10:42:14 +0800162 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800163}
164
Logan Chien55afb0a2018-10-15 10:42:14 +0800165static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800166_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
167{
Logan Chien55afb0a2018-10-15 10:42:14 +0800168 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800169}
170
Logan Chien55afb0a2018-10-15 10:42:14 +0800171static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800172_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
173{
Logan Chien55afb0a2018-10-15 10:42:14 +0800174 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800175}
176
Logan Chien55afb0a2018-10-15 10:42:14 +0800177static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800178_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
179{
Logan Chien55afb0a2018-10-15 10:42:14 +0800180 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800181}
182
Logan Chien55afb0a2018-10-15 10:42:14 +0800183static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800184_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
185{
Logan Chien55afb0a2018-10-15 10:42:14 +0800186 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800187}
188
Logan Chien55afb0a2018-10-15 10:42:14 +0800189static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800190_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
191{
192 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
193}
194
Logan Chien55afb0a2018-10-15 10:42:14 +0800195static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800196_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
197{
198 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
199}
200
Logan Chien55afb0a2018-10-15 10:42:14 +0800201static __inline__ __m256 __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800202_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
203{
Logan Chien55afb0a2018-10-15 10:42:14 +0800204 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800205}
206
Logan Chien55afb0a2018-10-15 10:42:14 +0800207static __inline__ __m256d __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800208_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
209{
Logan Chien55afb0a2018-10-15 10:42:14 +0800210 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
Logan Chien2833ffb2018-10-09 10:03:24 +0800211}
212
Logan Chien55afb0a2018-10-15 10:42:14 +0800213#undef __DEFAULT_FN_ATTRS128
214#undef __DEFAULT_FN_ATTRS256
Logan Chien2833ffb2018-10-09 10:03:24 +0800215
216#endif /* __FMAINTRIN_H */