[AArch64] Implement FP16FML intrinsics
Generate the FP16FML intrinsics into arm_neon.h (AArch64 only for now).
Add two new type modifiers to NeonEmitter to handle the new prototypes.
Define __ARM_FEATURE_FP16FML when +fp16fml is enabled and guard the
intrinsics with the macro in arm_neon.h.
Based on a patch by Gao Yiling.
Differential Revision: https://reviews.llvm.org/D53633
llvm-svn: 345344
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3e8debb..f058109 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4368,6 +4368,14 @@
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
NEONMAP0(vfmaq_v),
+ NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
@@ -5341,6 +5349,34 @@
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
}
+ case NEON::BI__builtin_neon_vfmlal_low_v:
+ case NEON::BI__builtin_neon_vfmlalq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_low_v:
+ case NEON::BI__builtin_neon_vfmlslq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
+ }
+ case NEON::BI__builtin_neon_vfmlal_high_v:
+ case NEON::BI__builtin_neon_vfmlalq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_high_v:
+ case NEON::BI__builtin_neon_vfmlslq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
+ }
}
assert(Int && "Expected valid intrinsic number");