Implement the AArch64 NEON AdvSIMD (Across) instruction class.
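
For reference, a minimal usage sketch of the across-lanes operations this adds
(intrinsic names and return types follow ACLE's <arm_neon.h>, not this patch;
the comments note the aarch64_neon_* intrinsic each call is expected to lower
through):

  #include <arm_neon.h>

  int16_t  widening_sum(int8x8_t v) { return vaddlv_s8(v); }    // saddlv
  uint16_t lane_max(uint16x8_t v)   { return vmaxvq_u16(v); }   // umaxv
  float    nm_min(float32x4_t v)    { return vminnmvq_f32(v); } // vminnmv

Each call reduces a whole vector to a single scalar. At the IR level the
selected intrinsic returns a one-element vector (with a widened element type
for the vaddlv forms), which is then cast back to the scalar result type;
that is what the AcrossVec / ExtendEle handling below implements.
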
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192029 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 2fe4167..52e40db 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1748,209 +1748,245 @@
static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
unsigned BuiltinID,
const CallExpr *E) {
- NeonTypeFlags::EltType ET;
- bool usgn;
unsigned int Int = 0;
- bool OverloadInt = true;
+ // Scalar result generated across vectors
+ bool AcrossVec = false;
+  // Widen (extend) the element type of the one-element result vector
+ bool ExtendEle = false;
+ bool OverloadInt = false;
const char *s = NULL;
- SmallVector<Value *, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
- }
-
// AArch64 scalar builtins are not overloaded; they do not have an extra
// argument that specifies the vector type, so each case must be handled.
switch (BuiltinID) {
default: break;
// Scalar Add
case AArch64::BI__builtin_neon_vaddd_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vaddds;
- s = "vaddds"; usgn = false; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vaddds;
+ s = "vaddds"; break;
case AArch64::BI__builtin_neon_vaddd_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vadddu;
- s = "vadddu"; usgn = true; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vadddu;
+ s = "vadddu"; break;
// Scalar Sub
case AArch64::BI__builtin_neon_vsubd_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubds;
- s = "vsubds"; usgn = false; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vsubds;
+ s = "vsubds"; break;
case AArch64::BI__builtin_neon_vsubd_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vsubdu;
- s = "vsubdu"; usgn = true; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vsubdu;
+ s = "vsubdu"; break;
// Scalar Saturating Add
case AArch64::BI__builtin_neon_vqaddb_s8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqadds;
- s = "vqadds"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddh_s16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqadds;
- s = "vqadds"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqadds_s32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqadds;
- s = "vqadds"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddd_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqadds;
- s = "vqadds"; usgn = false; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqadds;
+ s = "vqadds"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddb_u8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqaddu;
- s = "vqaddu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddh_u16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqaddu;
- s = "vqaddu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqadds_u32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqaddu;
- s = "vqaddu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddd_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqaddu;
- s = "vqaddu"; usgn = true; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqaddu;
+ s = "vqaddu"; OverloadInt = true; break;
// Scalar Saturating Sub
case AArch64::BI__builtin_neon_vqsubb_s8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubs;
- s = "vqsubs"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubh_s16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubs;
- s = "vqsubs"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubs_s32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubs;
- s = "vqsubs"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubd_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubs;
- s = "vqsubs"; usgn = false; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqsubs;
+ s = "vqsubs"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubb_u8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqsubu;
- s = "vqsubu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubh_u16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqsubu;
- s = "vqsubu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubs_u32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqsubu;
- s = "vqsubu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubd_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqsubu;
- s = "vqsubu"; usgn = true; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqsubu;
+ s = "vqsubu"; OverloadInt = true; break;
// Scalar Shift Left
case AArch64::BI__builtin_neon_vshld_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshlds;
- s = "vshlds"; usgn = false; OverloadInt=false; break;
+ Int = Intrinsic::aarch64_neon_vshlds;
+ s = "vshlds"; break;
case AArch64::BI__builtin_neon_vshld_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vshldu;
- s = "vshldu"; usgn = true; OverloadInt = false; break;
+ Int = Intrinsic::aarch64_neon_vshldu;
+ s = "vshldu"; break;
// Scalar Saturating Shift Left
case AArch64::BI__builtin_neon_vqshlb_s8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshls;
- s = "vqshls"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshlh_s16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshls;
- s = "vqshls"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshls_s32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshls;
- s = "vqshls"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshld_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshls;
- s = "vqshls"; usgn = false; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqshls;
+ s = "vqshls"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshlb_u8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqshlu;
- s = "vqshlu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshlh_u16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqshlu;
- s = "vqshlu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshls_u32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqshlu;
- s = "vqshlu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqshld_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqshlu;
- s = "vqshlu"; usgn = true; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqshlu;
+ s = "vqshlu"; OverloadInt = true; break;
// Scalar Rounding Shift Left
case AArch64::BI__builtin_neon_vrshld_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshlds;
- s = "vrshlds"; usgn = false; OverloadInt=false; break;
+ Int = Intrinsic::aarch64_neon_vrshlds;
+ s = "vrshlds"; break;
case AArch64::BI__builtin_neon_vrshld_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vrshldu;
- s = "vrshldu"; usgn = true; OverloadInt=false; break;
+ Int = Intrinsic::aarch64_neon_vrshldu;
+ s = "vrshldu"; break;
// Scalar Saturating Rounding Shift Left
case AArch64::BI__builtin_neon_vqrshlb_s8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshls;
- s = "vqrshls"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshlh_s16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshls;
- s = "vqrshls"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshls_s32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshls;
- s = "vqrshls"; usgn = false; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshld_s64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshls;
- s = "vqrshls"; usgn = false; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqrshls;
+ s = "vqrshls"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshlb_u8:
- ET = NeonTypeFlags::Int8; Int = Intrinsic::aarch64_neon_vqrshlu;
- s = "vqrshlu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshlh_u16:
- ET = NeonTypeFlags::Int16; Int = Intrinsic::aarch64_neon_vqrshlu;
- s = "vqrshlu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshls_u32:
- ET = NeonTypeFlags::Int32; Int = Intrinsic::aarch64_neon_vqrshlu;
- s = "vqrshlu"; usgn = true; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqrshld_u64:
- ET = NeonTypeFlags::Int64; Int = Intrinsic::aarch64_neon_vqrshlu;
- s = "vqrshlu"; usgn = true; OverloadInt = true; break;
+ Int = Intrinsic::aarch64_neon_vqrshlu;
+ s = "vqrshlu"; OverloadInt = true; break;
// Scalar Reduce Pairwise Add
case AArch64::BI__builtin_neon_vpaddd_s64:
Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
- OverloadInt = false; break;
+ break;
case AArch64::BI__builtin_neon_vpadds_f32:
Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
- OverloadInt = false; break;
+ break;
case AArch64::BI__builtin_neon_vpaddd_f64:
Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
- OverloadInt = false; break;
+ break;
// Scalar Reduce Pairwise Floating Point Max
case AArch64::BI__builtin_neon_vpmaxs_f32:
Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
- OverloadInt = false; break;
+ break;
case AArch64::BI__builtin_neon_vpmaxqd_f64:
Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
- OverloadInt = false; break;
+ break;
// Scalar Reduce Pairwise Floating Point Min
case AArch64::BI__builtin_neon_vpmins_f32:
Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
- OverloadInt = false; break;
+ break;
case AArch64::BI__builtin_neon_vpminqd_f64:
Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
- OverloadInt = false; break;
+ break;
// Scalar Reduce Pairwise Floating Point Maxnm
case AArch64::BI__builtin_neon_vpmaxnms_f32:
Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
- OverloadInt = false; break;
+ break;
case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
- OverloadInt = false; break;
- // Scalar Reduce Pairwise Floating Point Minnm
+ break;
+ // Scalar Reduce Pairwise Floating Point Minnm
case AArch64::BI__builtin_neon_vpminnms_f32:
Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
- OverloadInt = false; break;
+ break;
case AArch64::BI__builtin_neon_vpminnmqd_f64:
Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
- OverloadInt = false; break;
+ break;
+  // The following intrinsics produce a scalar result across the vector lanes
+ case AArch64::BI__builtin_neon_vaddlv_s8:
+ case AArch64::BI__builtin_neon_vaddlv_s16:
+ case AArch64::BI__builtin_neon_vaddlvq_s8:
+ case AArch64::BI__builtin_neon_vaddlvq_s16:
+ case AArch64::BI__builtin_neon_vaddlvq_s32:
+ Int = Intrinsic::aarch64_neon_saddlv;
+ AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
+ case AArch64::BI__builtin_neon_vaddlv_u8:
+ case AArch64::BI__builtin_neon_vaddlv_u16:
+ case AArch64::BI__builtin_neon_vaddlvq_u8:
+ case AArch64::BI__builtin_neon_vaddlvq_u16:
+ case AArch64::BI__builtin_neon_vaddlvq_u32:
+ Int = Intrinsic::aarch64_neon_uaddlv;
+ AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
+ case AArch64::BI__builtin_neon_vmaxv_s8:
+ case AArch64::BI__builtin_neon_vmaxv_s16:
+ case AArch64::BI__builtin_neon_vmaxvq_s8:
+ case AArch64::BI__builtin_neon_vmaxvq_s16:
+ case AArch64::BI__builtin_neon_vmaxvq_s32:
+ Int = Intrinsic::aarch64_neon_smaxv;
+ AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
+ case AArch64::BI__builtin_neon_vmaxv_u8:
+ case AArch64::BI__builtin_neon_vmaxv_u16:
+ case AArch64::BI__builtin_neon_vmaxvq_u8:
+ case AArch64::BI__builtin_neon_vmaxvq_u16:
+ case AArch64::BI__builtin_neon_vmaxvq_u32:
+ Int = Intrinsic::aarch64_neon_umaxv;
+ AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
+ case AArch64::BI__builtin_neon_vminv_s8:
+ case AArch64::BI__builtin_neon_vminv_s16:
+ case AArch64::BI__builtin_neon_vminvq_s8:
+ case AArch64::BI__builtin_neon_vminvq_s16:
+ case AArch64::BI__builtin_neon_vminvq_s32:
+ Int = Intrinsic::aarch64_neon_sminv;
+ AcrossVec = true; ExtendEle = false; s = "sminv"; break;
+ case AArch64::BI__builtin_neon_vminv_u8:
+ case AArch64::BI__builtin_neon_vminv_u16:
+ case AArch64::BI__builtin_neon_vminvq_u8:
+ case AArch64::BI__builtin_neon_vminvq_u16:
+ case AArch64::BI__builtin_neon_vminvq_u32:
+ Int = Intrinsic::aarch64_neon_uminv;
+ AcrossVec = true; ExtendEle = false; s = "uminv"; break;
+ case AArch64::BI__builtin_neon_vaddv_s8:
+ case AArch64::BI__builtin_neon_vaddv_s16:
+ case AArch64::BI__builtin_neon_vaddvq_s8:
+ case AArch64::BI__builtin_neon_vaddvq_s16:
+ case AArch64::BI__builtin_neon_vaddvq_s32:
+ case AArch64::BI__builtin_neon_vaddv_u8:
+ case AArch64::BI__builtin_neon_vaddv_u16:
+ case AArch64::BI__builtin_neon_vaddvq_u8:
+ case AArch64::BI__builtin_neon_vaddvq_u16:
+ case AArch64::BI__builtin_neon_vaddvq_u32:
+ Int = Intrinsic::aarch64_neon_vaddv;
+ AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
+ case AArch64::BI__builtin_neon_vmaxvq_f32:
+ Int = Intrinsic::aarch64_neon_vmaxv;
+ AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
+ case AArch64::BI__builtin_neon_vminvq_f32:
+ Int = Intrinsic::aarch64_neon_vminv;
+ AcrossVec = true; ExtendEle = false; s = "vminv"; break;
+ case AArch64::BI__builtin_neon_vmaxnmvq_f32:
+ Int = Intrinsic::aarch64_neon_vmaxnmv;
+ AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
+ case AArch64::BI__builtin_neon_vminnmvq_f32:
+ Int = Intrinsic::aarch64_neon_vminnmv;
+ AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
}
if (!Int)
return 0;
// AArch64 scalar builtin that returns scalar type
- // and should be mapped to AArch64 intrinsic that takes
- // one-element vector type arguments and returns
+  // and should be mapped to an AArch64 intrinsic that returns a
// one-element vector type.
- llvm::Type *Ty = 0;
Function *F = 0;
- if (OverloadInt) {
+ SmallVector<Value *, 4> Ops;
+ if (AcrossVec) {
+    // Get the vector type of the single argument
+ const Expr *Arg = E->getArg(E->getNumArgs()-1);
+ llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+ llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+ llvm::Type *ETy = VTy->getElementType();
+ llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
+
+ if (ExtendEle) {
+ assert(!ETy->isFloatingPointTy());
+ RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
+ }
+
+ llvm::Type *Tys[2] = {RTy, VTy};
+ F = CGF.CGM.getIntrinsic(Int, Tys);
+ assert(E->getNumArgs() == 1);
+ }
+ else if (OverloadInt) {
// Determine the type of this overloaded AArch64 intrinsic
- NeonTypeFlags Type(ET, usgn, false);
- llvm::VectorType *VTy = GetNeonType(&CGF, Type, true);
- Ty = VTy;
- if (!Ty)
- return 0;
- F = CGF.CGM.getIntrinsic(Int, Ty);
+ const Expr *Arg = E->getArg(E->getNumArgs()-1);
+ llvm::Type *Ty = CGF.ConvertType(Arg->getType());
+ llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
+ assert(VTy);
+
+ F = CGF.CGM.getIntrinsic(Int, VTy);
} else
F = CGF.CGM.getIntrinsic(Int);
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+ }
+
Value *Result = CGF.EmitNeonCall(F, Ops, s);
llvm::Type *ResultType = CGF.ConvertType(E->getType());
// AArch64 intrinsic one-element vector type cast to
@@ -1959,7 +1995,7 @@
}
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+ const CallExpr *E) {
// Process AArch64 scalar builtins
if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))