[ARM] Enabling range checks on Neon intrinsics' lane arguments
Summary:
Range checks were not properly performed in the lane arguments of Neon
intrinsics implemented based on splat operations. Calls to those
intrinsics where translated to `__builtin__shufflevector` calls directly
by the pre-processor through the arm_neon.h macros, missing the chance
for the proper range checks.
This patch enables the range check by introducing an auxiliary splat
instruction in arm_neon.td, delaying the translation to shufflevector
calls to CGBuiltin.cpp in clang after the checks were performed.
Reviewers: jmolloy, t.p.northover, rsmith, olista01, ostannard
Reviewed By: ostannard
Subscribers: ostannard, dnsampaio, danielkiss, kristof.beyls, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D74619
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b0be1ec..d0ac4dd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4495,10 +4495,15 @@
}
}
+Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
+ const ElementCount &Count) {
+ Value *SV = llvm::ConstantVector::getSplat(Count, C);
+ return Builder.CreateShuffleVector(V, V, SV, "lane");
+}
+
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
ElementCount EC = V->getType()->getVectorElementCount();
- Value *SV = llvm::ConstantVector::getSplat(EC, C);
- return Builder.CreateShuffleVector(V, V, SV, "lane");
+ return EmitNeonSplat(V, C, EC);
}
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
@@ -4605,6 +4610,10 @@
TypeModifier }
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
NEONMAP1(vabs_v, arm_neon_vabs, 0),
@@ -4886,6 +4895,10 @@
};
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
NEONMAP0(vaddhn_v),
@@ -5460,6 +5473,19 @@
switch (BuiltinID) {
default: break;
+ case NEON::BI__builtin_neon_splat_lane_v:
+ case NEON::BI__builtin_neon_splat_laneq_v:
+ case NEON::BI__builtin_neon_splatq_lane_v:
+ case NEON::BI__builtin_neon_splatq_laneq_v: {
+ auto NumElements = VTy->getElementCount();
+ if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
+ NumElements = NumElements * 2;
+ if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
+ NumElements = NumElements / 2;
+
+ Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
+ return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
+ }
case NEON::BI__builtin_neon_vpadd_v:
case NEON::BI__builtin_neon_vpaddq_v:
// We don't allow fp/int overloading of intrinsics.