[X86] Lowering addus/subus intrinsics to native IR
Summary: This patch lowers the x86 addus/subus (unsigned saturating add/subtract) intrinsics to native IR in order to enable optimizations.
Reviewers: craig.topper, spatel, RKSimon
Reviewed By: craig.topper
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D46892
llvm-svn: 339651
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7456e18..bbd2904 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8907,6 +8907,39 @@
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
+// Emit unsigned saturating addition or subtraction of Ops[0] and Ops[1].
+// Only unsigned (ICMP_UGT) compares are used; signed saturation is not handled.
+static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr *E,
+ SmallVectorImpl<Value *> &Ops,
+ bool IsAddition) {
+
+ // IR type of the call's result; used below for the all-ones saturation value.
+ llvm::Type *ResultType = CGF.ConvertType(E->getType());
+
+ Value *Res;
+ if (IsAddition) {
+ // ADDUS: a > (a+b) ? ~0 : (a+b)
+ // If Ops[0] > Add (unsigned), the add wrapped around, i.e. overflow occurred.
+ Value *Add = CGF.Builder.CreateAdd(Ops[0], Ops[1]);
+ Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Add);
+ Value *Max = llvm::Constant::getAllOnesValue(ResultType);
+ Res = CGF.Builder.CreateSelect(ICmp, Max, Add);
+ } else {
+ // SUBUS: max(a, b) - b, which yields 0 whenever a <= b (unsigned compare).
+ Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
+ Value *Select = CGF.Builder.CreateSelect(ICmp, Ops[0], Ops[1]);
+ Res = CGF.Builder.CreateSub(Select, Ops[1]);
+ }
+
+ if (E->getNumArgs() == 4) { // Masked intrinsics: Res, VecSRC and Mask go to EmitX86Select.
+ Value *VecSRC = Ops[2];
+ Value *Mask = Ops[3];
+ return EmitX86Select(CGF, Mask, Res, VecSRC);
+ }
+
+ return Res;
+}
+
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
@@ -10530,10 +10563,23 @@
Load->setVolatile(true);
return Load;
}
+ case X86::BI__builtin_ia32_paddusb512_mask:
+ case X86::BI__builtin_ia32_paddusw512_mask:
+ case X86::BI__builtin_ia32_paddusb256:
+ case X86::BI__builtin_ia32_paddusw256:
+ case X86::BI__builtin_ia32_paddusb128:
+ case X86::BI__builtin_ia32_paddusw128:
+ return EmitX86AddSubSatExpr(*this, E, Ops, true /* IsAddition */);
+ case X86::BI__builtin_ia32_psubusb512_mask:
+ case X86::BI__builtin_ia32_psubusw512_mask:
+ case X86::BI__builtin_ia32_psubusb256:
+ case X86::BI__builtin_ia32_psubusw256:
+ case X86::BI__builtin_ia32_psubusb128:
+ case X86::BI__builtin_ia32_psubusw128:
+ return EmitX86AddSubSatExpr(*this, E, Ops, false /* IsAddition */);
}
}
-
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<Value*, 4> Ops;