[X86] Replace VPCOM/VPCOMU with generic integer comparisons (clang)
These intrinsics can always be replaced with generic integer comparisons without any regression in codegen, even for -O0/-fast-isel cases.
Noticed while cleaning up vector integer comparison costs for PR40376.
A future commit will remove/autoupgrade the existing VPCOM/VPCOMU llvm intrinsics.
llvm-svn: 351687
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6732915..390d28f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9198,6 +9198,46 @@
return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
}
+static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+ bool IsSigned) {
+ Value *Op0 = Ops[0];
+ Value *Op1 = Ops[1];
+ llvm::Type *Ty = Op0->getType();
+ uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
+
+ CmpInst::Predicate Pred;
+ switch (Imm) {
+ case 0x0:
+ Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ break;
+ case 0x1:
+ Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ break;
+ case 0x2:
+ Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ break;
+ case 0x3:
+ Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ break;
+ case 0x4:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case 0x5:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case 0x6:
+ return llvm::Constant::getNullValue(Ty); // FALSE
+ case 0x7:
+ return llvm::Constant::getAllOnesValue(Ty); // TRUE
+ default:
+ llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+ }
+
+ Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
+ Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
+ return Res;
+}
+
static Value *EmitX86Select(CodeGenFunction &CGF,
Value *Mask, Value *Op0, Value *Op1) {
@@ -10928,6 +10968,16 @@
unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_vpcomb:
+ case X86::BI__builtin_ia32_vpcomw:
+ case X86::BI__builtin_ia32_vpcomd:
+ case X86::BI__builtin_ia32_vpcomq:
+ return EmitX86vpcom(*this, Ops, true);
+ case X86::BI__builtin_ia32_vpcomub:
+ case X86::BI__builtin_ia32_vpcomuw:
+ case X86::BI__builtin_ia32_vpcomud:
+ case X86::BI__builtin_ia32_vpcomuq:
+ return EmitX86vpcom(*this, Ops, false);
case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi: