AMDGPU: Improve nsw/nuw/exact when promoting uniform i16 ops
These were simply preserving the flags of the original operation,
which was too conservative in most cases and incorrect for mul.
nsw/nuw may be needed for some combines to cleanup messes when
intermediate sext_inregs are introduced later.
Tested valid combinations with alive.
llvm-svn: 293776
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 96131d4..23f124b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -59,8 +59,6 @@
/// binary operation \p V.
///
/// \returns Binary operation \p V.
- Value *copyFlags(const BinaryOperator &I, Value *V) const;
-
/// \returns \p T's base element bit width.
unsigned getBaseElementBitWidth(const Type *T) const;
@@ -156,21 +154,6 @@
} // end anonymous namespace
-Value *AMDGPUCodeGenPrepare::copyFlags(
- const BinaryOperator &I, Value *V) const {
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
- if (!BinOp) // Possibly constant expression.
- return V;
-
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
- BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- } else if (isa<PossiblyExactOperator>(BinOp))
- BinOp->setIsExact(I.isExact());
-
- return V;
-}
-
unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
assert(needsPromotionToI32(T) && "T does not need promotion to i32");
@@ -206,6 +189,34 @@
return needsPromotionToI32(cast<VectorType>(T)->getElementType());
}
+// Return true if the op promoted to i32 should have nsw set.
+static bool promotedOpIsNSW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ case Instruction::Mul:
+ return I.hasNoUnsignedWrap();
+ default:
+ return false;
+ }
+}
+
+// Return true if the op promoted to i32 should have nuw set.
+static bool promotedOpIsNUW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Mul:
+ return true;
+ case Instruction::Sub:
+ return I.hasNoUnsignedWrap();
+ default:
+ return false;
+ }
+}
+
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
assert(needsPromotionToI32(I.getType()) &&
"I does not need promotion to i32");
@@ -230,7 +241,19 @@
ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
}
- ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+
+ ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
+ if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
+ if (promotedOpIsNSW(cast<Instruction>(I)))
+ Inst->setHasNoSignedWrap();
+
+ if (promotedOpIsNUW(cast<Instruction>(I)))
+ Inst->setHasNoUnsignedWrap();
+
+ if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+ Inst->setIsExact(ExactOp->isExact());
+ }
+
TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
I.replaceAllUsesWith(TruncRes);