AMDGPU: Improve nsw/nuw/exact when promoting uniform i16 ops These were simply preserving the flags of the original operation, which was too conservative in most cases and incorrect for mul. nsw/nuw may be needed for some combines to cleanup messes when intermediate sext_inregs are introduced later. Tested valid combinations with alive. llvm-svn: 293776

commit: d59e6404559f9adc510b1d89bae7177468c6d8c9 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Wed Feb 01 16:25:23 2017 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Wed Feb 01 16:25:23 2017 +0000
tree: 9955c89f3584d012d2f6bcb34e91de2d859619c9
parent: 08da2e28eebe4f1f3ef0beef91a83287e9b3b352 [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 96131d4..23f124b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

@@ -59,8 +59,6 @@
   /// binary operation \p V.
   ///
   /// \returns Binary operation \p V.
-  Value *copyFlags(const BinaryOperator &I, Value *V) const;
-
   /// \returns \p T's base element bit width.
   unsigned getBaseElementBitWidth(const Type *T) const;
 
@@ -156,21 +154,6 @@
 
 } // end anonymous namespace
 
-Value *AMDGPUCodeGenPrepare::copyFlags(
-    const BinaryOperator &I, Value *V) const {
-  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
-  if (!BinOp) // Possibly constant expression.
-    return V;
-
-  if (isa<OverflowingBinaryOperator>(BinOp)) {
-    BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
-    BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
-  } else if (isa<PossiblyExactOperator>(BinOp))
-    BinOp->setIsExact(I.isExact());
-
-  return V;
-}
-
 unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
   assert(needsPromotionToI32(T) && "T does not need promotion to i32");
 
@@ -206,6 +189,34 @@
   return needsPromotionToI32(cast<VectorType>(T)->getElementType());
 }
 
+// Return true if the op promoted to i32 should have nsw set.
+static bool promotedOpIsNSW(const Instruction &I) {
+  switch (I.getOpcode()) {
+  case Instruction::Shl:
+  case Instruction::Add:
+  case Instruction::Sub:
+    return true;
+  case Instruction::Mul:
+    return I.hasNoUnsignedWrap();
+  default:
+    return false;
+  }
+}
+
+// Return true if the op promoted to i32 should have nuw set.
+static bool promotedOpIsNUW(const Instruction &I) {
+  switch (I.getOpcode()) {
+  case Instruction::Shl:
+  case Instruction::Add:
+  case Instruction::Mul:
+    return true;
+  case Instruction::Sub:
+    return I.hasNoUnsignedWrap();
+  default:
+    return false;
+  }
+}
+
 bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
   assert(needsPromotionToI32(I.getType()) &&
          "I does not need promotion to i32");
@@ -230,7 +241,19 @@
     ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
     ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
   }
-  ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+
+  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
+  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
+    if (promotedOpIsNSW(cast<Instruction>(I)))
+      Inst->setHasNoSignedWrap();
+
+    if (promotedOpIsNUW(cast<Instruction>(I)))
+      Inst->setHasNoUnsignedWrap();
+
+    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+      Inst->setIsExact(ExactOp->isExact());
+  }
+
   TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
 
   I.replaceAllUsesWith(TruncRes);
commit	d59e6404559f9adc510b1d89bae7177468c6d8c9	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Wed Feb 01 16:25:23 2017 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Wed Feb 01 16:25:23 2017 +0000
tree	9955c89f3584d012d2f6bcb34e91de2d859619c9
parent	08da2e28eebe4f1f3ef0beef91a83287e9b3b352 [diff] [blame]