Strength reduce intrinsics with overflow into regular arithmetic operations if possible. Some intrinsics, like s/uadd.with.overflow and umul.with.overflow, are already strength reduced. This change adds other arithmetic intrinsics: s/usub.with.overflow, smul.with.overflow. It completes the work on PR20194. llvm-svn: 224417

commit: a451b9b0b547d01939b6ecf878a41a696eb50678 [log] [tgz]
author: Erik Eckstein <eeckstein@apple.com> Wed Dec 17 07:29:19 2014 +0000
committer: Erik Eckstein <eeckstein@apple.com> Wed Dec 17 07:29:19 2014 +0000
tree: cedd9a38597bffd26115f973cd4ec7256a4ba116
parent: 92731d26bcbb82f58ff7cc65f1f762b84198f82e [diff]
diff --git a/llvm/lib/Transforms/InstCombine/InstCombine.h b/llvm/lib/Transforms/InstCombine/InstCombine.h
index 755de87..326bf8f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombine.h

@@ -285,6 +285,7 @@
   bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 902b640..37ae797 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

@@ -1008,6 +1008,51 @@
   return false;
 }
 
+/// \brief Return true if we can prove that:
+///    (mul LHS, RHS)  === (mul nsw LHS, RHS)
+bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
+                                            Instruction *CxtI) {
+  if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
+
+    // Multiplying n * m significant bits yields a result of n + m significant
+    // bits. If the total number of significant bits does not exceed the
+    // result bit width (minus 1), there is no overflow.
+    // This means if we have enough leading sign bits in the operands
+    // we can guarantee that the result does not overflow.
+    // Ref: "Hacker's Delight" by Henry Warren
+    unsigned BitWidth = IT->getBitWidth();
+
+    // Note that underestimating the number of sign bits gives a more
+    // conservative answer.
+    unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
+                        ComputeNumSignBits(RHS, 0, CxtI);
+
+    // First handle the easy case: if we have enough sign bits there's
+    // definitely no overflow. 
+    if (SignBits > BitWidth + 1)
+      return true;
+    
+    // There are two ambiguous cases where there can be no overflow:
+    //   SignBits == BitWidth + 1    and
+    //   SignBits == BitWidth    
+    // The second case is difficult to check, therefore we only handle the
+    // first case.
+    if (SignBits == BitWidth + 1) {
+      // It overflows only when both arguments are negative and the true
+      // product is exactly the minimum negative number.
+      // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
+      // For simplicity we just check if at least one side is not negative.
+      bool LHSNonNegative, LHSNegative;
+      bool RHSNonNegative, RHSNegative;
+      ComputeSignBit(LHS, LHSNonNegative, LHSNegative, DL, 0, AT, CxtI, DT);
+      ComputeSignBit(RHS, RHSNonNegative, RHSNegative, DL, 0, AT, CxtI, DT);
+      if (LHSNonNegative || RHSNonNegative)
+        return true;
+    }
+  }
+  return false;
+}
+
 // Checks if any operand is negative and we can convert add to sub.
 // This function checks for following negative patterns
 //   ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 33af7ee..b214b55 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

@@ -427,6 +427,15 @@
         return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+      if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
+      }
+    } else {
+      if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
+      }
+    }
     break;
   }
   case Intrinsic::umul_with_overflow: {
@@ -477,6 +486,12 @@
                                     /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
+      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+      if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
+      }
+    }
     break;
   case Intrinsic::minnum:
   case Intrinsic::maxnum: {
commit	a451b9b0b547d01939b6ecf878a41a696eb50678	[log] [tgz]
author	Erik Eckstein <eeckstein@apple.com>	Wed Dec 17 07:29:19 2014 +0000
committer	Erik Eckstein <eeckstein@apple.com>	Wed Dec 17 07:29:19 2014 +0000
tree	cedd9a38597bffd26115f973cd4ec7256a4ba116
parent	92731d26bcbb82f58ff7cc65f1f762b84198f82e [diff]