[InstCombine] fold another shifty abs pattern to cmp+sel (PR36036)

The bug report:
https://bugs.llvm.org/show_bug.cgi?id=36036

...requests a DAG change for this, but an IR canonicalization
probably handles most cases. If we still want to match this
pattern in the backend, there's a proposal for that too:
D47831
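
For reference, here is the canonicalization in IR form (the i8 case,
mirrored by the test diffs below):

  %sh = ashr i8 %x, 7
  %xor = xor i8 %sh, %x
  %r = sub i8 %xor, %sh
  =>
  %cmp = icmp slt i8 %x, 0
  %neg = sub i8 0, %x
  %r = select i1 %cmp, i8 %neg, i8 %x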

Alive proofs, including the nsw/nuw cases that were first noted
in D46988:

https://rise4fun.com/Alive/Kmp
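
As shown in those proofs, nsw/nuw on the sub transfer to the new
negate; for example (mirroring the shifty_sub_nsw_commute test below):

  %r = sub nsw i8 %xor, %sh
  =>
  %neg = sub nsw i8 0, %x
  %r = select i1 %cmp, i8 %neg, i8 %x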

This patch is largely copied from the existing code that was
initially added with:
D40984
...but I didn't see much gain from trying to share code.
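
For context, that existing fold in InstCombineAndOrXor.cpp handles the
add-based spelling of the same idiom; roughly (a sketch, assuming the
shift amount is bitwidth - 1):

  %sh = ashr i32 %x, 31
  %add = add i32 %x, %sh
  %abs = xor i32 %add, %sh
  =>
  %cmp = icmp slt i32 %x, 0
  %neg = sub i32 0, %x
  %abs = select i1 %cmp, i32 %neg, i32 %x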

llvm-svn: 334137
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0ad0771..9be0455 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1773,6 +1773,27 @@
     if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
       return replaceInstUsesWith(I, Res);
 
+  // Canonicalize a shifty way to code absolute value to the common pattern.
+  // There are 2 potential commuted variants.
+  // We're relying on the fact that we only do this transform when the shift has
+  // exactly 2 uses and the xor has exactly 1 use (otherwise, we might increase
+  // instructions).
+  Value *A;
+  const APInt *ShAmt;
+  Type *Ty = I.getType();
+  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
+      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
+      match(Op0, m_OneUse(m_c_Xor(m_Specific(A), m_Specific(Op1))))) {
+    // B = ashr i32 A, 31 ; smear the sign bit
+    // sub (xor A, B), B  ; flip bits if negative and subtract -1 (add 1)
+    // --> (A < 0) ? -A : A
+    Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+    // Copy the nuw/nsw flags from the sub to the negate.
+    Value *Neg = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(),
+                                   I.hasNoSignedWrap());
+    return SelectInst::Create(Cmp, Neg, A);
+  }
+
   bool Changed = false;
   if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
     Changed = true;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 475c94f..7bf9547 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2728,7 +2728,7 @@
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;
 
-  // Canonicalize the shifty way to code absolute value to the common pattern.
+  // Canonicalize a shifty way to code absolute value to the common pattern.
   // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
   // We're relying on the fact that we only do this transform when the shift has
   // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll
index ab1fb3f..2f773dc 100644
--- a/llvm/test/Transforms/InstCombine/abs-1.ll
+++ b/llvm/test/Transforms/InstCombine/abs-1.ll
@@ -301,9 +301,9 @@
 
 define i8 @shifty_sub(i8 %x) {
 ; CHECK-LABEL: @shifty_sub(
-; CHECK-NEXT:    [[SH:%.*]] = ashr i8 [[X:%.*]], 7
-; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[SH]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[XOR]], [[SH]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sh = ashr i8 %x, 7
@@ -314,9 +314,9 @@
 
 define i8 @shifty_sub_nsw_commute(i8 %x) {
 ; CHECK-LABEL: @shifty_sub_nsw_commute(
-; CHECK-NEXT:    [[SH:%.*]] = ashr i8 [[X:%.*]], 7
-; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[SH]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = sub nsw i8 [[XOR]], [[SH]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sh = ashr i8 %x, 7
@@ -327,9 +327,8 @@
 
 define <4 x i32> @shifty_sub_nuw_vec_commute(<4 x i32> %x) {
 ; CHECK-LABEL: @shifty_sub_nuw_vec_commute(
-; CHECK-NEXT:    [[SH:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i32> [[SH]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = sub nuw <4 x i32> [[XOR]], [[SH]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
@@ -340,9 +339,8 @@
 
 define i12 @shifty_sub_nsw_nuw(i12 %x) {
 ; CHECK-LABEL: @shifty_sub_nsw_nuw(
-; CHECK-NEXT:    [[SH:%.*]] = ashr i12 [[X:%.*]], 11
-; CHECK-NEXT:    [[XOR:%.*]] = xor i12 [[SH]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = sub nuw nsw i12 [[XOR]], [[SH]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i12 [[X:%.*]], 0
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i12 [[X]], i12 0
 ; CHECK-NEXT:    ret i12 [[R]]
 ;
   %sh = ashr i12 %x, 11