[InstCombine] Forgo one-use check in `(X - (X & Y))   -->   (X & ~Y)` if Y is a constant

Summary:
This is potentially friendlier to further optimizations and
analyses, e.g.: https://godbolt.org/z/G24anE

This resolves the phase-ordering bug that was introduced
in D75145 for https://godbolt.org/z/2gBwF2
https://godbolt.org/z/XvgSua

Reviewers: spatel, nikic, dmgreen, xbolva00

Reviewed By: nikic, xbolva00

Subscribers: hiraditya, zzheng, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75757
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3eaa1b6..a781251 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1917,6 +1917,12 @@
       return NewSel;
   }
 
+  // (X - (X & Y))   -->   (X & ~Y)
+  if (match(Op1, m_c_And(m_Specific(Op0), m_Value(Y))) &&
+      (Op1->hasOneUse() || isa<Constant>(Y)))
+    return BinaryOperator::CreateAnd(
+        Op0, Builder.CreateNot(Y, Y->getName() + ".not"));
+
   if (Op1->hasOneUse()) {
     Value *Y = nullptr, *Z = nullptr;
     Constant *C = nullptr;
@@ -1926,11 +1932,6 @@
       return BinaryOperator::CreateAdd(Op0,
                                       Builder.CreateSub(Z, Y, Op1->getName()));
 
-    // (X - (X & Y))   -->   (X & ~Y)
-    if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0))))
-      return BinaryOperator::CreateAnd(Op0,
-                                  Builder.CreateNot(Y, Y->getName() + ".not"));
-
     // Subtracting -1/0 is the same as adding 1/0:
     // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y)
     // 'nuw' is dropped in favor of the canonical form.
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 1b8ef83..dc1426c 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -1531,7 +1531,7 @@
 ; CHECK-LABEL: @test75(
 ; CHECK-NEXT:    [[T0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub i8 [[X]], [[T0]]
+; CHECK-NEXT:    [[T1:%.*]] = and i8 [[X]], 7
 ; CHECK-NEXT:    ret i8 [[T1]]
 ;
   %t0 = and i8 %x, -8
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index e91bd2e..f905355 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -19,7 +19,7 @@
 ; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; EPILOG-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
 ; EPILOG:       entry.new:
-; EPILOG-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TRIP]], -8
 ; EPILOG-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; EPILOG:  loop_latch.epil:
 ; EPILOG-NEXT:     %epil.iter.sub = add i64 %epil.iter, -1
@@ -147,7 +147,7 @@
 ; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; EPILOG-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
 ; EPILOG:       entry.new:
-; EPILOG-NEXT:    %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT:    %unroll_iter = and i64 [[TRIP]], -8
 ; EPILOG-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; EPILOG:  loop_header:
 ; EPILOG-NEXT:     %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index b85e09b..6ea1b54 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -21,7 +21,7 @@
 ; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new
 
 ; CHECK-LABEL: for.body.lr.ph.new:
-; CHECK: %unroll_iter = sub nsw i64 %wide.trip.count, %xtraiter
+; CHECK: %unroll_iter = and i64 %wide.trip.count, 4294967292
 ; CHECK: br label %for.body
 
 ; CHECK: [[CLEANUP]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 3d57991..4ad2db4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -35,7 +35,7 @@
 ; AUTO_VEC-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 96
 ; AUTO_VEC-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
 ; AUTO_VEC:       vector.ph.new:
-; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = sub nsw i64 [[TMP3]], [[XTRAITER]]
+; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 1152921504606846972
 ; AUTO_VEC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ]
@@ -203,7 +203,7 @@
 ; AUTO_VEC-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; AUTO_VEC-NEXT:    br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; AUTO_VEC:       for.body.preheader.new:
-; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = sub nsw i64 [[ZEXT]], [[XTRAITER]]
+; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 4294967288
 ; AUTO_VEC-NEXT:    br label [[FOR_BODY:%.*]]
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
@@ -306,7 +306,7 @@
 ; AUTO_VEC-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 48
 ; AUTO_VEC-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
 ; AUTO_VEC:       vector.ph.new:
-; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = sub nsw i64 [[TMP4]], [[XTRAITER]]
+; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP4]], 2305843009213693948
 ; AUTO_VEC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ]
@@ -459,7 +459,7 @@
 ; AUTO_VEC-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
 ; AUTO_VEC-NEXT:    br i1 [[TMP2]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
 ; AUTO_VEC:       entry.new:
-; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = sub nsw i64 [[SMAX]], [[XTRAITER]]
+; AUTO_VEC-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800
 ; AUTO_VEC-NEXT:    br label [[FOR_BODY:%.*]]
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll b/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
index 0441e8a..aff7dcc 100644
--- a/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
+++ b/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
@@ -19,7 +19,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
 ; CHECK:       for.body.lr.ph.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[LIMIT]], [[XTRAITER]]
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ]
@@ -66,7 +66,7 @@
 ; NPM-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; NPM-NEXT:    br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
 ; NPM:       for.body.lr.ph.new:
-; NPM-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[LIMIT]], [[XTRAITER]]
+; NPM-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8
 ; NPM-NEXT:    [[AND_0:%.*]] = and i64 [[CONV]], 1
 ; NPM-NEXT:    br label [[FOR_BODY:%.*]]
 ; NPM:       for.cond.cleanup.loopexit.unr-lcssa: