[JumpThreading] Fix threading with unusual PHI nodes.

If the block being cloned contains a PHI node, we generally need to clone
that PHI node, even though the clone is trivial (the new block has only
one predecessor). If the operand of the PHI is an instruction in the block
being cloned, the correct value for the operand doesn't exist until
SSAUpdater constructs it.

We usually don't hit this issue because we try to avoid threading across
loop headers, but it can still occur in some cases involving irreducible
CFGs. I added a hidden, off-by-default flag
(-jump-threading-across-loop-headers) that allows threading across loop
headers, to make the testcase easier to understand.

Thanks to Brian Rzycki for reducing the testcase.

Fixes https://bugs.llvm.org/show_bug.cgi?id=42085.

Differential Revision: https://reviews.llvm.org/D63913

llvm-svn: 365094
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index c80cdd0..b86bf2f 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -102,6 +102,12 @@
     cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool> ThreadAcrossLoopHeaders(
+    "jump-threading-across-loop-headers",
+    cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
+    cl::init(false), cl::Hidden);
+
+
 namespace {
 
   /// This pass performs 'jump threading', which looks at blocks that have
@@ -368,7 +374,8 @@
     if (!DT.isReachableFromEntry(&BB))
       Unreachable.insert(&BB);
 
-  FindLoopHeaders(F);
+  if (!ThreadAcrossLoopHeaders)
+    FindLoopHeaders(F);
 
   bool EverChanged = false;
   bool Changed;
@@ -1978,8 +1985,14 @@
   }
 
   BasicBlock::iterator BI = BB->begin();
-  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
-    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+  // Clone the phi nodes of BB into NewBB. The resulting phi nodes are trivial,
+  // since NewBB only has one predecessor, but SSAUpdater might need to rewrite
+  // the operand of the cloned phi.
+  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+    PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
+    NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
+    ValueMapping[PN] = NewPN;
+  }
 
   // Clone the non-phi instructions of BB into NewBB, keeping track of the
   // mapping and using it to remap operands in the cloned instructions.
diff --git a/llvm/test/Transforms/JumpThreading/loop-phi.ll b/llvm/test/Transforms/JumpThreading/loop-phi.ll
new file mode 100644
index 0000000..7ec69f9
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/loop-phi.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -jump-threading -S -jump-threading-across-loop-headers | FileCheck %s
+
+; Make sure we correctly distinguish between %tmp15 and %tmp16 when we clone
+; body2.
+
+; CHECK:      body2.thread:
+; CHECK-NEXT: %tmp163 = add i32 %tmp165, 1
+; CHECK-NEXT: br label %latch1
+
+; CHECK:      latch1:
+; CHECK-NEXT: %tmp165 = phi i32 [ %tmp163, %body2.thread ], [ %tmp16, %body2 ]
+; CHECK-NEXT: %tmp154 = phi i32 [ %tmp165, %body2.thread ], [ %tmp15, %body2 ]
+
+define i32 @test(i1 %ARG1, i1 %ARG2, i32 %n) {
+entry:
+  br label %head1
+
+head1:                                            ; preds = %entry, %body1
+  %tmp = phi i32 [ 0, %entry ], [ %tmp16, %body1 ]
+  %tmp3 = phi i32 [ 0, %entry ], [ %tmp16, %body1 ]
+  %tmp4 = phi i32 [ 0, %entry ], [ %tmp16, %body1 ]
+  br i1 %ARG1, label %exit, label %body2
+
+body1:                                            ; preds = %latch1
+  %tmp12 = icmp sgt i32 %tmp16, 1
+  br i1 %tmp12, label %body2, label %head1
+
+body2:                                            ; preds = %head1, %body1
+  %tmp14 = phi i32 [ %tmp16, %body1 ], [ %tmp, %head1 ]
+  %tmp15 = phi i32 [ %tmp16, %body1 ], [ %tmp3, %head1 ]
+  %tmp16 = add i32 %tmp14, 1
+  br i1 %ARG2, label %exit, label %latch1
+
+latch1:                                           ; preds = %body2
+  %tmp18 = icmp sgt i32 %tmp16, %n
+  br i1 %tmp18, label %exit, label %body1
+
+exit:                                             ; preds = %latch1, %body2, %head1
+  %rc = phi i32 [ %tmp15, %body2 ], [ %tmp15, %latch1 ], [ -1, %head1 ]
+  ret i32 %rc
+}
diff --git a/llvm/test/Transforms/JumpThreading/lvi-tristate.ll b/llvm/test/Transforms/JumpThreading/lvi-tristate.ll
index 0aa8738..94fd0e5 100644
--- a/llvm/test/Transforms/JumpThreading/lvi-tristate.ll
+++ b/llvm/test/Transforms/JumpThreading/lvi-tristate.ll
@@ -1,17 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -jump-threading -simplifycfg -S < %s | FileCheck %s
-; CHECK-NOT: bb6:
-; CHECK-NOT: bb7:
-; CHECK-NOT: bb8:
-; CHECK-NOT: bb11:
-; CHECK-NOT: bb12:
-; CHECK: bb:
-; CHECK: bb2:
-; CHECK: bb4:
-; CHECK: bb10:
-; CHECK: bb13:
 declare void @ham()
 
 define void @hoge() {
+; CHECK-LABEL: @hoge(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = and i32 undef, 1073741823
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP]], 5
+; CHECK-NEXT:    br i1 [[COND]], label [[BB10:%.*]], label [[BB13:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    tail call void @ham()
+; CHECK-NEXT:    br label [[BB13]]
+; CHECK:       bb13:
+; CHECK-NEXT:    ret void
+;
 bb:
   %tmp = and i32 undef, 1073741823
   %tmp1 = icmp eq i32 %tmp, 2
diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll
index 7557a6c..08598f8 100644
--- a/llvm/test/Transforms/JumpThreading/select.ll
+++ b/llvm/test/Transforms/JumpThreading/select.ll
@@ -281,12 +281,12 @@
   ret i32 %j.add3
 
 ; CHECK-LABEL: @unfold3
-; CHECK: br i1 %cmp.i, label %.exit.thread2, label %cond.false.i
+; CHECK: br i1 %cmp.i, label %.exit.thread3, label %cond.false.i
 ; CHECK: br i1 %cmp4.i, label %.exit.thread, label %cond.false.6.i
-; CHECK: br i1 %cmp8.i, label %.exit.thread2, label %cond.false.10.i
+; CHECK: br i1 %cmp8.i, label %.exit.thread3, label %cond.false.10.i
 ; CHECK: br i1 %cmp13.i, label %.exit.thread, label %.exit
-; CHECK: br i1 %phitmp, label %.exit.thread, label %.exit.thread2
-; CHECK: br label %.exit.thread2
+; CHECK: br i1 %phitmp, label %.exit.thread, label %.exit.thread3
+; CHECK: br label %.exit.thread3
 }
 
 define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
@@ -320,11 +320,11 @@
 
 ; CHECK-LABEL: @unfold4
 ; CHECK: br i1 %cmp.i, label %.exit.thread, label %cond.false.i
-; CHECK: br i1 %cmp4.i, label %.exit.thread3, label %cond.false.6.i
+; CHECK: br i1 %cmp4.i, label %.exit.thread4, label %cond.false.6.i
 ; CHECK: br i1 %cmp8.i, label %.exit.thread, label %cond.false.10.i
-; CHECK: br i1 %cmp13.i, label %.exit.thread3, label %.exit
-; CHECK: br i1 %lnot.i18, label %.exit.thread, label %.exit.thread3
-; CHECK: br label %.exit.thread3
+; CHECK: br i1 %cmp13.i, label %.exit.thread4, label %.exit
+; CHECK: br i1 %lnot.i18, label %.exit.thread, label %.exit.thread4
+; CHECK: br label %.exit.thread4
 }
 
 define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
diff --git a/llvm/test/Transforms/JumpThreading/thread-loads.ll b/llvm/test/Transforms/JumpThreading/thread-loads.ll
index 1156f39..8e2edbd 100644
--- a/llvm/test/Transforms/JumpThreading/thread-loads.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-loads.ll
@@ -32,8 +32,8 @@
 
 bb3:		; preds = %bb1
 ; CHECK: bb3:
-; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
-; CHECK: ret i32 %res.01
+; CHECK: %res.02 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
+; CHECK: ret i32 %res.02
 	ret i32 %res.0
 }
 
@@ -71,8 +71,8 @@
 
 bb3:		; preds = %bb1
 ; CHECK: bb3:
-; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
-; CHECK: ret i32 %res.01
+; CHECK: %res.02 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
+; CHECK: ret i32 %res.02
 	ret i32 %res.0
 }