[DAG, X86] Revert r327197 "Revert r327170, r327171, r327172"

Reland ISel cycle checking improvements after simplifying node id
invariant traversal and correcting typo.

llvm-svn: 327898
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 90cd4ff..6a47a5b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -766,12 +766,11 @@
 
   if (ProduceCarry) {
     // Replace the carry-use
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
+    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
   }
 
   // Replace the remaining uses.
-  CurDAG->ReplaceAllUsesWith(N, RegSequence);
-  CurDAG->RemoveDeadNode(N);
+  ReplaceNode(N, RegSequence);
 }
 
 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 91d1ace..db25603 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -500,7 +500,7 @@
 
 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
-  CurDAG->ReplaceAllUsesWith(N, M);
+  ReplaceUses(N, M);
 }
 
 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 3540cf0..fa99249 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -662,7 +662,7 @@
     return;
   }
 
-  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0));
+  ReplaceUses(SDValue(N, 0), N->getOperand(0));
   CurDAG->RemoveDeadNode(N);
 }
 
@@ -726,7 +726,6 @@
   SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
                                   CurDAG->getVTList(OpTy), {Op});
   ReplaceNode(T, Op.getNode());
-  CurDAG->RemoveDeadNode(T);
 }
 
 void HexagonDAGToDAGISel::SelectP2D(SDNode *N) {
@@ -2185,4 +2184,3 @@
   RootHeights.clear();
   RootWeights.clear();
 }
-
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 97d5565..3758362 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1953,7 +1953,6 @@
   // If the mask is all -1's, generate "undef".
   if (!UseLeft && !UseRight) {
     ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
-    DAG.RemoveDeadNode(N);
     return;
   }
 
@@ -2009,7 +2008,6 @@
     NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
 
   ISel.ReplaceNode(N, NewN);
-  DAG.RemoveDeadNode(N);
 }
 
 void HvxSelector::selectVAlign(SDNode *N) {
@@ -2074,8 +2072,7 @@
   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
 
-  ReplaceUses(N, Result);
-  CurDAG->RemoveDeadNode(N);
+  ReplaceNode(N, Result);
 }
 
 void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
@@ -2117,8 +2114,7 @@
   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
 
-  ReplaceUses(N, Result);
-  CurDAG->RemoveDeadNode(N);
+  ReplaceNode(N, Result);
 }
 
 void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
@@ -2161,5 +2157,3 @@
   ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
   CurDAG->RemoveDeadNode(N);
 }
-
-
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 9bf2474..6e21308 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -596,7 +596,13 @@
   if (N.getNode()->getNodeId() == -1 ||
       N.getNode()->getNodeId() > Pos->getNodeId()) {
     DAG->RepositionNode(Pos->getIterator(), N.getNode());
-    N.getNode()->setNodeId(Pos->getNodeId());
+    // Mark Node as invalid for pruning as after this it may be a successor to a
+    // selected node but otherwise be in the same position of Pos.
+    // Conservatively mark it with the same -abs(Id) to assure node id
+    // invariant is preserved.
+    int PId = Pos->getNodeId();
+    int InvalidatedPId = -(PId + 1);
+    N->setNodeId((PId > 0) ? InvalidatedPId : PId);
   }
 }
 
@@ -1027,8 +1033,7 @@
   };
   SDValue New = convertTo(
       DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0));
-  ReplaceUses(N, New.getNode());
-  CurDAG->RemoveDeadNode(N);
+  ReplaceNode(N, New.getNode());
   return true;
 }
 
@@ -1119,8 +1124,7 @@
   SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
   SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
 
-  ReplaceUses(Node, Or.getNode());
-  CurDAG->RemoveDeadNode(Node);
+  ReplaceNode(Node, Or.getNode());
 
   SelectCode(Or.getNode());
 }
@@ -1618,4 +1622,3 @@
   if (MadeChange)
     CurDAG->RemoveDeadNodes();
 }
-
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4ff36f0..a5358a5 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2174,50 +2174,84 @@
       LoadNode->getOffset() != StoreNode->getOffset())
     return false;
 
-  // Check if the chain is produced by the load or is a TokenFactor with
-  // the load output chain as an operand. Return InputChain by reference.
+  bool FoundLoad = false;
+  SmallVector<SDValue, 4> ChainOps;
+  SmallVector<const SDNode *, 4> LoopWorklist;
+  SmallPtrSet<const SDNode *, 16> Visited;
+  const unsigned int Max = 1024;
+
+  //  Visualization of Load-Op-Store fusion:
+  // -------------------------
+  // Legend:
+  //    *-lines = Chain operand dependencies.
+  //    |-lines = Normal operand dependencies.
+  //    Dependencies flow down and right. n-suffix references multiple nodes.
+  //
+  //        C                        Xn  C
+  //        *                         *  *
+  //        *                          * *
+  //  Xn  A-LD    Yn                    TF         Yn
+  //   *    * \   |                       *        |
+  //    *   *  \  |                        *       |
+  //     *  *   \ |             =>       A--LD_OP_ST
+  //      * *    \|                                 \
+  //       TF    OP                                  \
+  //         *   | \                                  Zn
+  //          *  |  \
+  //         A-ST    Zn
+  //
+
+  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
+  //                                      #2: Yn -> LD
+  //                                      #3: ST -> Zn
+
+  // Ensure the transform is safe by checking for the dual
+  // dependencies to make sure we do not induce a loop.
+
+  // As LD is a predecessor to both OP and ST we can do this by checking:
+  //  a). if LD is a predecessor to a member of Xn or Yn.
+  //  b). if a Zn is a predecessor to ST.
+
+  // However, (b) can only occur through being a chain predecessor to
+  // ST, which is the same as Zn being a member or predecessor of Xn,
+  // which is a subset of LD being a predecessor of Xn. So it's
+  // subsumed by check (a).
+
   SDValue Chain = StoreNode->getChain();
 
-  bool ChainCheck = false;
+  // Gather X elements in ChainOps.
   if (Chain == Load.getValue(1)) {
-    ChainCheck = true;
-    InputChain = LoadNode->getChain();
+    FoundLoad = true;
+    ChainOps.push_back(Load.getOperand(0));
   } else if (Chain.getOpcode() == ISD::TokenFactor) {
-    SmallVector<SDValue, 4> ChainOps;
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
       SDValue Op = Chain.getOperand(i);
       if (Op == Load.getValue(1)) {
-        ChainCheck = true;
+        FoundLoad = true;
         // Drop Load, but keep its chain. No cycle check necessary.
         ChainOps.push_back(Load.getOperand(0));
         continue;
       }
-
-      // Make sure using Op as part of the chain would not cause a cycle here.
-      // In theory, we could check whether the chain node is a predecessor of
-      // the load. But that can be very expensive. Instead visit the uses and
-      // make sure they all have smaller node id than the load.
-      int LoadId = LoadNode->getNodeId();
-      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
-             UE = UI->use_end(); UI != UE; ++UI) {
-        if (UI.getUse().getResNo() != 0)
-          continue;
-        if (UI->getNodeId() > LoadId)
-          return false;
-      }
-
+      LoopWorklist.push_back(Op.getNode());
       ChainOps.push_back(Op);
     }
-
-    if (ChainCheck)
-      // Make a new TokenFactor with all the other input chains except
-      // for the load.
-      InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
-                                   MVT::Other, ChainOps);
   }
-  if (!ChainCheck)
+
+  if (!FoundLoad)
     return false;
 
+  // Worklist is currently Xn. Add Yn to worklist.
+  for (SDValue Op : StoredVal->ops())
+    if (Op.getNode() != LoadNode)
+      LoopWorklist.push_back(Op.getNode());
+
+  // Check (a) if Load is a predecessor to Xn + Yn
+  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
+                                   true))
+    return false;
+
+  InputChain =
+      CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
   return true;
 }
 
@@ -2448,6 +2482,8 @@
   MemOp[1] = LoadNode->getMemOperand();
   Result->setMemRefs(MemOp, MemOp + 2);
 
+  // Update Load Chain uses as well.
+  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
   ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
   ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
   CurDAG->RemoveDeadNode(Node);
@@ -3159,8 +3195,7 @@
       // Emit a testl or testw.
       SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
       // Replace CMP with TEST.
-      CurDAG->ReplaceAllUsesWith(Node, NewNode);
-      CurDAG->RemoveDeadNode(Node);
+      ReplaceNode(Node, NewNode);
       return;
     }
     break;