[globalisel][combine] Improve the truncate placement for the extending-loads combine

This brings the extending loads patch back to the original intent but minus the
PHI bug and with another small improvement to de-dupe truncates that are
inserted into the same block.

The truncates are sunk to their uses unless this would require inserting before a
phi in which case it sinks to the _beginning_ of the predecessor block for that
path (but no earlier than the def).

The reason for choosing the beginning of the predecessor is that it makes de-duping
multiple truncates in the same block simple, and optimized code is going to run a
scheduler at some point which will likely change the position anyway.

llvm-svn: 343804
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 30e0da3..06f0ea5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -105,6 +105,37 @@
   }
   return CurrentUse;
 }
+
+/// Find a suitable place to insert some instructions and insert them. This
+/// function accounts for special cases like inserting before a PHI node.
+/// The current strategy for inserting before PHI's is to duplicate the
+/// instructions for each predecessor. However, while that's ok for G_TRUNC
+/// on most targets since it generally requires no code, other targets/cases may
+/// want to try harder to find a dominating block.
+static void InsertInsnsWithoutSideEffectsBeforeUse(
+    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
+    std::function<void(MachineBasicBlock::iterator)> Inserter) {
+  MachineInstr &UseMI = *UseMO.getParent();
+
+  MachineBasicBlock *InsertBB = UseMI.getParent();
+
+  // If the use is a PHI then we want the predecessor block instead.
+  if (UseMI.isPHI()) {
+    MachineOperand *PredBB = std::next(&UseMO);
+    InsertBB = PredBB->getMBB();
+  }
+
+  // If the block is the same block as the def then we want to insert just after
+  // the def instead of at the start of the block.
+  if (InsertBB == DefMI.getParent()) {
+    MachineBasicBlock::iterator InsertPt = &DefMI;
+    Inserter(std::next(InsertPt));
+    return;
+  }
+
+  // Otherwise we want the start of the BB
+  Inserter(InsertBB->getFirstNonPHI());
+}
 } // end anonymous namespace
 
 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
@@ -153,22 +184,6 @@
   // type since by definition the result of an extend is larger.
   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
 
-  // Rewrite the load and schedule the canonical use for erasure.
-  const auto TruncateUse = [&MI](MachineIRBuilder &Builder,
-                                 MachineOperand &UseMO, unsigned DstReg,
-                                 unsigned SrcReg) {
-    MachineInstr &UseMI = *UseMO.getParent();
-    MachineBasicBlock &UseMBB = *UseMI.getParent();
-
-    Builder.setInsertPt(UseMBB, MachineBasicBlock::iterator(UseMI));
-
-    if (UseMI.isPHI())
-      Builder.setInsertPt(*MI.getParent(),
-                          std::next(MachineBasicBlock::iterator(MI)));
-
-    Builder.buildTrunc(DstReg, SrcReg);
-  };
-
   // Rewrite the load to the chosen extending load.
   unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
   MI.setDesc(
@@ -180,7 +195,7 @@
 
   // Rewrite all the uses to fix up the types.
   SmallVector<MachineInstr *, 1> ScheduleForErase;
-  SmallVector<std::pair<MachineOperand*, unsigned>, 4> ScheduleForAssignReg;
+  SmallVector<std::pair<MachineOperand *, MachineInstr *>, 4> ScheduleForInsert;
   for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
     MachineInstr *UseMI = UseMO.getParent();
 
@@ -204,7 +219,6 @@
           //    ... = ... %2(s32)
           MRI.replaceRegWith(UseDstReg, ChosenDstReg);
           ScheduleForErase.push_back(UseMO.getParent());
-          Observer.erasedInstr(*UseMO.getParent());
         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
           // If the preferred size is smaller, then keep the extend but extend
           // from the result of the extending load. For example:
@@ -229,9 +243,11 @@
           //    %4:_(s8) = G_TRUNC %2:_(s32)
           //    %3:_(s64) = G_ZEXT %2:_(s8)
           //    ... = ... %3(s64)
-          unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-          TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
-          ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+          InsertInsnsWithoutSideEffectsBeforeUse(
+              Builder, MI, UseMO,
+              [&](MachineBasicBlock::iterator InsertBefore) {
+                ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+              });
         }
         continue;
       }
@@ -239,20 +255,40 @@
       // We're going to update the load to def this value later so just erase
       // the old extend.
       ScheduleForErase.push_back(UseMO.getParent());
-      Observer.erasedInstr(*UseMO.getParent());
       continue;
     }
 
     // The use isn't an extend. Truncate back to the type we originally loaded.
     // This is free on many targets.
-    unsigned NewVReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-    TruncateUse(Builder, UseMO, NewVReg, ChosenDstReg);
-    ScheduleForAssignReg.emplace_back(&UseMO, NewVReg);
+    InsertInsnsWithoutSideEffectsBeforeUse(
+        Builder, MI, UseMO, [&](MachineBasicBlock::iterator InsertBefore) {
+          ScheduleForInsert.emplace_back(&UseMO, &*InsertBefore);
+        });
   }
-  for (auto &Assignment : ScheduleForAssignReg)
-    Assignment.first->setReg(Assignment.second);
-  for (auto &EraseMI : ScheduleForErase)
+
+  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+  for (auto &InsertionInfo : ScheduleForInsert) {
+    MachineOperand *UseMO = InsertionInfo.first;
+    MachineInstr *InsertBefore = InsertionInfo.second;
+
+    MachineInstr *PreviouslyEmitted =
+        EmittedInsns.lookup(InsertBefore->getParent());
+    if (PreviouslyEmitted) {
+      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
+      continue;
+    }
+
+    Builder.setInsertPt(*InsertBefore->getParent(), InsertBefore);
+    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+    EmittedInsns[InsertBefore->getParent()] = NewMI;
+    UseMO->setReg(NewDstReg);
+    Observer.createdInstr(*NewMI);
+  }
+  for (auto &EraseMI : ScheduleForErase) {
     EraseMI->eraseFromParent();
+    Observer.erasedInstr(*EraseMI);
+  }
   MI.getOperand(0).setReg(ChosenDstReg);
 
   return true;