Subzero: Use register availability during lowering to improve the code.

The problem is that given code like this:

  a = b + c
  d = a + e
  ...
  ... (use of a) ...

Lowering may produce code like this, at least on x86:

  T1 = b
  T1 += c
  a = T1
  T2 = a
  T2 += e
  d = T2
  ...
  ... (use of a) ...

If "a" has a long live range, it may not get a register, resulting in clumsy code in the middle of the sequence like "a=reg; reg=a".  Normally one might expect store forwarding to make the clumsy code fast, but it does presumably add an extra instruction-retirement cycle to the critical path in a pointer-chasing loop, and makes a big difference on some benchmarks.

The simple fix here is, at the end of lowering "a=b+c", keep track of the final "a=T1" assignment.  Then, when lowering "d=a+e" and we look up "a", we can substitute "T1".  This slightly increases the live range of T1, but it does a great job of avoiding the redundant reload of the register from the stack location.

A more general fix (in the future) might be to do live range splitting and let the register allocator handle it.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=kschimpf@google.com

Review URL: https://codereview.chromium.org/1385433002 .
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 9339b79..f78c107 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -570,6 +570,7 @@
     // Ensure target lowering actually moved the cursor.
     assert(Context.getCur() != Orig);
   }
+  Context.availabilityReset();
   // Do preliminary lowering of the Phi instructions.
   Target->prelowerPhis();
 }
@@ -683,7 +684,7 @@
 
 // Validate the integrity of the live ranges in this block.  If there are any
 // errors, it prints details and returns false.  On success, it returns true.
-bool CfgNode::livenessValidateIntervals(Liveness *Liveness) {
+bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
   if (!BuildDefs::asserts())
     return true;
 
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 6aae2b3..14eafa9 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -110,7 +110,7 @@
 
 private:
   CfgNode(Cfg *Func, SizeT LabelIndex);
-  bool livenessValidateIntervals(Liveness *Liveness);
+  bool livenessValidateIntervals(Liveness *Liveness) const;
   Cfg *const Func;
   SizeT Number;            /// invariant: Func->Nodes[Number]==this
   const SizeT LabelNumber; /// persistent number for label generation
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 480b907..4922848 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -137,7 +137,7 @@
     const DefUseErrorList &DefsWithoutUses,
     const DefUseErrorList &UsesBeforeDefs,
     const CfgVector<InstNumberT> &LRBegin,
-    const CfgVector<InstNumberT> &LREnd) {
+    const CfgVector<InstNumberT> &LREnd) const {
   if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
     return true;
 
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index 6508aa5..d0aa111 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -62,7 +62,7 @@
   bool livenessValidateIntervals(const DefUseErrorList &DefsWithoutUses,
                                  const DefUseErrorList &UsesBeforeDefs,
                                  const CfgVector<InstNumberT> &LRBegin,
-                                 const CfgVector<InstNumberT> &LREnd);
+                                 const CfgVector<InstNumberT> &LREnd) const;
   void initForGlobal();
   void initForInfOnly();
   /// Move an item from the From set to the To set. From[Index] is pushed onto
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index eb4a54d..9a8a02a 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -46,6 +46,7 @@
   Cur = Begin;
   skipDeleted(Cur);
   Next = Cur;
+  availabilityReset();
 }
 
 void LoweringContext::insert(Inst *Inst) {
@@ -70,6 +71,31 @@
   return LastInserted;
 }
 
+void LoweringContext::availabilityReset() {
+  LastDest = nullptr;
+  LastSrc = nullptr;
+}
+
+void LoweringContext::availabilityUpdate() {
+  availabilityReset();
+  Inst *Instr = LastInserted;
+  if (Instr == nullptr)
+    return;
+  if (!Instr->isSimpleAssign())
+    return;
+  if (auto *SrcVar = llvm::dyn_cast<Variable>(Instr->getSrc(0))) {
+    LastDest = Instr->getDest();
+    LastSrc = SrcVar;
+  }
+}
+
+Variable *LoweringContext::availabilityGet(Operand *Src) const {
+  assert(Src);
+  if (Src == LastDest)
+    return LastSrc;
+  return nullptr;
+}
+
 TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
 #define SUBZERO_TARGET(X)                                                      \
   if (Target == Target_##X)                                                    \
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 0095a39..f35983f 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -65,6 +65,9 @@
   void setNext(InstList::iterator N) { Next = N; }
   void rewind();
   void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
+  void availabilityReset();
+  void availabilityUpdate();
+  Variable *availabilityGet(Operand *Src) const;
 
 private:
   /// Node is the argument to Inst::updateVars().
@@ -85,6 +88,11 @@
   InstList::iterator Begin;
   /// End is a copy of Insts.end(), used if Next needs to be advanced.
   InstList::iterator End;
+  /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
+  /// assignment inserted (provided Src is a variable).  This is used for simple
+  /// availability analysis.
+  Variable *LastDest = nullptr;
+  Variable *LastSrc = nullptr;
 
   void skipDeleted(InstList::iterator &I) const;
   void advanceForward(InstList::iterator &I) const;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 7f8b79b..0bcb5f7 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -5016,6 +5016,24 @@
   // the shl shift amount to be either an immediate or in ecx.)
   assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
 
+  // Substitute with an available infinite-weight variable if possible.  Only do
+  // this when we are not asking for a specific register, and when the
+  // substitution is not locked to a specific register, and when the types
+  // match, in order to capture the vast majority of opportunities and avoid
+  // corner cases in the lowering.
+  if (RegNum == Variable::NoRegister) {
+    if (Variable *Subst = getContext().availabilityGet(From)) {
+      // At this point we know there is a potential substitution available.
+      if (Subst->mustHaveReg() && !Subst->hasReg()) {
+        // At this point we know the substitution will have a register.
+        if (From->getType() == Subst->getType()) {
+          // At this point we know the substitution's register is compatible.
+          return Subst;
+        }
+      }
+    }
+  }
+
   if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
     // Before doing anything with a Mem operand, we need to ensure that the
     // Base and Index components are in physical registers.
@@ -5239,6 +5257,7 @@
   if (Ctx->getFlags().getOptLevel() == Opt_m1)
     return;
   markRedefinitions();
+  Context.availabilityUpdate();
 }
 
 template <class Machine>
diff --git a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
index 9eca74d..3f3dbaf 100644
--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -77,10 +77,12 @@
   ret void
 }
 ; CHECK-LABEL: CallIndirectGlobal
+; Allow the first call to be to a different register because of simple
+; availability optimization.
+; CHECK: call
 ; CHECK: call [[REGISTER:[a-z]+]]
 ; CHECK: call [[REGISTER]]
 ; CHECK: call [[REGISTER]]
-; CHECK: call [[REGISTER]]
 ;
 ; OPTM1-LABEL: CallIndirectGlobal
 ; OPTM1: call [[TARGET:.+]]
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
index 86da00d..d5ca2b1 100644
--- a/tests_lit/llvm2ice_tests/phi.ll
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -92,12 +92,15 @@
 
 ; CHECK-LABEL: testPhi3
 ; CHECK: push [[EBX:.*]]
-; CHECK: mov {{.*}},DWORD PTR [esp
-; CHECK: mov
-; CHECK: mov {{.*}},DWORD PTR [[ADDR:.*0x3e8]]
+; CHECK: mov [[EAX:.*]],DWORD PTR [esp
+; CHECK: mov [[ECX:.*]],[[EAX]]
+;;; start of loop body
+; CHECK: mov [[EDX:.*]],[[ECX]]
+; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0x3e8]
 ; CHECK: cmp {{.*}},0x0
 ; CHECK: jne
-; CHECK: mov DWORD PTR [[ADDR]]
+;;; start of epilog
+; CHECK: mov DWORD PTR {{.}}[[EDX]]+0x3e8],
 ; CHECK: pop [[EBX]]
 
 ; Test of "advanced phi lowering" with undef phi arg (integer vector).