Quick compiler: Fix liveness tracking

Rework temp register liveness tracking to play nicely with aliased
physical registers, and re-enable liveness tracking optimization.

Add a pair of x86 utility routines that act like UpdateLoc(),
but only show in-register live temps if they are of the expected
register class.

Change-Id: I92779e0da2554689103e7488025be281f1a58989
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index a03e5f2..c57b813 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -512,6 +512,19 @@
     void Materialize();
 
     /*
+     * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
+     * without regard to data type.  In practice, this can result in UpdateLoc returning a
+     * location record for a Dalvik float value in a core register, and vice versa.  For targets
+     * which can inexpensively move data between core and float registers, this can often be a win.
+     * However, for x86 this is generally not a win.  These variants of UpdateLoc()
+     * take a register class argument - and will return an in-register location record only if
+     * the value is live in a temp register of the correct class.  Additionally, if the value is in
+     * a temp register of the wrong register class, it will be clobbered.
+     */
+    RegLocation UpdateLocTyped(RegLocation loc, int reg_class);
+    RegLocation UpdateLocWideTyped(RegLocation loc, int reg_class);
+
+    /*
      * @brief Analyze MIR before generating code, to prepare for the code generation.
      */
     void AnalyzeMIR();
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index d1c2e70..22e554e 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -173,7 +173,8 @@
    * If the result's location is in memory, then we do not need to do anything
    * more since the fstp has already placed the correct value in memory.
    */
-  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
+  RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) :
+      UpdateLocTyped(rl_dest, kFPReg);
   if (rl_result.location == kLocPhysReg) {
     /*
      * We already know that the result is in a physical register but do not know if it is the
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index ce5766f..698fce4 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1054,7 +1054,7 @@
     int32_t val_hi = High32Bits(val);
     FlushAllRegs();
     LockCallTemps();  // Prepare for explicit register usage.
-    rl_src1 = UpdateLocWide(rl_src1);
+    rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
     bool src1_in_reg = rl_src1.location == kLocPhysReg;
     int displacement = SRegOffset(rl_src1.s_reg_low);
 
@@ -1100,8 +1100,8 @@
 
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage.
-  rl_src1 = UpdateLocWide(rl_src1);
-  rl_src2 = UpdateLocWide(rl_src2);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
+  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
   // At this point, the VRs are in their home locations.
   bool src1_in_reg = rl_src1.location == kLocPhysReg;
@@ -1227,12 +1227,12 @@
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
-  rl_dest = UpdateLocWide(rl_dest);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
   if (rl_dest.location == kLocPhysReg) {
     // Ensure we are in a register pair
     RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
 
-    rl_src = UpdateLocWide(rl_src);
+    rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
     GenLongRegOrMemOp(rl_result, rl_src, op);
     StoreFinalValueWide(rl_dest, rl_result);
     return;
@@ -1285,7 +1285,7 @@
     rl_result = ForceTempWide(rl_result);
 
     // Perform the operation using the RHS.
-    rl_src2 = UpdateLocWide(rl_src2);
+    rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
     GenLongRegOrMemOp(rl_result, rl_src2, op);
 
     // And now record that the result is in the temp.
@@ -1296,8 +1296,8 @@
   // It wasn't in registers, so it better be in memory.
   DCHECK((rl_dest.location == kLocDalvikFrame) ||
          (rl_dest.location == kLocCompilerTemp));
-  rl_src1 = UpdateLocWide(rl_src1);
-  rl_src2 = UpdateLocWide(rl_src2);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
+  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
 
   // Get one of the source operands into temporary register.
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
@@ -1731,7 +1731,7 @@
   int64_t val = mir_graph_->ConstantValueWide(rl_src);
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWide(rl_dest);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
 
   // Can we just do this into memory?
   if ((rl_dest.location == kLocDalvikFrame) ||
@@ -1779,8 +1779,8 @@
   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
-  rl_dest = UpdateLocWide(rl_dest);
-  rl_src1 = UpdateLocWide(rl_src1);
+  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
+  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
 
   // Can we do this directly into the destination registers?
   if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
@@ -2070,7 +2070,7 @@
 
   if (unary) {
     rl_lhs = LoadValue(rl_lhs, kCoreReg);
-    rl_result = UpdateLoc(rl_dest);
+    rl_result = UpdateLocTyped(rl_dest, kCoreReg);
     rl_result = EvalLoc(rl_dest, kCoreReg, true);
     OpRegReg(op, rl_result.reg, rl_lhs.reg);
   } else {
@@ -2080,7 +2080,7 @@
       LoadValueDirectFixed(rl_rhs, t_reg);
       if (is_two_addr) {
         // Can we do this directly into memory?
-        rl_result = UpdateLoc(rl_dest);
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         rl_rhs = LoadValue(rl_rhs, kCoreReg);
         if (rl_result.location != kLocPhysReg) {
           // Okay, we can do this into memory
@@ -2104,12 +2104,12 @@
       // Multiply is 3 operand only (sort of).
       if (is_two_addr && op != kOpMul) {
         // Can we do this directly into memory?
-        rl_result = UpdateLoc(rl_dest);
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         if (rl_result.location == kLocPhysReg) {
           // Ensure res is in a core reg
           rl_result = EvalLoc(rl_dest, kCoreReg, true);
           // Can we do this from memory directly?
-          rl_rhs = UpdateLoc(rl_rhs);
+          rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
           if (rl_rhs.location != kLocPhysReg) {
             OpRegMem(op, rl_result.reg, rl_rhs);
             StoreFinalValue(rl_dest, rl_result);
@@ -2137,8 +2137,8 @@
         }
       } else {
         // Try to use reg/memory instructions.
-        rl_lhs = UpdateLoc(rl_lhs);
-        rl_rhs = UpdateLoc(rl_rhs);
+        rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
+        rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
         // We can't optimize with FP registers.
         if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
           // Something is difficult, so fall back to the standard case.
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 05bef52..bc33cb1 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -839,7 +839,7 @@
       }
     } else {
       // Runtime start index.
-      rl_start = UpdateLoc(rl_start);
+      rl_start = UpdateLocTyped(rl_start, kCoreReg);
       if (rl_start.location == kLocPhysReg) {
         // Handle "start index < 0" case.
         OpRegReg(kOpXor, rs_rBX, rs_rBX);
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 8423ec4..03312fd 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -890,4 +890,30 @@
   }
 }
 
+RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc, int reg_class) {
+  // Note: reg_class is unused; the class check compares loc.fp against loc.reg.IsFloat().
+  loc = UpdateLoc(loc);
+  const bool class_mismatch = (loc.location == kLocPhysReg) && (loc.reg.IsFloat() != loc.fp);
+  if (class_mismatch && GetRegInfo(loc.reg)->IsTemp()) {
+    Clobber(loc.reg);
+    FreeTemp(loc.reg);
+    loc.location = kLocDalvikFrame;
+    loc.reg = RegStorage::InvalidReg();
+  }
+  return loc;
+}
+
+RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) {
+  // Note: reg_class is unused; the class check compares loc.fp against loc.reg.IsFloat().
+  loc = UpdateLocWide(loc);
+  const bool class_mismatch = (loc.location == kLocPhysReg) && (loc.reg.IsFloat() != loc.fp);
+  if (class_mismatch && GetRegInfo(loc.reg)->IsTemp()) {
+    Clobber(loc.reg);
+    FreeTemp(loc.reg);
+    loc.location = kLocDalvikFrame;
+    loc.reg = RegStorage::InvalidReg();
+  }
+  return loc;
+}
+
 }  // namespace art