ART: Improve fused compare long branch

The code generated by a fused compare long with an immediate value is
much longer than comparing to a runtime value. Rewrite the code to
improve it.

The special cases are == or != to 0, and whether the source is a
temporary value or not.  Try to handle all of these well.  For all
except == and !=, we can use a 'cmp' instruction for the lower word, in
order to set the carry flag properly, rather than a 'sub' into a temp.

Also, we have to handle the <= and > cases properly, in order to get the
correct code generated, in the same manner as
X86Mir2Lir::GenFusedLongCmpBranch().

Change-Id: Ic29bf89ff2c06916d7fc996926997888ea013ba7
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4446f43..b747102 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -325,49 +325,60 @@
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
+  if (is_equality_test && val != 0) {
+    rl_src1 = ForceTempWide(rl_src1);
+  }
   RegStorage low_reg = rl_src1.reg.GetLow();
   RegStorage high_reg = rl_src1.reg.GetHigh();
 
-  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
-    FreeTemp(t_reg);
-    OpCondBranch(ccode, taken);
-    return;
+  if (is_equality_test) {
+    // We can simplify the comparison for ==, != against 0.
+    if (val == 0) {
+      if (IsTemp(low_reg)) {
+        OpRegReg(kOpOr, low_reg, high_reg);
+        // We have now changed it; ignore the old values.
+        Clobber(rl_src1.reg);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
+        FreeTemp(t_reg);
+      }
+      OpCondBranch(ccode, taken);
+      return;
+    }
+
+    // Need to compute the actual value for ==, !=.
+    OpRegImm(kOpSub, low_reg, val_lo);
+    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+    OpRegReg(kOpOr, high_reg, low_reg);
+    Clobber(rl_src1.reg);
+  } else if (ccode == kCondLe || ccode == kCondGt) {
+    // Swap operands and condition code to prevent use of zero flag.
+    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
+    LoadConstantWide(tmp, val);
+    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
+    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
+    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
+    FreeTemp(tmp);
+  } else {
+    // We can use a compare for the low word to set CF.
+    OpRegImm(kOpCmp, low_reg, val_lo);
+    if (IsTemp(high_reg)) {
+      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+      // We have now changed it; ignore the old values.
+      Clobber(rl_src1.reg);
+    } else {
+      // mov temp_reg, high_reg; sbb temp_reg, high_constant
+      RegStorage t_reg = AllocTemp();
+      OpRegCopy(t_reg, high_reg);
+      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
+      FreeTemp(t_reg);
+    }
   }
 
-  OpRegImm(kOpCmp, high_reg, val_hi);
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+  OpCondBranch(ccode, taken);
 }
 
 void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {