Quick compiler: tighten null/0/0.0 workaround

Change 255389 worked around an issue with the compiler and
garbage collector being out of sync regarding the type of a Dalvik
register loaded with a constant 0 value.

The workaround was to detect the case of a constant 0 loaded
into a promoted floating point register, and then also store a
zero in the core/reference identity of that Dalvik vreg.

This CL tightens the workaround by ensuring that the additional
store is only performed in cases in which the promoted-to-float
Dalvik vreg is also used as a reference at some point in the
method.

Additionally, it improves the code sequence somewhat by reusing
the register loaded with zero for the subsequent store.  Further,
an unrelated enhancement is made to the floating point immediate
loading code.  For Arm, a few floating point constant values are
supported via vmov immediate.  However, 0.0 is not a bit pattern
that can be directly generated.  Previously, we would load 0.0 from
the method's literal pool.  In this CL we instead materialize +2.0
directly in the destination floating point register (via vmov
immediate), and then emit a vsub reg,reg,reg to convert it to +0.0.
This saves a few bytes of code space, and avoids a memory reference.

In the future, we'll want to have more info about the target
CPU's capabilities.  A vector exclusive or would likely be
better here.

Change-Id: Icacd85c86112c5355d35b536e2f7a41c0357682c
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index d1bf14e..7f37bea 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -47,10 +47,19 @@
 
 static LIR* LoadFPConstantValue(CompilationUnit* cu, int r_dest, int value)
 {
-  int encoded_imm = EncodeImmSingle(value);
   DCHECK(ARM_SINGLEREG(r_dest));
-  if (encoded_imm >= 0) {
-    return NewLIR2(cu, kThumb2Vmovs_IMM8, r_dest, encoded_imm);
+  if (value == 0) {
+    // TODO: we need better info about the target CPU.  a vector exclusive or
+    //       would probably be better here if we could rely on its existence.
+    // Load an immediate +2.0 (which encodes to 0)
+    NewLIR2(cu, kThumb2Vmovs_IMM8, r_dest, 0);
+    // +0.0 = +2.0 - +2.0
+    return NewLIR3(cu, kThumb2Vsubs, r_dest, r_dest, r_dest);
+  } else {
+    int encoded_imm = EncodeImmSingle(value);
+    if (encoded_imm >= 0) {
+      return NewLIR2(cu, kThumb2Vmovs_IMM8, r_dest, encoded_imm);
+    }
   }
   LIR* data_target = ScanLiteralPool(cu->literal_list, value, 0);
   if (data_target == NULL) {
@@ -600,24 +609,33 @@
 LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
                                        int val_lo, int val_hi)
 {
-  int encoded_imm = EncodeImmDouble(val_lo, val_hi);
   LIR* res;
+  int target_reg = S2d(r_dest_lo, r_dest_hi);
   if (ARM_FPREG(r_dest_lo)) {
-    if (encoded_imm >= 0) {
-      res = NewLIR2(cu, kThumb2Vmovd_IMM8, S2d(r_dest_lo, r_dest_hi),
-              encoded_imm);
+    if ((val_lo == 0) && (val_hi == 0)) {
+      // TODO: we need better info about the target CPU.  a vector exclusive or
+      //       would probably be better here if we could rely on its existence.
+      // Load an immediate +2.0 (which encodes to 0)
+      NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, 0);
+      // +0.0 = +2.0 - +2.0
+      res = NewLIR3(cu, kThumb2Vsubd, target_reg, target_reg, target_reg);
     } else {
-      LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
-      if (data_target == NULL) {
-        data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
+      int encoded_imm = EncodeImmDouble(val_lo, val_hi);
+      if (encoded_imm >= 0) {
+        res = NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, encoded_imm);
+      } else {
+        LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
+        if (data_target == NULL) {
+          data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
+        }
+        LIR* load_pc_rel =
+            RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
+                   target_reg, r15pc, 0, 0, 0, data_target);
+        SetMemRefType(cu, load_pc_rel, true, kLiteral);
+        load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
+        AppendLIR(cu, load_pc_rel);
+        res = load_pc_rel;
       }
-      LIR* load_pc_rel =
-          RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
-                 S2d(r_dest_lo, r_dest_hi), r15pc, 0, 0, 0, data_target);
-      SetMemRefType(cu, load_pc_rel, true, kLiteral);
-      load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
-      AppendLIR(cu, load_pc_rel);
-      res = load_pc_rel;
     }
   } else {
     res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);