[optimizing] More x86_64 code improvements

Use the constant area some more, use 32-bit immediates in movq
instructions when possible, and make other small tweaks.

Remove the commented-out code for Math.Abs(float/double), as it would
fail for the baseline compiler because the output is the same as the
input.

Change-Id: Ifa39f1865b94cec2e1c0a99af3066a645e9d3618
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2b5fcbd..a0f45ed 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -104,7 +104,7 @@
       if (is_div_) {
         __ negl(cpu_reg_);
       } else {
-        __ movl(cpu_reg_, Immediate(0));
+        __ xorl(cpu_reg_, cpu_reg_);
       }
 
     } else {
@@ -749,8 +749,7 @@
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
       }
-      Load64BitValue(CpuRegister(TMP), value);
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      Store64BitValueToStack(destination, value);
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -784,8 +783,7 @@
       if (location.IsRegister()) {
         Load64BitValue(location.AsRegister<CpuRegister>(), value);
       } else if (location.IsDoubleStackSlot()) {
-        Load64BitValue(CpuRegister(TMP), value);
-        __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
+        Store64BitValueToStack(location, value);
       } else {
         DCHECK(location.IsConstant());
         DCHECK_EQ(location.GetConstant(), const_to_move);
@@ -1849,14 +1847,12 @@
           // Processing a Dex `float-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
-          locations->AddTemp(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-int' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
-          locations->AddTemp(Location::RequiresFpuRegister());
           break;
 
         default:
@@ -1884,14 +1880,12 @@
           // Processing a Dex `float-to-long' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
-          locations->AddTemp(Location::RequiresFpuRegister());
           break;
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-long' instruction.
           locations->SetInAt(0, Location::RequiresFpuRegister());
           locations->SetOut(Location::RequiresRegister());
-          locations->AddTemp(Location::RequiresFpuRegister());
           break;
 
         default:
@@ -2067,14 +2061,11 @@
           // Processing a Dex `float-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
-          // temp = int-to-float(output)
-          __ cvtsi2ss(temp, output, false);
-          // if input >= temp goto done
-          __ comiss(input, temp);
+          // if input >= (float)INT_MAX goto done
+          __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -2092,14 +2083,11 @@
           // Processing a Dex `double-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
-          // temp = int-to-double(output)
-          __ cvtsi2sd(temp, output);
-          // if input >= temp goto done
-          __ comisd(input, temp);
+          // if input >= (double)INT_MAX goto done
+          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -2137,14 +2125,11 @@
           // Processing a Dex `float-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
-          // temp = long-to-float(output)
-          __ cvtsi2ss(temp, output, true);
-          // if input >= temp goto done
-          __ comiss(input, temp);
+          // if input >= (float)LONG_MAX goto done
+          __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -2162,14 +2147,11 @@
           // Processing a Dex `double-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
-          // temp = long-to-double(output)
-          __ cvtsi2sd(temp, output, true);
-          // if input >= temp goto done
-          __ comisd(input, temp);
+          // if input >= (double)LONG_MAX goto done
+          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
           __ j(kAboveEqual, &done);
           // if input == NaN goto nan
           __ j(kUnordered, &nan);
@@ -4336,8 +4318,7 @@
         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        codegen_->Load64BitValue(CpuRegister(TMP), value);
-        __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+        codegen_->Store64BitValueToStack(destination, value);
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
@@ -4368,8 +4349,7 @@
         }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        codegen_->Load64BitValue(CpuRegister(TMP), value);
-        __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+        codegen_->Store64BitValueToStack(destination, value);
       }
     }
   } else if (source.IsFpuRegister()) {
@@ -4871,6 +4851,18 @@
   }
 }
 
+void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
+  DCHECK(dest.IsDoubleStackSlot());  // The 64-bit value is written at RSP + stack index.
+  if (IsInt<32>(value)) {
+    // Fits in a signed 32-bit immediate: store directly, without going through TMP.
+    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
+            Immediate(static_cast<int32_t>(value)));
+  } else {
+    Load64BitValue(CpuRegister(TMP), value);  // Too wide for an imm32: load into TMP first.
+    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
+  }
+}
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();