Merge "[optimizing] Improve x86, x86_64 code"
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 754dd10..02b9b32 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2730,26 +2730,45 @@
   Label less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
+      Register left_low = left.AsRegisterPairLow<Register>();
+      Register left_high = left.AsRegisterPairHigh<Register>();
+      int32_t val_low = 0;
+      int32_t val_high = 0;
+      bool right_is_const = false;
+
+      if (right.IsConstant()) {
+        DCHECK(right.GetConstant()->IsLongConstant());
+        right_is_const = true;
+        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
+        val_low = Low32Bits(val);
+        val_high = High32Bits(val);
+      }
+
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
+        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairHigh<Register>(),
-                Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_high == 0) {
+          __ testl(left_high, left_high);
+        } else {
+          __ cmpl(left_high, Immediate(val_high));
+        }
       }
       __ j(kLess, &less);  // Signed compare.
       __ j(kGreater, &greater);  // Signed compare.
       if (right.IsRegisterPair()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
+        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
       } else if (right.IsDoubleStackSlot()) {
-        __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
       } else {
-        DCHECK(right.IsConstant()) << right;
-        __ cmpl(left.AsRegisterPairLow<Register>(),
-                Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
+        DCHECK(right_is_const) << right;
+        if (val_low == 0) {
+          __ testl(left_low, left_low);
+        } else {
+          __ cmpl(left_low, Immediate(val_low));
+        }
       }
       break;
     }
@@ -3645,14 +3664,21 @@
         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
       }
     } else if (constant->IsFloatConstant()) {
-      float value = constant->AsFloatConstant()->GetValue();
-      Immediate imm(bit_cast<float, int32_t>(value));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<float, int32_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        ScratchRegisterScope ensure_scratch(
-            this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-        Register temp = static_cast<Register>(ensure_scratch.GetRegister());
-        __ movl(temp, imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), temp);
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of 0.0.
+          __ xorps(dest, dest);
+        } else {
+          ScratchRegisterScope ensure_scratch(
+              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
+          __ movl(temp, Immediate(value));
+          __ movd(dest, temp);
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(ESP, destination.GetStackIndex()), imm);
@@ -4107,18 +4133,38 @@
     } else {
       DCHECK(second.IsConstant()) << second;
       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-      Immediate low(Low32Bits(value));
-      Immediate high(High32Bits(value));
+      int32_t low_value = Low32Bits(value);
+      int32_t high_value = High32Bits(value);
+      Immediate low(low_value);
+      Immediate high(high_value);
+      Register first_low = first.AsRegisterPairLow<Register>();
+      Register first_high = first.AsRegisterPairHigh<Register>();
       if (instruction->IsAnd()) {
-        __ andl(first.AsRegisterPairLow<Register>(), low);
-        __ andl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value == 0) {
+          __ xorl(first_low, first_low);
+        } else if (low_value != -1) {
+          __ andl(first_low, low);
+        }
+        if (high_value == 0) {
+          __ xorl(first_high, first_high);
+        } else if (high_value != -1) {
+          __ andl(first_high, high);
+        }
       } else if (instruction->IsOr()) {
-        __ orl(first.AsRegisterPairLow<Register>(), low);
-        __ orl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ orl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ orl(first_high, high);
+        }
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.AsRegisterPairLow<Register>(), low);
-        __ xorl(first.AsRegisterPairHigh<Register>(), high);
+        if (low_value != 0) {
+          __ xorl(first_low, low);
+        }
+        if (high_value != 0) {
+          __ xorl(first_high, high);
+        }
       }
     }
   }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dbd7c9e..d09c8f8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -956,7 +956,7 @@
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -982,7 +982,18 @@
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
-      __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        if (value == 0) {
+          __ testq(left_reg, left_reg);
+        } else {
+          __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+        }
+      } else {
+        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+      }
       break;
     }
     case Primitive::kPrimFloat: {
@@ -1865,17 +1876,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      HInstruction* rhs = add->InputAt(1);
-      bool is_int32_constant = false;
-      if (rhs->IsLongConstant()) {
-        int64_t value = rhs->AsLongConstant()->GetValue();
-        if (static_cast<int32_t>(value) == value) {
-          is_int32_constant = true;
-        }
-      }
-      locations->SetInAt(1,
-          is_int32_constant ? Location::RegisterOrConstant(rhs) :
-                              Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -1973,7 +1974,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2007,7 +2008,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
@@ -2038,8 +2045,13 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
+      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
+      if (locations->InAt(1).IsConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimFloat:
@@ -2059,9 +2071,9 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.Equals(locations->Out()));
       if (second.IsRegister()) {
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
@@ -2075,16 +2087,27 @@
       break;
     }
     case Primitive::kPrimLong: {
-      __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second.IsConstant()) {
+        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(value));
+        __ imulq(locations->Out().AsRegister<CpuRegister>(),
+                 first.AsRegister<CpuRegister>(),
+                 Immediate(static_cast<int32_t>(value)));
+      } else {
+        DCHECK(first.Equals(locations->Out()));
+        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(locations->Out()));
       __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
@@ -3320,20 +3343,35 @@
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
-      Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+      float fp_value = constant->AsFloatConstant()->GetValue();
+      int32_t value = bit_cast<float, int32_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movl(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          // Easy handling of FP 0.0.
+          __ xorps(dest, dest);
+        } else {
+          __ movl(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
-      Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+      double fp_value =  constant->AsDoubleConstant()->GetValue();
+      int64_t value = bit_cast<double, int64_t>(fp_value);
+      Immediate imm(value);
       if (destination.IsFpuRegister()) {
-        __ movq(CpuRegister(TMP), imm);
-        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
+        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+        if (value == 0) {
+          __ xorpd(dest, dest);
+        } else {
+          __ movq(CpuRegister(TMP), imm);
+          __ movd(dest, CpuRegister(TMP));
+        }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), imm);
@@ -3673,8 +3711,9 @@
   if (instruction->GetType() == Primitive::kPrimInt) {
     locations->SetInAt(1, Location::Any());
   } else {
-    // Request a register to avoid loading a 64bits constant.
+    // We can handle 32 bit constants.
     locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
@@ -3730,13 +3769,34 @@
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    CpuRegister first_reg = first.AsRegister<CpuRegister>();
+    bool second_is_constant = false;
+    int64_t value = 0;
+    if (second.IsConstant()) {
+      second_is_constant = true;
+      value = second.GetConstant()->AsLongConstant()->GetValue();
+      DCHECK(IsInt<32>(value));
+    }
+
     if (instruction->IsAnd()) {
-      __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ andq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else if (instruction->IsOr()) {
-      __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ orq(first_reg, second.AsRegister<CpuRegister>());
+      }
     } else {
       DCHECK(instruction->IsXor());
-      __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      if (second_is_constant) {
+        __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
+      } else {
+        __ xorq(first_reg, second.AsRegister<CpuRegister>());
+      }
     }
   }
 }
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 4ac1fe8..a1ae670 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -56,6 +56,19 @@
       : Location::RequiresRegister();
 }
 
+// Lets `instruction` stay a constant location only when it is a long constant whose
+// value fits in a 32 bit int; every other input is required to be in a register.
+Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
+  if (instruction->IsConstant() && instruction->AsConstant()->IsLongConstant()) {
+    const int64_t long_value = instruction->AsConstant()->AsLongConstant()->GetValue();
+    // Long constants outside the 32 bit range fall through to the register case.
+    if (IsInt<32>(long_value)) {
+      return Location::ConstantLocation(instruction->AsConstant());
+    }
+  }
+  return Location::RequiresRegister();
+}
+
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
   return instruction->IsConstant()
       ? Location::ConstantLocation(instruction->AsConstant())
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 566c0da..de876be 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -345,6 +345,7 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
 
   // The location of the first input to the instruction will be
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f2704b7..bd155ed 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1277,6 +1277,14 @@
 }
 
 
+// Emits the register/immediate form of orq; aborts via CHECK unless imm.is_int32().
+void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // orq only supports 32b immediate.
+  EmitRex64(dst);
+  EmitComplex(1, Operand(dst), imm);  // NOTE(review): 1 is presumably the OR opcode extension.
+}
+
 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
@@ -1548,27 +1556,30 @@
 
 
 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
+  imulq(reg, reg, imm);  // Two-operand form: reg is both the source and the destination.
+}
+
+void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
 
-  EmitRex64(reg, reg);
+  EmitRex64(dst, reg);  // dst is the result register; reg is the register being multiplied.
 
   // See whether imm can be represented as a sign-extended 8bit value.
   int64_t v64 = imm.value();
   if (IsInt<8>(v64)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));  // Same operand pair as the full-immediate path.
     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(reg));  // Followed by the immediate via EmitImmediate.
     EmitImmediate(imm);
   }
 }
 
-
 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg, address);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5dfcf45..495f74f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -429,6 +429,7 @@
   void orl(CpuRegister dst, CpuRegister src);
   void orl(CpuRegister reg, const Address& address);
   void orq(CpuRegister dst, CpuRegister src);
+  void orq(CpuRegister dst, const Immediate& imm);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorl(CpuRegister dst, const Immediate& imm);
@@ -467,6 +468,7 @@
   void imulq(CpuRegister dst, CpuRegister src);
   void imulq(CpuRegister reg, const Immediate& imm);
   void imulq(CpuRegister reg, const Address& address);
+  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
 
   void imull(CpuRegister reg);
   void imull(const Address& address);