Optimizing: Use more X86 3 operand multiplies

The X86_64 code generator generated 3-operand multiplies for long
multiplication only.  Add support for 3-operand multiplication for
int as well, for both X86 and X86_64.

Note that the RHS operand must be a 32-bit constant, and that it is
possible for the constant to end up in a register (!) due to a previous
use by another instruction.  Handle this case by checking the operand
itself rather than its assigned location; otherwise the first input
might not be the same as the output, due to the use of Any().

Also allow stack operands for multiplication.

Change-Id: I8f3d14cc01e9a91210f418258aa18065ee87979d
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a95ce68..287737b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2535,13 +2535,19 @@
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      locations->SetOut(Location::SameAsFirstInput());
+      if (mul->InputAt(1)->IsIntConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
-      if (locations->InAt(1).IsConstant()) {
+      locations->SetInAt(1, Location::Any());
+      if (mul->InputAt(1)->IsLongConstant() &&
+          IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
         // Can use 3 operand multiply.
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       } else {
@@ -2566,37 +2572,51 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
+  Location out = locations->Out();
   switch (mul->GetResultType()) {
-    case Primitive::kPrimInt: {
-      DCHECK(first.Equals(locations->Out()));
-      if (second.IsRegister()) {
+    case Primitive::kPrimInt:
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsIntConstant()) {
+        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+        __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
-      } else if (second.IsConstant()) {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.AsRegister<CpuRegister>(), imm);
       } else {
+        DCHECK(first.Equals(out));
         DCHECK(second.IsStackSlot());
         __ imull(first.AsRegister<CpuRegister>(),
                  Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
-    }
     case Primitive::kPrimLong: {
-      if (second.IsConstant()) {
-        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(value));
-        __ imulq(locations->Out().AsRegister<CpuRegister>(),
-                 first.AsRegister<CpuRegister>(),
-                 Immediate(static_cast<int32_t>(value)));
-      } else {
-        DCHECK(first.Equals(locations->Out()));
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsLongConstant()) {
+        int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
+        if (IsInt<32>(value)) {
+          __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
+                   Immediate(static_cast<int32_t>(value)));
+        } else {
+          // Have to use the constant area.
+          DCHECK(first.Equals(out));
+          __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        DCHECK(first.Equals(out));
+        __ imulq(first.AsRegister<CpuRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
-      DCHECK(first.Equals(locations->Out()));
+      DCHECK(first.Equals(out));
       if (second.IsFpuRegister()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
@@ -2611,7 +2631,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      DCHECK(first.Equals(locations->Out()));
+      DCHECK(first.Equals(out));
       if (second.IsFpuRegister()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {