[optimizing] Improve x86 shifts

Support memory operands for integer shifts.  Generate better code for
long shifts by constants.

Change-Id: Icc92fa1b59cc280d4894af6f054e19b01977d5ce
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 845c6c2..ed47f65 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2713,16 +2713,16 @@
 
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location::RequiresRegister());
-      // The shift count needs to be in CL.
+      locations->SetInAt(0, Location::Any());
+      // The shift count needs to be in CL or a constant.
       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      // The shift count needs to be in CL.
-      locations->SetInAt(1, Location::RegisterLocation(ECX));
+      // The shift count needs to be in CL or a constant.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2741,46 +2741,115 @@
 
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
-      Register first_reg = first.AsRegister<Register>();
+      if (first.IsRegister()) {
+        Register first_reg = first.AsRegister<Register>();
+        if (second.IsRegister()) {
+          Register second_reg = second.AsRegister<Register>();
+          DCHECK_EQ(ECX, second_reg);
+          if (op->IsShl()) {
+            __ shll(first_reg, second_reg);
+          } else if (op->IsShr()) {
+            __ sarl(first_reg, second_reg);
+          } else {
+            __ shrl(first_reg, second_reg);
+          }
+        } else {
+          int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+          if (shift == 0) {
+            return;
+          }
+          Immediate imm(shift);
+          if (op->IsShl()) {
+            __ shll(first_reg, imm);
+          } else if (op->IsShr()) {
+            __ sarl(first_reg, imm);
+          } else {
+            __ shrl(first_reg, imm);
+          }
+        }
+      } else {
+        DCHECK(first.IsStackSlot()) << first;
+        Address addr(ESP, first.GetStackIndex());
+        if (second.IsRegister()) {
+          Register second_reg = second.AsRegister<Register>();
+          DCHECK_EQ(ECX, second_reg);
+          if (op->IsShl()) {
+            __ shll(addr, second_reg);
+          } else if (op->IsShr()) {
+            __ sarl(addr, second_reg);
+          } else {
+            __ shrl(addr, second_reg);
+          }
+        } else {
+          int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+          if (shift == 0) {
+            return;
+          }
+          Immediate imm(shift);
+          if (op->IsShl()) {
+            __ shll(addr, imm);
+          } else if (op->IsShr()) {
+            __ sarl(addr, imm);
+          } else {
+            __ shrl(addr, imm);
+          }
+        }
+      }
+
+      break;
+    }
+    case Primitive::kPrimLong: {
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
         DCHECK_EQ(ECX, second_reg);
         if (op->IsShl()) {
-          __ shll(first_reg, second_reg);
+          GenerateShlLong(first, second_reg);
         } else if (op->IsShr()) {
-          __ sarl(first_reg, second_reg);
+          GenerateShrLong(first, second_reg);
         } else {
-          __ shrl(first_reg, second_reg);
+          GenerateUShrLong(first, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-        if (op->IsShl()) {
-          __ shll(first_reg, imm);
-        } else if (op->IsShr()) {
-          __ sarl(first_reg, imm);
-        } else {
-          __ shrl(first_reg, imm);
+        // Shift by a constant.
+        int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+        // Nothing to do if the shift is 0, as the input is already the output.
+        if (shift != 0) {
+          if (op->IsShl()) {
+            GenerateShlLong(first, shift);
+          } else if (op->IsShr()) {
+            GenerateShrLong(first, shift);
+          } else {
+            GenerateUShrLong(first, shift);
+          }
         }
       }
       break;
     }
-    case Primitive::kPrimLong: {
-      Register second_reg = second.AsRegister<Register>();
-      DCHECK_EQ(ECX, second_reg);
-      if (op->IsShl()) {
-        GenerateShlLong(first, second_reg);
-      } else if (op->IsShr()) {
-        GenerateShrLong(first, second_reg);
-      } else {
-        GenerateUShrLong(first, second_reg);
-      }
-      break;
-    }
     default:
       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
   }
 }
 
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Shift by 32: high gets low and low gets 0, done as a pair of parallel moves.
+    codegen_->EmitParallelMoves(
+        loc.ToLow(), loc.ToHigh(),
+        Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow());
+  } else if (shift > 32) {
+    // Low part becomes 0. High part is low part << (shift - 32).
+    __ movl(high, low);
+    __ shll(high, Immediate(shift - 32));
+    __ xorl(low, low);
+  } else {
+    // Between 1 and 31: shld shifts high left, filling from low's top bits; then shift low.
+    __ shld(high, low, Immediate(shift));
+    __ shll(low, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
   Label done;
   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2792,6 +2861,27 @@
   __ Bind(&done);
 }
 
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Shift by 32: low gets high, and high is filled with copies of the sign bit.
+    DCHECK_NE(low, high);
+    __ movl(low, high);
+    __ sarl(high, Immediate(31));
+  } else if (shift > 32) {
+    DCHECK_NE(low, high);
+    // High becomes the sign. Low is high >> (shift - 32), an arithmetic shift.
+    __ movl(low, high);
+    __ sarl(high, Immediate(31));
+    __ sarl(low, Immediate(shift - 32));
+  } else {
+    // Between 1 and 31: shrd shifts low right, filling from high's low bits; then sar high.
+    __ shrd(low, high, Immediate(shift));
+    __ sarl(high, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
   Label done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2803,6 +2893,26 @@
   __ Bind(&done);
 }
 
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Shift by 32: low gets high and high gets 0, done as a pair of parallel moves.
+    codegen_->EmitParallelMoves(
+        loc.ToHigh(), loc.ToLow(),
+        Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh());
+  } else if (shift > 32) {
+    // Low part is high >> (shift - 32), zero-filled. High part becomes 0.
+    __ movl(low, high);
+    __ shrl(low, Immediate(shift - 32));
+    __ xorl(high, high);
+  } else {
+    // Between 1 and 31: shrd shifts low right, filling from high's low bits; then shr high.
+    __ shrd(low, high, Immediate(shift));
+    __ shrl(high, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
   Label done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 20f14fb..e6e7fb7 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -171,6 +171,9 @@
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
   void GenerateUShrLong(const Location& loc, Register shifter);
+  void GenerateShlLong(const Location& loc, int shift);
+  void GenerateShrLong(const Location& loc, int shift);
+  void GenerateUShrLong(const Location& loc, int shift);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 51cc7ac..f8bba07 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1285,32 +1285,62 @@
 
 
 void X86Assembler::shll(Register reg, const Immediate& imm) {
-  EmitGenericShift(4, reg, imm);
+  EmitGenericShift(4, Operand(reg), imm);
 }
 
 
 void X86Assembler::shll(Register operand, Register shifter) {
-  EmitGenericShift(4, operand, shifter);
+  EmitGenericShift(4, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shll(const Address& address, const Immediate& imm) {
+  EmitGenericShift(4, address, imm);
+}
+
+
+void X86Assembler::shll(const Address& address, Register shifter) {
+  EmitGenericShift(4, address, shifter);
 }
 
 
 void X86Assembler::shrl(Register reg, const Immediate& imm) {
-  EmitGenericShift(5, reg, imm);
+  EmitGenericShift(5, Operand(reg), imm);
 }
 
 
 void X86Assembler::shrl(Register operand, Register shifter) {
-  EmitGenericShift(5, operand, shifter);
+  EmitGenericShift(5, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shrl(const Address& address, const Immediate& imm) {
+  EmitGenericShift(5, address, imm);
+}
+
+
+void X86Assembler::shrl(const Address& address, Register shifter) {
+  EmitGenericShift(5, address, shifter);
 }
 
 
 void X86Assembler::sarl(Register reg, const Immediate& imm) {
-  EmitGenericShift(7, reg, imm);
+  EmitGenericShift(7, Operand(reg), imm);
 }
 
 
 void X86Assembler::sarl(Register operand, Register shifter) {
-  EmitGenericShift(7, operand, shifter);
+  EmitGenericShift(7, Operand(operand), shifter);
+}
+
+
+void X86Assembler::sarl(const Address& address, const Immediate& imm) {
+  EmitGenericShift(7, address, imm);
+}
+
+
+void X86Assembler::sarl(const Address& address, Register shifter) {
+  EmitGenericShift(7, address, shifter);
 }
 
 
@@ -1323,6 +1353,15 @@
 }
 
 
+void X86Assembler::shld(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xA4);
+  EmitRegisterOperand(src, dst);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::shrd(Register dst, Register src, Register shifter) {
   DCHECK_EQ(ECX, shifter);
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1332,6 +1371,15 @@
 }
 
 
+void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAC);
+  EmitRegisterOperand(src, dst);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
@@ -1615,28 +1663,28 @@
 
 
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
-                                    Register reg,
+                                    const Operand& operand,
                                     const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int8());
   if (imm.value() == 1) {
     EmitUint8(0xD1);
-    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitOperand(reg_or_opcode, operand);
   } else {
     EmitUint8(0xC1);
-    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitOperand(reg_or_opcode, operand);
     EmitUint8(imm.value() & 0xFF);
   }
 }
 
 
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
-                                    Register operand,
+                                    const Operand& operand,
                                     Register shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter, ECX);
   EmitUint8(0xD3);
-  EmitOperand(reg_or_opcode, Operand(operand));
+  EmitOperand(reg_or_opcode, operand);
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index f3675ae..37acb6e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -429,12 +429,20 @@
 
   void shll(Register reg, const Immediate& imm);
   void shll(Register operand, Register shifter);
+  void shll(const Address& address, const Immediate& imm);
+  void shll(const Address& address, Register shifter);
   void shrl(Register reg, const Immediate& imm);
   void shrl(Register operand, Register shifter);
+  void shrl(const Address& address, const Immediate& imm);
+  void shrl(const Address& address, Register shifter);
   void sarl(Register reg, const Immediate& imm);
   void sarl(Register operand, Register shifter);
+  void sarl(const Address& address, const Immediate& imm);
+  void sarl(const Address& address, Register shifter);
   void shld(Register dst, Register src, Register shifter);
+  void shld(Register dst, Register src, const Immediate& imm);
   void shrd(Register dst, Register src, Register shifter);
+  void shrd(Register dst, Register src, const Immediate& imm);
 
   void negl(Register reg);
   void notl(Register reg);
@@ -619,8 +627,8 @@
   void EmitLabelLink(Label* label);
   void EmitNearLabelLink(Label* label);
 
-  void EmitGenericShift(int rm, Register reg, const Immediate& imm);
-  void EmitGenericShift(int rm, Register operand, Register shifter);
+  void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
+  void EmitGenericShift(int rm, const Operand& operand, Register shifter);
 
   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
 };