Implement inlined shift long for 32bit
Added support for x86 inlined shift long for 32bit
Change-Id: I6caef60dd7d80227c3057fd6f64b0ecb11025afa
Signed-off-by: Yixin Shou <yixin.shou@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 93b7999..6173163 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -263,8 +263,10 @@
{ kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0, false }, "Cmc", "" },
{ kX86Shld32RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32RRI", "!0r,!1r,!2d" },
+ { kX86Shld32RRC, kShiftRegRegCl, IS_TERTIARY_OP | REG_DEF0_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0x0F, 0xA5, 0, 0, 0, 0, false }, "Shld32RRC", "!0r,!1r,cl" },
{ kX86Shld32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld32MRI", "[!0r+!1d],!2r,!3d" },
{ kX86Shrd32RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32RRI", "!0r,!1r,!2d" },
+ { kX86Shrd32RRC, kShiftRegRegCl, IS_TERTIARY_OP | REG_DEF0_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0x0F, 0xAD, 0, 0, 0, 0, false }, "Shrd32RRC", "!0r,!1r,cl" },
{ kX86Shrd32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1, false }, "Shrd32MRI", "[!0r+!1d],!2r,!3d" },
{ kX86Shld64RRI, kRegRegImmStore, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64RRI", "!0r,!1r,!2d" },
{ kX86Shld64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1, false }, "Shld64MRI", "[!0r+!1d],!2r,!3d" },
@@ -591,6 +593,7 @@
case kShiftRegCl: return true;
case kRegCond: return true;
case kRegRegCond: return true;
+ case kShiftRegRegCl: return true;
case kJmp:
switch (entry->opcode) {
case kX86JmpR: return true;
@@ -768,6 +771,9 @@
DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4]));
return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
lir->operands[3]);
+ case kShiftRegRegCl: // lir operands - 0: reg1, 1: reg2, 2: cl
+ DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2]));
+ return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0);
case kRegCond: // lir operands - 0: reg, 1: cond
return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0);
case kMemCond: // lir operands - 0: base, 1: disp, 2: cond
@@ -1336,6 +1342,19 @@
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
}
+void X86Mir2Lir::EmitShiftRegRegCl(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t raw_cl) {
+ DCHECK_EQ(false, entry->skeleton.r8_form);
+ DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
+ EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
+ uint8_t low_reg1 = LowRegisterBits(raw_reg1);
+ uint8_t low_reg2 = LowRegisterBits(raw_reg2);
+ uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
+ code_buffer_.push_back(modrm);
+ DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+ DCHECK_EQ(0, entry->skeleton.ax_opcode);
+ DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
int32_t imm) {
DCHECK_EQ(false, entry->skeleton.r8_form);
@@ -1829,6 +1848,9 @@
case kShiftMemCl: // lir operands - 0: base, 1:displacement, 2: cl
EmitShiftMemCl(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
+ case kShiftRegRegCl: // lir operands - 0: reg1, 1: reg2, 2: cl
+ EmitShiftRegRegCl(entry, lir->operands[1], lir->operands[0], lir->operands[2]);
+ break;
case kRegCond: // lir operands - 0: reg, 1: condition
EmitRegCond(entry, lir->operands[0], lir->operands[1]);
break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 1f5b350..7d1e20e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -457,6 +457,8 @@
void EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm);
void EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl);
void EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_cl);
+ void EmitShiftRegRegCl(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
+ int32_t raw_cl);
void EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm);
void EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc);
void EmitMemCond(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t cc);
@@ -478,8 +480,10 @@
void GenConstWide(RegLocation rl_dest, int64_t value);
void GenMultiplyVectorSignedByte(BasicBlock *bb, MIR *mir);
void GenShiftByteVector(BasicBlock *bb, MIR *mir);
- void AndMaskVectorRegister(RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3, uint32_t m4);
- void MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3, uint32_t m4);
+ void AndMaskVectorRegister(RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3,
+ uint32_t m4);
+ void MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m1, uint32_t m2,
+ uint32_t m3, uint32_t m4);
void AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir);
static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
@@ -551,7 +555,8 @@
void GenMoveVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed multiply of units in two vector registers: vB = vB .* @note vC using vA to know the type of the vector.
+ * @brief Packed multiply of units in two vector registers: vB = vB .* @note vC using vA to know
+ * the type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -561,7 +566,8 @@
void GenMultiplyVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+ * @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the
+ * type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -571,7 +577,8 @@
void GenAddVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+ * @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the
+ * type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -581,7 +588,8 @@
void GenSubtractVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+ * @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the
+ * type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -591,7 +599,8 @@
void GenShiftLeftVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+ * @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to
+ * know the type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -601,7 +610,8 @@
void GenSignedShiftRightVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+ * @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA
+ * to know the type of the vector.
* @param bb The basic block in which the MIR is from..
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -611,7 +621,8 @@
void GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+ * @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the
+ * type of the vector.
* @note vA: TypeSize
* @note vB: destination and source
* @note vC: source
@@ -619,7 +630,8 @@
void GenAndVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+ * @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the
+ * type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
@@ -629,7 +641,8 @@
void GenOrVector(BasicBlock *bb, MIR *mir);
/*
- * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+ * @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the
+ * type of the vector.
* @param bb The basic block in which the MIR is from.
* @param mir The MIR whose opcode is kMirConstVector.
* @note vA: TypeSize
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 7ba9570..cc51538 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -3140,7 +3140,53 @@
void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_shift) {
if (!cu_->target64) {
- Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+ // Long shift operations in 32-bit. Use shld or shrd to create a 32-bit register filled from
+ // the other half, shift the other half, if the shift amount is less than 32 we're done,
+ // otherwise move one register to the other and place zero or sign bits in the other.
+ LIR* branch;
+ FlushAllRegs();
+ LockCallTemps();
+ LoadValueDirectFixed(rl_shift, rs_rCX);
+ RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
+ LoadValueDirectWideFixed(rl_src1, r_tmp);
+ switch (opcode) {
+ case Instruction::SHL_LONG:
+ case Instruction::SHL_LONG_2ADDR:
+ NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg());
+ NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg());
+ NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
+ branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
+ OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow());
+ LoadConstant(r_tmp.GetLow(), 0);
+ branch->target = NewLIR0(kPseudoTargetLabel);
+ break;
+ case Instruction::SHR_LONG:
+ case Instruction::SHR_LONG_2ADDR:
+ NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg());
+ NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
+ NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
+ branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
+ OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
+ NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31);
+ branch->target = NewLIR0(kPseudoTargetLabel);
+ break;
+ case Instruction::USHR_LONG:
+ case Instruction::USHR_LONG_2ADDR:
+ NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(),
+ rs_rCX.GetReg());
+ NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg());
+ NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32);
+ branch = NewLIR2(kX86Jcc8, 0, kX86CondZ);
+ OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh());
+ LoadConstant(r_tmp.GetHigh(), 0);
+ branch->target = NewLIR0(kPseudoTargetLabel);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected case: " << opcode;
+ return;
+ }
+ RegLocation rl_result = LocCReturnWide();
+ StoreValueWide(rl_dest, rl_result);
return;
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 500c6b8..9620cd1 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -484,8 +484,10 @@
#undef BinaryShiftOpcode
kX86Cmc,
kX86Shld32RRI,
+ kX86Shld32RRC,
kX86Shld32MRI,
kX86Shrd32RRI,
+ kX86Shrd32RRC,
kX86Shrd32MRI,
kX86Shld64RRI,
kX86Shld64MRI,
@@ -675,6 +677,7 @@
kMemRegImm, // MRI instruction kinds.
kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate.
kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL.
+ kShiftRegRegCl,
// kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds.
kRegCond, kMemCond, kArrayCond, // R, M, A instruction kinds following by a condition.
kRegRegCond, // RR instruction kind followed by a condition.
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 0ca8962..0bf758e 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -702,12 +702,24 @@
load = true;
immediate_bytes = 1;
break;
+ case 0xA5:
+ opcode << "shld";
+ has_modrm = true;
+ load = true;
+ cx = true;
+ break;
case 0xAC:
opcode << "shrd";
has_modrm = true;
load = true;
immediate_bytes = 1;
break;
+ case 0xAD:
+ opcode << "shrd";
+ has_modrm = true;
+ load = true;
+ cx = true;
+ break;
case 0xAE:
if (prefix[0] == 0xF3) {
prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode