Add 3rd argument register to X86.
Implement more instructions.
Change-Id: I3af7bbaf18eedc6537f1cfc2d57c4f6106fb5164
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index ee002c5..15f4495 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -91,9 +91,12 @@
if (entry_spills_.size() == 0) {
size_t num_spills = NumArgs() + NumLongOrDoubleArgs();
if (num_spills > 0) {
- entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EDX));
+ entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(ECX));
if (num_spills > 1) {
- entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(ECX));
+ entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EDX));
+ if (num_spills > 2) {
+ entry_spills_.push_back(X86ManagedRegister::FromCpuRegister(EBX));
+ }
}
}
}
diff --git a/src/compiled_method.cc b/src/compiled_method.cc
index bf88880..3ac32d4 100644
--- a/src/compiled_method.cc
+++ b/src/compiled_method.cc
@@ -133,7 +133,7 @@
case kThumb2:
return RoundUp(offset, kArmAlignment);
case kX86:
- return offset;
+ return RoundUp(offset, kX86Alignment);
default:
LOG(FATAL) << "Unknown InstructionSet: " << static_cast<int>(instruction_set);
return 0;
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index 2e2c254..f2449e5 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -819,7 +819,8 @@
} else {
cUnit->assemblerRetries++;
if (cUnit->assemblerRetries > MAX_ASSEMBLER_RETRIES) {
- LOG(FATAL) << "Assembler error - too many retries";
+ oatCodegenDump(cUnit);
+ LOG(FATAL) << "Assembler error - too many retries";
}
// Redo offsets and try again
oatAssignOffsets(cUnit);
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index cc0d624..c5b28b3 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -178,11 +178,7 @@
if (arg1.wide == 0) {
loadValueDirectFixed(cUnit, arg1, rARG2);
} else {
-#if defined(TARGET_X86)
- UNIMPLEMENTED(FATAL);
-#else
loadValueDirectWideFixed(cUnit, arg1, rARG2, rARG3);
-#endif
}
}
oatClobberCalleeSave(cUnit);
@@ -274,11 +270,7 @@
if (arg2.wide == 0) {
loadValueDirectFixed(cUnit, arg2, rARG2);
} else {
-#if defined(TARGET_X86)
- UNIMPLEMENTED(FATAL);
-#else
loadValueDirectWideFixed(cUnit, arg2, rARG2, rARG3);
-#endif
}
loadConstant(cUnit, rARG0, arg0);
oatClobberCalleeSave(cUnit);
@@ -1406,10 +1398,21 @@
// Now, redo loadValues in case they didn't survive the call
- int regPtr;
rlArray = loadValue(cUnit, rlArray, kCoreReg);
rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
+#if defined(TARGET_X86)
+ if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ /* if (rlIndex >= [rlArray + lenOffset]) goto kThrowArrayBounds */
+ genRegMemCheck(cUnit, kCondUge, rlIndex.lowReg, rlArray.lowReg,
+ lenOffset, mir, kThrowArrayBounds);
+ }
+ rlSrc = loadValue(cUnit, rlSrc, regClass);
+ storeBaseIndexedDisp(cUnit, NULL, rlArray.lowReg, rlIndex.lowReg, scale,
+ dataOffset, rlSrc.lowReg, INVALID_REG, kWord,
+ INVALID_SREG);
+#else
+ int regPtr;
if (oatIsTemp(cUnit, rlArray.lowReg)) {
oatClobber(cUnit, rlArray.lowReg);
regPtr = rlArray.lowReg;
@@ -1437,6 +1440,7 @@
}
storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
scale, kWord);
+#endif
markGCCard(cUnit, rlSrc.lowReg, rlArray.lowReg);
}
@@ -1555,10 +1559,10 @@
dataOffset = Array::DataOffset(sizeof(int32_t)).Int32Value();
}
- int regPtr;
rlArray = loadValue(cUnit, rlArray, kCoreReg);
rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
-
+#if !defined(TARGET_X86)
+ int regPtr;
if (oatIsTemp(cUnit, rlArray.lowReg)) {
oatClobber(cUnit, rlArray.lowReg);
regPtr = rlArray.lowReg;
@@ -1566,10 +1570,21 @@
regPtr = oatAllocTemp(cUnit);
opRegCopy(cUnit, regPtr, rlArray.lowReg);
}
+#endif
/* null object? */
genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
+#if defined(TARGET_X86)
+ if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ /* if (rlIndex >= [rlArray + lenOffset]) goto kThrowArrayBounds */
+ genRegMemCheck(cUnit, kCondUge, rlIndex.lowReg, rlArray.lowReg,
+ lenOffset, mir, kThrowArrayBounds);
+ }
+ rlSrc = loadValue(cUnit, rlSrc, regClass);
+ storeBaseIndexedDisp(cUnit, NULL, rlArray.lowReg, rlIndex.lowReg, scale, dataOffset,
+ rlSrc.lowReg, rlSrc.highReg, size, INVALID_SREG);
+#else
bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
int regLen = INVALID_REG;
if (needsRangeCheck) {
@@ -1612,6 +1627,7 @@
storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
scale, size);
}
+#endif
}
void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
@@ -1893,20 +1909,18 @@
opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
}
} else {
- int cReg = oatAllocTemp(cUnit);
- loadConstant(cUnit, cReg, lit - 1);
int tReg1 = oatAllocTemp(cUnit);
int tReg2 = oatAllocTemp(cUnit);
if (lit == 2) {
opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
- opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
+ opRegRegImm(cUnit, kOpAnd, tReg2, tReg2, lit - 1);
opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
} else {
opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
- opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
+ opRegRegImm(cUnit, kOpAnd, tReg2, tReg2, lit - 1);
opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
}
}
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index 037a9bb..ebc8bc2 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -51,13 +51,8 @@
if (cUnit->numIns == 0)
return;
-#if !defined(TARGET_X86)
const int numArgRegs = 3;
static int argRegs[] = {rARG1, rARG2, rARG3};
-#else
- const int numArgRegs = 2;
- static int argRegs[] = {rARG1, rARG2};
-#endif
int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
/*
* Copy incoming arguments to their proper home locations.
@@ -425,11 +420,7 @@
reg = rlArg.highReg;
} else {
// rARG2 & rARG3 can safely be used here
-#if defined(TARGET_X86)
- UNIMPLEMENTED(FATAL);
-#else
reg = rARG3;
-#endif
loadWordDisp(cUnit, rSP,
oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
callState = nextCallInsn(cUnit, mir, callState, dexIdx,
@@ -453,12 +444,8 @@
highReg = rlArg.highReg;
} else {
lowReg = rARG2;
-#if defined(TARGET_X86)
- UNIMPLEMENTED(FATAL);
-#else
- highReg = rARG3;
-#endif
if (rlArg.wide) {
+ highReg = rARG3;
loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
} else {
loadValueDirectFixed(cUnit, rlArg, lowReg);
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index efa54e0..eec1cbd 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -214,7 +214,7 @@
thisLIR = NEXT_LIR(thisLIR)) {
/* Branch to the next instruction */
- if (thisLIR->opcode == kX86Jmp) {
+ if (thisLIR->opcode == kX86Jmp8 || thisLIR->opcode == kX86Jmp32) {
LIR* nextLIR = thisLIR;
while (true) {
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index d2a33ea..b9dd978 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -257,6 +257,8 @@
EXT_0F_ENCODING_MAP(Ucomiss, 0x00, 0x2E),
EXT_0F_ENCODING_MAP(Comisd, 0x66, 0x2F),
EXT_0F_ENCODING_MAP(Comiss, 0x00, 0x2F),
+ EXT_0F_ENCODING_MAP(Orps, 0x00, 0x56),
+ EXT_0F_ENCODING_MAP(Xorps, 0x00, 0x57),
EXT_0F_ENCODING_MAP(Addsd, 0xF2, 0x58),
EXT_0F_ENCODING_MAP(Addss, 0xF3, 0x58),
EXT_0F_ENCODING_MAP(Mulsd, 0xF2, 0x59),
@@ -268,6 +270,8 @@
EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E),
EXT_0F_ENCODING_MAP(Divss, 0xF3, 0x5E),
+ { kX86PsllqRI, kRegImm, IS_BINARY_OP, { 0, 0, 0x0F, 0x73, 0, 7, 0, 1 }, "PsllqRI", "!0r, !1d" },
+
EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E),
EXT_0F_ENCODING_MAP(Movdrx, 0x66, 0x7E),
@@ -283,8 +287,10 @@
EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF),
#undef EXT_0F_ENCODING_MAP
- { kX86Jcc, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0x70, 0, 0, 0, 0, 0 }, "Jcc", "!1c !0t" },
- { kX86Jmp, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp", "!0t" },
+ { kX86Jcc8, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0x70, 0, 0, 0, 0, 0 }, "Jcc8", "!1c !0t" },
+ { kX86Jcc32, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0x0F, 0x80, 0, 0, 0, 0 }, "Jcc32", "!1c !0t" },
+ { kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" },
+ { kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" },
{ kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" },
{ kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
{ kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
@@ -359,9 +365,14 @@
case kRegThread: // lir operands - 0: reg, 1: disp
return computeSize(entry, 0x12345678, false); // displacement size is always 32bit
case kRegImm: { // lir operands - 0: reg, 1: immediate
- int reg = lir->operands[0];
- // AX opcodes don't require the modrm byte.
- return computeSize(entry, 0, false) - (reg == rAX ? 1 : 0);
+ size_t size = computeSize(entry, 0, false);
+ if (entry->skeleton.ax_opcode == 0) {
+ return size;
+ } else {
+ // AX opcodes don't require the modrm byte.
+ int reg = lir->operands[0];
+ return size - (reg == rAX ? 1 : 0);
+ }
}
case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate
CHECK_NE(lir->operands[0], static_cast<int>(rSP)); // TODO: add extra SIB byte
@@ -403,10 +414,20 @@
return computeSize(entry, lir->operands[1], false);
case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond
return computeSize(entry, lir->operands[3], true);
- case kJcc: case kJmp:
- // Jumps only return the short form length, the correct length will be assigned to LIR
- // flags.size during assembly.
- return 2;
+ case kJcc:
+ if (lir->opcode == kX86Jcc8) {
+ return 2; // opcode + rel8
+ } else {
+ DCHECK(lir->opcode == kX86Jcc32);
+ return 6; // 2 byte opcode + rel32
+ }
+ case kJmp:
+ if (lir->opcode == kX86Jmp8) {
+ return 2; // opcode + rel8
+ } else {
+ DCHECK(lir->opcode == kX86Jmp32);
+ return 5; // opcode + rel32
+ }
case kCall:
switch(lir->opcode) {
case kX86CallR: return 2; // opcode modrm
@@ -586,6 +607,12 @@
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
}
+static void emitArrayReg(CompilationUnit* cUnit, const X86EncodingMap* entry,
+ uint8_t base, uint8_t index, int scale, int disp, uint8_t reg) {
+ // Opcode will flip operands.
+ emitRegArray(cUnit, entry, reg, base, index, scale, disp);
+}
+
static void emitRegThread(CompilationUnit* cUnit, const X86EncodingMap* entry,
uint8_t reg, int disp) {
DCHECK_NE(entry->skeleton.prefix1, 0);
@@ -770,11 +797,50 @@
cUnit->codeBuffer.push_back((imm >> 24) & 0xFF);
}
+static void emitShiftRegImm(CompilationUnit* cUnit, const X86EncodingMap* entry,
+ uint8_t reg, int imm) {
+ if (entry->skeleton.prefix1 != 0) {
+ cUnit->codeBuffer.push_back(entry->skeleton.prefix1);
+ if (entry->skeleton.prefix2 != 0) {
+ cUnit->codeBuffer.push_back(entry->skeleton.prefix2);
+ }
+ } else {
+ DCHECK_EQ(0, entry->skeleton.prefix2);
+ }
+ if (imm != 1) {
+ cUnit->codeBuffer.push_back(entry->skeleton.opcode);
+ } else {
+ // Shorter encoding for 1 bit shift
+ cUnit->codeBuffer.push_back(entry->skeleton.ax_opcode);
+ }
+ if (entry->skeleton.opcode == 0x0F) {
+ cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+ if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) {
+ cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+ } else {
+ DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+ }
+ } else {
+ DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+ DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+ }
+ DCHECK_LT(reg, 8);
+ uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
+ cUnit->codeBuffer.push_back(modrm);
+ if (imm != 1) {
+ DCHECK_EQ(entry->skeleton.immediate_bytes, 1);
+ DCHECK(IS_SIMM8(imm));
+ cUnit->codeBuffer.push_back(imm & 0xFF);
+ }
+}
+
static void emitJmp(CompilationUnit* cUnit, const X86EncodingMap* entry, int rel) {
- if (IS_SIMM8(rel)) {
+ if (entry->opcode == kX86Jmp8) {
+ DCHECK(IS_SIMM8(rel));
cUnit->codeBuffer.push_back(0xEB);
cUnit->codeBuffer.push_back(rel & 0xFF);
} else {
+ DCHECK(entry->opcode == kX86Jmp32);
cUnit->codeBuffer.push_back(0xE9);
cUnit->codeBuffer.push_back(rel & 0xFF);
cUnit->codeBuffer.push_back((rel >> 8) & 0xFF);
@@ -786,10 +852,12 @@
static void emitJcc(CompilationUnit* cUnit, const X86EncodingMap* entry,
int rel, uint8_t cc) {
DCHECK_LT(cc, 16);
- if (IS_SIMM8(rel)) {
+ if (entry->opcode == kX86Jcc8) {
+ DCHECK(IS_SIMM8(rel));
cUnit->codeBuffer.push_back(0x70 | cc);
cUnit->codeBuffer.push_back(rel & 0xFF);
} else {
+ DCHECK(entry->opcode == kX86Jcc32);
cUnit->codeBuffer.push_back(0x0F);
cUnit->codeBuffer.push_back(0x80 | cc);
cUnit->codeBuffer.push_back(rel & 0xFF);
@@ -889,45 +957,53 @@
if (lir->flags.pcRelFixup) {
switch (lir->opcode) {
- case kX86Jcc: {
- LIR *targetLIR = lir->target;
- DCHECK(targetLIR != NULL);
- int delta = 0;
- intptr_t pc;
- if (IS_SIMM8(lir->operands[0])) {
- pc = lir->offset + 2 /* opcode + rel8 */;
- } else {
- pc = lir->offset + 6 /* 2 byte opcode + rel32 */;
- }
- intptr_t target = targetLIR->offset;
- delta = target - pc;
- if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
- res = kRetryAll;
- }
- lir->operands[0] = delta;
- break;
+ case kX86Jcc8: {
+ LIR *targetLIR = lir->target;
+ DCHECK(targetLIR != NULL);
+ int delta = 0;
+ intptr_t pc;
+ if (IS_SIMM8(lir->operands[0])) {
+ pc = lir->offset + 2 /* opcode + rel8 */;
+ } else {
+ pc = lir->offset + 6 /* 2 byte opcode + rel32 */;
+ }
+ intptr_t target = targetLIR->offset;
+ delta = target - pc;
+ if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
+ LOG(INFO) << "Retry for JCC growth at " << lir->offset
+ << " delta: " << delta << " old delta: " << lir->operands[0];
+ lir->opcode = kX86Jcc32;
+ oatSetupResourceMasks(lir);
+ res = kRetryAll;
+ }
+ lir->operands[0] = delta;
+ break;
}
- case kX86Jmp: {
- LIR *targetLIR = lir->target;
- DCHECK(targetLIR != NULL);
- int delta = 0;
- intptr_t pc;
- if (IS_SIMM8(lir->operands[0])) {
- pc = lir->offset + 2 /* opcode + rel8 */;
- } else {
- pc = lir->offset + 5 /* opcode + rel32 */;
- }
- intptr_t target = targetLIR->offset;
- delta = target - pc;
- if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
- // Useless branch
- lir->flags.isNop = true;
- res = kRetryAll;
- } else if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
- res = kRetryAll;
- }
- lir->operands[0] = delta;
- break;
+ case kX86Jmp8: {
+ LIR *targetLIR = lir->target;
+ DCHECK(targetLIR != NULL);
+ int delta = 0;
+ intptr_t pc;
+ if (IS_SIMM8(lir->operands[0])) {
+ pc = lir->offset + 2 /* opcode + rel8 */;
+ } else {
+ pc = lir->offset + 5 /* opcode + rel32 */;
+ }
+ intptr_t target = targetLIR->offset;
+ delta = target - pc;
+ if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
+ // Useless branch
+ lir->flags.isNop = true;
+ LOG(INFO) << "Retry for useless branch at " << lir->offset;
+ res = kRetryAll;
+ } else if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
+ LOG(INFO) << "Retry for JMP growth at " << lir->offset;
+ lir->opcode = kX86Jmp32;
+ oatSetupResourceMasks(lir);
+ res = kRetryAll;
+ }
+ lir->operands[0] = delta;
+ break;
}
default:
break;
@@ -967,6 +1043,10 @@
case kMemReg: // lir operands - 0: base, 1: disp, 2: reg
emitMemReg(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
+ case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
+ emitArrayReg(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2],
+ lir->operands[3], lir->operands[4]);
+ break;
case kRegMem: // lir operands - 0: reg, 1: base, 2: disp
emitRegMem(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
@@ -989,6 +1069,9 @@
case kMovRegImm: // lir operands - 0: reg, 1: immediate
emitMovRegImm(cUnit, entry, lir->operands[0], lir->operands[1]);
break;
+ case kShiftRegImm: // lir operands - 0: reg, 1: immediate
+ emitShiftRegImm(cUnit, entry, lir->operands[0], lir->operands[1]);
+ break;
case kJmp: // lir operands - 0: rel
emitJmp(cUnit, entry, lir->operands[0]);
break;
@@ -1012,10 +1095,11 @@
emitUnimplemented(cUnit, entry, lir);
break;
}
- CHECK_EQ(static_cast<size_t>(oatGetInsnSize(lir)),
- cUnit->codeBuffer.size() - starting_cbuf_size)
- << "Instruction size mismatch for entry: " << EncodingMap[lir->opcode].name;
-
+ if (entry->kind != kJcc && entry->kind != kJmp) {
+ CHECK_EQ(static_cast<size_t>(oatGetInsnSize(lir)),
+ cUnit->codeBuffer.size() - starting_cbuf_size)
+ << "Instruction size mismatch for entry: " << EncodingMap[lir->opcode].name;
+ }
}
return res;
}
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 52b4fc4..c916640 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -213,7 +213,7 @@
}
LIR* branch = NULL;
if (unorderedGt) {
- branch = newLIR2(cUnit, kX86Jcc, 0, kX86CondPE);
+ branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
}
newLIR2(cUnit, kX86Set8R, rlResult.lowReg, kX86CondA /* above - unsigned > */);
newLIR2(cUnit, kX86Sbb32RI, rlResult.lowReg, 0);
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index 96fa08a..aef5879 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -32,7 +32,7 @@
#endif
};
/*static*/ int reservedRegs[] = {rSP};
-/*static*/ int coreTemps[] = {rAX, rCX, rDX};
+/*static*/ int coreTemps[] = {rAX, rCX, rDX, rBX};
/*static*/ int fpRegs[] = {
fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
#ifdef TARGET_REX_SUPPORT
@@ -93,25 +93,24 @@
* 2) The codegen is under fixed register usage
*/
LIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, int value) {
- LIR *res;
-
int rDestSave = rDest;
- int isFpReg = FPREG(rDest);
- if (isFpReg) {
+ if (FPREG(rDest)) {
+ if (value == 0) {
+ return newLIR2(cUnit, kX86XorpsRR, rDest, rDest);
+ }
DCHECK(SINGLEREG(rDest));
rDest = oatAllocTemp(cUnit);
}
- /* See if the value can be constructed cheaply */
+ LIR *res;
if (value == 0) {
res = newLIR2(cUnit, kX86Xor32RR, rDest, rDest);
} else {
res = newLIR2(cUnit, kX86Mov32RI, rDest, value);
}
- if (isFpReg) {
- UNIMPLEMENTED(FATAL);
- newLIR2(cUnit, kX86Mov32RR, rDest, rDestSave);
+ if (FPREG(rDestSave)) {
+ newLIR2(cUnit, kX86MovdxrRR, rDestSave, rDest);
oatFreeTemp(cUnit, rDest);
}
@@ -120,7 +119,7 @@
LIR* opBranchUnconditional(CompilationUnit *cUnit, OpKind op) {
CHECK_EQ(op, kOpUncondBr);
- return newLIR1(cUnit, kX86Jmp, 0 /* offset to be patched */ );
+ return newLIR1(cUnit, kX86Jmp8, 0 /* offset to be patched */ );
}
LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask);
@@ -128,7 +127,7 @@
X86ConditionCode oatX86ConditionEncoding(ConditionCode cond);
LIR* opCondBranch(CompilationUnit* cUnit, ConditionCode cc, LIR* target)
{
- LIR* branch = newLIR2(cUnit, kX86Jcc, 0 /* offset to be patched */,
+ LIR* branch = newLIR2(cUnit, kX86Jcc8, 0 /* offset to be patched */,
oatX86ConditionEncoding(cc));
branch->target = target;
return branch;
@@ -285,13 +284,18 @@
if (op == kOpMul) {
X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
return newLIR3(cUnit, opcode, rDest, rSrc, value);
- }
- if (op == kOpLsl && value >= 0 && value <= 3) { // lea shift special case
- return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc /* base */,
- r4sib_no_index /* index */, value /* scale */, value /* disp */);
+ } else if (op == kOpAnd) {
+ if (value == 0xFF) {
+ return newLIR2(cUnit, kX86Movzx8RR, rDest, rSrc);
+ } else if (value == 0xFFFF) {
+ return newLIR2(cUnit, kX86Movzx16RR, rDest, rSrc);
+ }
}
if (rDest != rSrc) {
- if (op == kOpAdd) { // lea add special case
+ if (op == kOpLsl && value >= 0 && value <= 3) { // lea shift special case
+ return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc /* base */,
+ r4sib_no_index /* index */, value /* scale */, value /* disp */);
+ } else if (op == kOpAdd) { // lea add special case
return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc /* base */,
r4sib_no_index /* index */, 0 /* scale */, value /* disp */);
}
@@ -326,8 +330,26 @@
int rDestHi, int valLo, int valHi)
{
LIR *res;
- res = loadConstantNoClobber(cUnit, rDestLo, valLo);
- loadConstantNoClobber(cUnit, rDestHi, valHi);
+ if (FPREG(rDestLo)) {
+ DCHECK(FPREG(rDestHi)); // ignore rDestHi
+ if (valLo == 0 && valHi == 0) {
+ return newLIR2(cUnit, kX86XorpsRR, rDestLo, rDestLo);
+ } else {
+ if (valLo == 0) {
+ res = newLIR2(cUnit, kX86XorpsRR, rDestLo, rDestLo);
+ } else {
+ res = loadConstantNoClobber(cUnit, rDestLo, valLo);
+ }
+ if (valHi != 0) {
+ loadConstantNoClobber(cUnit, rDestHi, valHi);
+ newLIR2(cUnit, kX86PsllqRI, rDestHi, 32);
+ newLIR2(cUnit, kX86OrpsRR, rDestLo, rDestHi);
+ }
+ }
+ } else {
+ res = loadConstantNoClobber(cUnit, rDestLo, valLo);
+ loadConstantNoClobber(cUnit, rDestHi, valHi);
+ }
return res;
}
@@ -593,23 +615,22 @@
rDestLo, rDestHi, kLong, sReg);
}
-LIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase,
- int displacement, int rSrc, int rSrcHi,
- OpSize size)
-{
- LIR *res = NULL;
+LIR* storeBaseIndexedDisp(CompilationUnit *cUnit, MIR *mir,
+ int rBase, int rIndex, int scale, int displacement,
+ int rSrc, int rSrcHi,
+ OpSize size, int sReg) {
LIR *store = NULL;
LIR *store2 = NULL;
- X86OpCode opcode = kX86Bkpt;
+ bool isArray = rIndex != INVALID_REG;
bool pair = false;
bool is64bit = false;
+ X86OpCode opcode = kX86Nop;
switch (size) {
case kLong:
case kDouble:
is64bit = true;
if (FPREG(rSrc)) {
- pair = false;
- opcode = kX86MovsdMR;
+ opcode = isArray ? kX86MovsdAR : kX86MovsdMR;
if (DOUBLEREG(rSrc)) {
rSrc = rSrc - FP_DOUBLE;
} else {
@@ -619,61 +640,61 @@
rSrcHi = rSrc + 1;
} else {
pair = true;
- opcode = kX86Mov32MR;
+ opcode = isArray ? kX86Mov32AR : kX86Mov32MR;
}
// TODO: double store is to unaligned address
DCHECK_EQ((displacement & 0x3), 0);
break;
case kWord:
case kSingle:
- opcode = kX86Mov32MR;
+ opcode = isArray ? kX86Mov32AR : kX86Mov32MR;
if (FPREG(rSrc)) {
- opcode = kX86MovssMR;
+ opcode = isArray ? kX86MovssAR : kX86MovssMR;
DCHECK(SINGLEREG(rSrc));
}
DCHECK_EQ((displacement & 0x3), 0);
break;
case kUnsignedHalf:
case kSignedHalf:
- opcode = kX86Mov16MR;
+ opcode = isArray ? kX86Mov16AR : kX86Mov16MR;
DCHECK_EQ((displacement & 0x1), 0);
break;
case kUnsignedByte:
case kSignedByte:
- opcode = kX86Mov8MR;
+ opcode = isArray ? kX86Mov8AR : kX86Mov8MR;
break;
default:
- LOG(FATAL) << "Bad case in storeBaseIndexedBody";
+ LOG(FATAL) << "Bad case in storeBaseIndexedDisp";
}
- if (!pair) {
- store = res = newLIR3(cUnit, opcode, rBase, displacement, rSrc);
+ if (!isArray) {
+ if (!pair) {
+ store = newLIR3(cUnit, opcode, rBase, displacement + LOWORD_OFFSET, rSrc);
+ } else {
+ store = newLIR3(cUnit, opcode, rBase, displacement + LOWORD_OFFSET, rSrc);
+ store2 = newLIR3(cUnit, opcode, rBase, displacement + HIWORD_OFFSET, rSrcHi);
+ }
} else {
- store = res = newLIR3(cUnit, opcode, rBase, displacement + LOWORD_OFFSET, rSrc);
- store2 = newLIR3(cUnit, opcode, rBase, displacement + HIWORD_OFFSET, rSrcHi);
- }
-
- if (rBase == rSP) {
- annotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
- false /* isLoad */, is64bit);
- if (pair) {
- annotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2,
- false /* isLoad */, is64bit);
+ if (!pair) {
+ store = newLIR5(cUnit, opcode, rBase, rIndex, scale, displacement + LOWORD_OFFSET, rSrc);
+ } else {
+ store = newLIR5(cUnit, opcode, rBase, rIndex, scale, displacement + LOWORD_OFFSET, rSrc);
+ store2 = newLIR5(cUnit, opcode, rBase, rIndex, scale, displacement + HIWORD_OFFSET, rSrcHi);
}
}
- return res;
+
+ return store;
}
-LIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
- int displacement, int rSrc, OpSize size)
-{
- return storeBaseDispBody(cUnit, rBase, displacement, rSrc, -1, size);
+LIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, int displacement, int rSrc, OpSize size) {
+ return storeBaseIndexedDisp(cUnit, NULL, rBase, INVALID_REG, 0, displacement,
+ rSrc, INVALID_REG, size, INVALID_SREG);
}
-LIR *storeBaseDispWide(CompilationUnit *cUnit, int rBase,
- int displacement, int rSrcLo, int rSrcHi)
-{
- return storeBaseDispBody(cUnit, rBase, displacement, rSrcLo, rSrcHi, kLong);
+LIR *storeBaseDispWide(CompilationUnit *cUnit, int rBase, int displacement,
+ int rSrcLo, int rSrcHi) {
+ return storeBaseIndexedDisp(cUnit, NULL, rBase, INVALID_REG, 0, displacement,
+ rSrcLo, rSrcHi, kLong, INVALID_SREG);
}
void storePair(CompilationUnit *cUnit, int base, int lowReg, int highReg)
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 5542317..6f33b56 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -406,7 +406,7 @@
{
newLIR2(cUnit, kX86Cmp32RR, src1, src2);
X86ConditionCode cc = oatX86ConditionEncoding(cond);
- LIR* branch = newLIR2(cUnit, kX86Jcc, 0 /* lir operand for Jcc offset */ , cc);
+ LIR* branch = newLIR2(cUnit, kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
branch->target = target;
return branch;
}
@@ -417,7 +417,7 @@
// TODO: when checkValue == 0 and reg is rCX, use the jcxz/nz opcode
newLIR2(cUnit, kX86Cmp32RI, reg, checkValue);
X86ConditionCode cc = oatX86ConditionEncoding(cond);
- LIR* branch = newLIR2(cUnit, kX86Jcc, 0 /* lir operand for Jcc offset */ , cc);
+ LIR* branch = newLIR2(cUnit, kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
branch->target = target;
return branch;
}
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index a767ff8..9b9fc6b 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -32,14 +32,16 @@
* caller save places a burden on up-calls to save/restore the callee save register, however, there
* are few registers that are callee save in the ABI. Changing something that is caller save and
* making it callee save places a burden on down-calls to save/restore the callee save register.
- * For these reasons we aim to match native conventions for caller and callee save
+ * For these reasons we aim to match native conventions for caller and callee save. The first 4
+ * registers can be used for byte operations, for this reason they are preferred for temporary
+ * scratch registers.
*
* General Purpose Register:
* Native: x86 | x86-64 / x32 | ART
* r0/eax: caller save | caller save | caller, Method*, scratch, return value
- * r1/ecx: caller save | caller save, arg4 | caller, arg2, scratch
- * r2/edx: caller save | caller save, arg3 | caller, arg1, scratch, high half of long return
- * r3/ebx: callee save | callee save | callee, available for dalvik register promotion
+ * r1/ecx: caller save | caller save, arg4 | caller, arg1, scratch
+ * r2/edx: caller save | caller save, arg3 | caller, arg2, scratch, high half of long return
+ * r3/ebx: callEE save | callEE save | callER, arg3, scratch
* r4/esp: stack pointer
* r5/ebp: callee save | callee save | callee, available for dalvik register promotion
* r6/esi: callEE save | callER save, arg2 | callee, available for dalvik register promotion
@@ -228,8 +230,9 @@
*/
#define rARG0 rAX
-#define rARG1 rDX
-#define rARG2 rCX
+#define rARG1 rCX
+#define rARG2 rDX
+#define rARG3 rBX
#define rRET0 rAX
#define rRET1 rDX
#define rINVOKE_TGT rAX
@@ -417,6 +420,8 @@
Binary0fOpCode(kX86Ucomiss), // unordered float compare
Binary0fOpCode(kX86Comisd), // double compare
Binary0fOpCode(kX86Comiss), // float compare
+ Binary0fOpCode(kX86Orps), // or of floating point registers
+ Binary0fOpCode(kX86Xorps), // xor of floating point registers
Binary0fOpCode(kX86Addsd), // double add
Binary0fOpCode(kX86Addss), // float add
Binary0fOpCode(kX86Mulsd), // double multiply
@@ -425,8 +430,9 @@
Binary0fOpCode(kX86Cvtsd2ss), // double to float
Binary0fOpCode(kX86Subsd), // double subtract
Binary0fOpCode(kX86Subss), // float subtract
- Binary0fOpCode(kX86Divsd), // double subtract
- Binary0fOpCode(kX86Divss), // float subtract
+ Binary0fOpCode(kX86Divsd), // double divide
+ Binary0fOpCode(kX86Divss), // float divide
+ kX86PsllqRI, // shift of floating point registers
Binary0fOpCode(kX86Movdxr), // move into xmm from gpr
Binary0fOpCode(kX86Movdrx), // move into reg from xmm
kX86Set8R, kX86Set8M, kX86Set8A,// set byte depending on condition operand
@@ -437,8 +443,8 @@
Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value
Binary0fOpCode(kX86Movsx16), // sign-extend 16-bit value
#undef Binary0fOpCode
- kX86Jcc, // jCC rel; lir operands - 0: rel, 1: CC, target assigned
- kX86Jmp, // jmp rel; lir operands - 0: rel, target assigned
+ kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
+ kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned
kX86CallR, // call reg; lir operands - 0: reg
kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp
kX86CallA, // call [base + index * scale + disp]
diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc
index 156a2d5..ba5c063 100644
--- a/src/compiler/codegen/x86/X86RallocUtil.cc
+++ b/src/compiler/codegen/x86/X86RallocUtil.cc
@@ -96,7 +96,6 @@
/* Clobber all regs that might be used by an external C call */
extern void oatClobberCalleeSave(CompilationUnit *cUnit)
{
- oatClobber(cUnit, rBX);
oatClobber(cUnit, rBP);
oatClobber(cUnit, rSI);
oatClobber(cUnit, rDI);
diff --git a/src/globals.h b/src/globals.h
index 4300a6e..0cf4260 100644
--- a/src/globals.h
+++ b/src/globals.h
@@ -46,6 +46,9 @@
// Required ARM instruction alignment
const int kArmAlignment = 4;
+// Required X86 instruction alignment
+const int kX86Alignment = 16;
+
// System page size. Normally you're expected to get this from
// sysconf(_SC_PAGESIZE) or some system-specific define (usually
// PAGESIZE or PAGE_SIZE). If we use a simple compile-time constant
diff --git a/src/jni_internal_x86.cc b/src/jni_internal_x86.cc
index f9871c4..86d7749 100644
--- a/src/jni_internal_x86.cc
+++ b/src/jni_internal_x86.cc
@@ -42,26 +42,27 @@
UniquePtr<X86Assembler> assembler(down_cast<X86Assembler*>(Assembler::Create(kX86)));
#define __ assembler->
size_t num_arg_array_bytes = NumArgArrayBytes(shorty, shorty_len);
- // Size of frame = return address + Method* + possible receiver + arg array size
+ // Size of frame = return address + saved EBX + Method* + possible receiver + arg array size
// Note, space is left in the frame to flush arguments in registers back to out locations.
- size_t frame_size = 2 * kPointerSize + (is_static ? 0 : kPointerSize) + num_arg_array_bytes;
+ size_t frame_size = 3 * kPointerSize + (is_static ? 0 : kPointerSize) + num_arg_array_bytes;
size_t pad_size = RoundUp(frame_size, kStackAlignment) - frame_size;
Register rMethod = EAX;
__ movl(rMethod, Address(ESP, 4)); // EAX = method
- Register rReceiver = EDX;
+ Register rReceiver = ECX;
if (!is_static) {
- __ movl(rReceiver, Address(ESP, 8)); // EDX = receiver
+ __ movl(rReceiver, Address(ESP, 8)); // ECX = receiver
}
- Register rArgArray = ECX;
- __ movl(rArgArray, Address(ESP, 16)); // ECX = arg array
+ // Save EBX
+ __ pushl(EBX);
+ Register rArgArray = EBX;
+ __ movl(rArgArray, Address(ESP, 20)); // EBX = arg array
// TODO: optimize the frame set up to avoid excessive SP math
// Push padding
if (pad_size != 0) {
__ subl(ESP, Immediate(pad_size));
}
-
// Push/copy arguments.
size_t arg_count = (shorty_len - 1);
size_t dst_offset = num_arg_array_bytes;
@@ -87,33 +88,48 @@
}
}
- // Backing space for receiver
+ // Backing space for receiver.
if (!is_static) {
__ pushl(Immediate(0));
}
- // Push 0 as NULL Method* thereby terminating managed stack crawls
+ // Push 0 as NULL Method* thereby terminating managed stack crawls.
__ pushl(Immediate(0));
if (!is_static) {
- if (num_arg_array_bytes >= static_cast<size_t>(kPointerSize)) {
- // Receiver already in EDX, pass 1st arg in ECX.
- __ movl(ECX, Address(rArgArray, 0));
+ if (shorty_len > 1) {
+ // Receiver already in ECX, pass remaining 2 args in EDX and EBX.
+ __ movl(EDX, Address(rArgArray, 0));
+ if (shorty[1] == 'D' || shorty[1] == 'J') {
+ __ movl(EBX, Address(rArgArray, sizeof(JValue) / 2));
+ } else if (shorty_len > 2) {
+ __ movl(EBX, Address(rArgArray, sizeof(JValue)));
+ }
}
} else {
- if (num_arg_array_bytes >= static_cast<size_t>(kPointerSize)) {
- // Pass 1st arg in EDX.
- __ movl(EDX, Address(rArgArray, 0));
- if (num_arg_array_bytes >= static_cast<size_t>(2* kPointerSize)) {
- // Pass 2nd arg (or second 32-bit chunk of a wide 1st arg) in ECX.
- bool is_wide = (shorty[1] == 'D' || shorty[1] == 'J');
- __ movl(ECX, Address(rArgArray, is_wide ? kPointerSize : 2 * kPointerSize));
+ if (shorty_len > 1) {
+ // Pass remaining 3 args in ECX, EDX and EBX.
+ __ movl(ECX, Address(rArgArray, 0));
+ if (shorty[1] == 'D' || shorty[1] == 'J') {
+ __ movl(EDX, Address(rArgArray, sizeof(JValue) / 2));
+ if (shorty_len > 2) {
+ __ movl(EBX, Address(rArgArray, sizeof(JValue)));
+ }
+ } else if (shorty_len > 2) {
+ __ movl(EDX, Address(rArgArray, sizeof(JValue)));
+ if (shorty[2] == 'D' || shorty[2] == 'J') {
+ __ movl(EBX, Address(rArgArray, sizeof(JValue) + (sizeof(JValue) / 2)));
+ } else {
+ __ movl(EBX, Address(rArgArray, sizeof(JValue) + sizeof(JValue)));
+ }
}
}
}
__ call(Address(EAX, Method::GetCodeOffset())); // Call code off of method
- // pop arguments up to the return address
- __ addl(ESP, Immediate(frame_size + pad_size - kPointerSize));
+ // Pop arguments up to EBX and the return address.
+ __ addl(ESP, Immediate(frame_size + pad_size - (2 * kPointerSize)));
+ // Restore EBX.
+ __ popl(EBX);
char ch = shorty[0];
if (ch != 'V') {
// Load the result JValue pointer.
diff --git a/src/runtime.cc b/src/runtime.cc
index 96c4451..b340317 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -968,8 +968,7 @@
method->SetFpSpillMask(fp_spills);
} else if (instruction_set == kX86) {
method->SetFrameSizeInBytes(32);
- method->SetCoreSpillMask((1 << art::x86::EBX) | (1 << art::x86::EBP) | (1 << art::x86::ESI) |
- (1 << art::x86::EDI));
+ method->SetCoreSpillMask((1 << art::x86::EBP) | (1 << art::x86::ESI) | (1 << art::x86::EDI));
method->SetFpSpillMask(0);
} else {
UNIMPLEMENTED(FATAL);
diff --git a/src/runtime_support_x86.S b/src/runtime_support_x86.S
index e621eff..3d57d5d 100644
--- a/src/runtime_support_x86.S
+++ b/src/runtime_support_x86.S
@@ -18,7 +18,7 @@
pushl %edi // Save callee saves
pushl %esi
pushl %ebp
- pushl %ebx
+ pushl $0
pushl $0
pushl $0
pushl $0 // Will be clobbered to be Method*
diff --git a/src/stub_x86.cc b/src/stub_x86.cc
index 1820f5f..845c179 100644
--- a/src/stub_x86.cc
+++ b/src/stub_x86.cc
@@ -49,7 +49,7 @@
__ pushl(EDI);
__ pushl(ESI);
__ pushl(EBP);
- __ pushl(EBX);
+ __ pushl(Immediate(0));
__ pushl(Immediate(0));
__ pushl(Immediate(0));
__ pushl(Immediate(0)); // <-- callee save Method* to go here