Implemented a new scheduler and FP register allocator.
Improved performance by 50% over existing JIT for some FP benchmarks.
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index 30dc508..65792be 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -122,9 +122,10 @@
if (heapSpacePtr == shadowSpace->heapSpaceTail)
data = *((unsigned int*) addr);
- //LOGD("*** HEAP LOAD: Addr: 0x%x Data: 0x%x", addr, data);
-
int reg = (heapArgSpace->regMap >> 4) & 0xF;
+
+ //LOGD("*** HEAP LOAD: Reg:%d Addr: 0x%x Data: 0x%x", reg, addr, data);
+
selfVerificationLoadDecodeData(heapArgSpace, data, reg);
}
@@ -483,6 +484,48 @@
#endif
/*
+ * Mark load/store instructions that access Dalvik registers through rFP +
+ * offset.
+ */
+static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad)
+{
+ if (isLoad) {
+ lir->useMask |= ENCODE_DALVIK_REG;
+ } else {
+ lir->defMask |= ENCODE_DALVIK_REG;
+ }
+
+ /*
+ * Store the Dalvik register id in aliasInfo. Mark the MSB if it is a 64-bit
+ * access.
+ */
+ lir->aliasInfo = regId;
+ if (DOUBLEREG(lir->operands[0])) {
+ lir->aliasInfo |= 0x80000000;
+ }
+}
+
+/*
+ * Decode the register id and mark the corresponding bit(s).
+ */
+static inline void setupRegMask(u8 *mask, int reg)
+{
+ u8 seed;
+ int shift;
+ int regId = reg & 0x1f;
+
+ /*
+ * Each double register is equal to a pair of single-precision FP registers
+ */
+ seed = DOUBLEREG(reg) ? 3 : 1;
+ /* FP register starts at bit position 16 */
+ shift = FPREG(reg) ? kFPReg0 : 0;
+ /* Expand the double register id into single offset */
+ shift += regId;
+ *mask |= seed << shift;
+}
+
+/*
* Set up the proper fields in the resource mask
*/
static void setupResourceMasks(ArmLIR *lir)
@@ -500,20 +543,25 @@
/* Set up the mask for resources that are updated */
if (flags & IS_BRANCH) {
lir->defMask |= ENCODE_REG_PC;
+ lir->useMask |= ENCODE_REG_PC;
}
if (flags & REG_DEF0) {
- lir->defMask |= ENCODE_GP_REG(lir->operands[0]);
+ setupRegMask(&lir->defMask, lir->operands[0]);
}
if (flags & REG_DEF1) {
- lir->defMask |= ENCODE_GP_REG(lir->operands[1]);
+ setupRegMask(&lir->defMask, lir->operands[1]);
}
if (flags & REG_DEF_SP) {
lir->defMask |= ENCODE_REG_SP;
}
+ if (flags & REG_DEF_LR) {
+ lir->defMask |= ENCODE_REG_LR;
+ }
+
if (flags & REG_DEF_LIST0) {
lir->defMask |= ENCODE_REG_LIST(lir->operands[0]);
}
@@ -528,7 +576,7 @@
/* Conservatively treat the IT block */
if (flags & IS_IT) {
- lir->defMask = -1;
+ lir->defMask = ENCODE_ALL;
}
/* Set up the mask for resources that are used */
@@ -541,7 +589,7 @@
for (i = 0; i < 3; i++) {
if (flags & (1 << (kRegUse0 + i))) {
- lir->useMask |= ENCODE_GP_REG(lir->operands[i]);
+ setupRegMask(&lir->useMask, lir->operands[i]);
}
}
}
@@ -698,6 +746,17 @@
return NULL;
}
+/*
+ * Generate an ARM_PSEUDO_BARRIER marker to indicate the boundary of special
+ * blocks.
+ */
+static void genBarrier(CompilationUnit *cUnit)
+{
+ ArmLIR *barrier = newLIR0(cUnit, ARM_PSEUDO_BARRIER);
+ /* Mark all resources as being clobbered */
+ barrier->defMask = ENCODE_ALL;
+}
+
/* Perform the actual operation for OP_RETURN_* */
static void genReturnCommon(CompilationUnit *cUnit, MIR *mir)
{
@@ -1603,7 +1662,13 @@
opRegRegImm(cUnit, OP_ADD, r4PC, rFP, srcOffset, rNone);
/* load [r0 .. min(numArgs,4)] */
regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1;
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, r4PC, regMask);
+ genBarrier(cUnit);
opRegRegImm(cUnit, OP_SUB, r7, rFP,
sizeof(StackSaveArea) + (numArgs << 2), rNone);
@@ -1627,9 +1692,16 @@
if (numArgs > 11) {
loadConstant(cUnit, 5, ((numArgs - 4) >> 2) << 2);
loopLabel = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ loopLabel->defMask = ENCODE_ALL;
}
storeMultiple(cUnit, r7, regMask);
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, r4PC, regMask);
+ genBarrier(cUnit);
/* No need to generate the loop structure if numArgs <= 11 */
if (numArgs > 11) {
opRegImm(cUnit, OP_SUB, rFP, 4, rNone);
@@ -1643,7 +1715,13 @@
/* Generate the loop epilogue - don't use r0 */
if ((numArgs > 4) && (numArgs % 4)) {
regMask = ((1 << (numArgs & 0x3)) - 1) << 1;
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, r4PC, regMask);
+ genBarrier(cUnit);
}
if (numArgs >= 8)
opImm(cUnit, OP_POP, (1 << r0 | 1 << rFP));
@@ -1960,7 +2038,13 @@
} else {
assert(rDestLo < rDestHi);
res = loadValueAddress(cUnit, vSrc, rDestLo);
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, rDestLo, (1<<rDestLo) | (1<<rDestHi));
+ genBarrier(cUnit);
}
return res;
}
@@ -1984,7 +2068,13 @@
} else {
assert(rSrcLo < rSrcHi);
res = loadValueAddress(cUnit, vDest, rScratch);
+ /*
+ * Protect the storeMultiple instruction from being reordered with
+ * other Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
storeMultiple(cUnit, rScratch, (1<<rSrcLo) | (1 << rSrcHi));
+ genBarrier(cUnit);
}
return res;
}
@@ -2262,6 +2352,7 @@
genZeroCheck(cUnit, r0, mir->offset, NULL);
/* check cast passed - branch target here */
ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ target->defMask = ENCODE_ALL;
branch1->generic.target = (LIR *)target;
branch2->generic.target = (LIR *)target;
break;
@@ -2738,6 +2829,7 @@
opReg(cUnit, OP_BLX, r4PC);
/* branch target here */
ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ target->defMask = ENCODE_ALL;
storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
branch1->generic.target = (LIR *)target;
branch2->generic.target = (LIR *)target;
@@ -3888,6 +3980,8 @@
/* Remember the first LIR for this block */
if (headLIR == NULL) {
headLIR = boundaryLIR;
+ /* Set the first boundaryLIR as a scheduling barrier */
+ headLIR->defMask = ENCODE_ALL;
}
bool notHandled;
@@ -4155,3 +4249,19 @@
LOGD("dalvik.vm.jit.op = %s", buf);
}
}
+
+/* Common initialization routine for an architecture family */
+bool dvmCompilerArchInit()
+{
+ int i;
+
+ for (i = 0; i < ARM_LAST; i++) {
+ if (EncodingMap[i].opCode != i) {
+ LOGE("Encoding order for %s is wrong: expecting %d, seeing %d",
+ EncodingMap[i].name, i, EncodingMap[i].opCode);
+ dvmAbort();
+ }
+ }
+
+ return compilerArchVariantInit();
+}