Add support for suspend polling on backward branches in JIT'ed code.
The polling is expensive for now as it is done through three
instructions: ld/ld/branch. As a result, a bunch of bonus stuff has
been worked on to mitigate the extra overhead:
- Cleaned up resource flags for memory disambiguation.
- Rewrote the load/store elimination and scheduler routines to hide
the ld/ld latency for the GC flag. Separated the dependency checking
into a memory disambiguation part and a resource conflict part.
- Allowed Dalvik/constant/non-aliasing loads to be hoisted above
branches for null/range checks.
- Created extended basic blocks following goto instructions so that
longer instruction streams can be optimized as a whole.
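
For reference, the three-instruction poll amounts to the following check,
shown here as a rough C sketch (not the actual emitted code; the JIT emits
it as two dependent loads plus a compare-and-branch via genSuspendPoll in
CodegenDriver.c, and puntToInterpreter() below is only a hypothetical
stand-in for the branch to the PC-reconstruction code):

    /* Sketch of what the ld/ld/branch poll checks at runtime. */
    static inline void suspendPoll(const InterpState *interpState)
    {
        /* ld #1: fetch the pointer to the per-thread suspend count */
        int *pSuspendCount = interpState->pSelfSuspendCount;
        /* ld #2: fetch the count itself */
        int suspendCount = *pSuspendCount;
        /* branch: a pending suspend request punts back to the interpreter */
        if (suspendCount != 0) {
            puntToInterpreter();  /* hypothetical stand-in for the punt path */
        }
    }
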
Without the bonus stuff, performance dropped by roughly 5-10% on some
benchmarks because there is no headroom to hide the polling latency in
tight loops. With the bonus stuff, the performance delta is within
+/-5% with polling code generated. With the bonus stuff but polling
disabled, it provides consistent performance improvements:
CaffeineMark 3.6%
Linpack 11.1%
Scimark 9.7%
Sieve 33.0%
Checkers 6.0%
As a result, GC polling is disabled by default but can be turned on
through the -Xjitsuspendpoll flag for experimental purposes.
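
As a rough illustration of how the rewritten load/store elimination and
scheduler split their dependency checking, the sketch below combines the
memory disambiguation step (comparing only the abstract location bits)
with the register conflict step (the CHECK_REG_DEP test added in
LocalOptimizations.c). It is a simplified model only; the real code
further refines must-alias Dalvik/literal accesses and partially
overlapping registers before deciding to stop.

    /* Sketch: does checkLIR block eliminating or moving thisLIR? */
    static bool mayConflict(const ArmLIR *thisLIR, const ArmLIR *checkLIR)
    {
        /* 1) Memory disambiguation: compare only the abstract memory
         *    location bits (Dalvik frame, literal, heap, must-not-alias). */
        u8 thisMem  = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM;
        u8 checkMem = (checkLIR->useMask | checkLIR->defMask) & ENCODE_MEM;
        if (thisMem & checkMem)
            return true;                      /* possible memory alias */

        /* 2) Register resource conflict: RAW/WAR/WAW on the non-memory
         *    resource bits, mirroring CHECK_REG_DEP. */
        u8 use = thisLIR->useMask & ~ENCODE_MEM;
        u8 def = thisLIR->defMask & ~ENCODE_MEM;
        return (def & checkLIR->useMask) || ((use | def) & checkLIR->defMask);
    }
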
Change-Id: Ia81fc85de3e2b70e6cc93bc37c2b845892003cdb
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 89cd141..271c31b 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -251,6 +251,7 @@
BasicBlock *entryBlock;
BasicBlock *exitBlock;
BasicBlock *curBlock;
+ BasicBlock *nextCodegenBlock; // for extended trace codegen
GrowableList dfsOrder;
GrowableList domPostOrderTraversal;
BitVector *tryBlockAddr;
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index c4dbf27..be8eee6 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -674,9 +674,11 @@
searchBlockId);
if (targetOffset == searchBB->startOffset) {
curBB->taken = searchBB;
+ dvmCompilerSetBit(searchBB->predecessors, curBB->id);
}
if (fallThroughOffset == searchBB->startOffset) {
curBB->fallThrough = searchBB;
+ dvmCompilerSetBit(searchBB->predecessors, curBB->id);
/*
* Fallthrough block of an invoke instruction needs to be
@@ -718,11 +720,13 @@
exitBB->needFallThroughBranch = true;
loopBranch->taken = exitBB;
+ dvmCompilerSetBit(exitBB->predecessors, loopBranch->id);
backwardCell =
dvmCompilerNewBB(kChainingCellBackwardBranch, numBlocks++);
dvmInsertGrowableList(blockList, (intptr_t) backwardCell);
backwardCell->startOffset = entryCodeBB->startOffset;
loopBranch->fallThrough = backwardCell;
+ dvmCompilerSetBit(backwardCell->predecessors, loopBranch->id);
/* Create the chaining cell as the fallthrough of the exit block */
exitChainingCell = dvmCompilerNewBB(kChainingCellNormal,
@@ -731,6 +735,7 @@
exitChainingCell->startOffset = targetOffset;
exitBB->fallThrough = exitChainingCell;
+ dvmCompilerSetBit(exitChainingCell->predecessors, exitBB->id);
cUnit.hasLoop = true;
}
@@ -788,6 +793,7 @@
dvmInsertGrowableList(blockList, (intptr_t) fallThroughBB);
fallThroughBB->startOffset = fallThroughOffset;
curBB->fallThrough = fallThroughBB;
+ dvmCompilerSetBit(fallThroughBB->predecessors, curBB->id);
}
/* Target block not included in the trace */
if (curBB->taken == NULL &&
@@ -835,6 +841,7 @@
}
if (newBB) {
curBB->taken = newBB;
+ dvmCompilerSetBit(newBB->predecessors, curBB->id);
dvmInsertGrowableList(blockList, (intptr_t) newBB);
}
}
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index c6bcac2..be6d56b 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -263,10 +263,22 @@
if (mask & ENCODE_FP_STATUS) {
strcat(buf, "fpcc ");
}
+
+ /* Memory bits */
if (armLIR && (mask & ENCODE_DALVIK_REG)) {
sprintf(buf + strlen(buf), "dr%d%s", armLIR->aliasInfo & 0xffff,
(armLIR->aliasInfo & 0x80000000) ? "(+1)" : "");
}
+ if (mask & ENCODE_LITERAL) {
+ strcat(buf, "lit ");
+ }
+
+ if (mask & ENCODE_HEAP_REF) {
+ strcat(buf, "heap ");
+ }
+ if (mask & ENCODE_MUST_NOT_ALIAS) {
+ strcat(buf, "noalias ");
+ }
}
if (buf[0]) {
LOGD("%s: %s", prefix, buf);
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index d3e145e..5adc1ed 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -129,12 +129,12 @@
kFPReg0 = 16,
kRegEnd = 48,
kCCode = kRegEnd,
- kFPStatus,
- kDalvikReg,
- kLiteral,
- kFrameRef,
- kHeapRef,
- kLitPoolRef
+ kFPStatus, // FP status word
+ // The following four bits are for memory disambiguation
+ kDalvikReg, // 1 Dalvik Frame (can be fully disambiguated)
+ kLiteral, // 2 Literal pool (can be fully disambiguated)
+ kHeapRef, // 3 Somewhere on the heap (alias with any other heap)
+ kMustNotAlias, // 4 Guaranteed to be non-alias (eg *(r6+x))
} ResourceEncodingPos;
#define ENCODE_REG_LIST(N) ((u8) N)
@@ -144,19 +144,15 @@
#define ENCODE_CCODE (1ULL << kCCode)
#define ENCODE_FP_STATUS (1ULL << kFPStatus)
- /* Must alias */
+/* Abstract memory locations */
#define ENCODE_DALVIK_REG (1ULL << kDalvikReg)
#define ENCODE_LITERAL (1ULL << kLiteral)
-
- /* May alias */
-#define ENCODE_FRAME_REF (1ULL << kFrameRef)
#define ENCODE_HEAP_REF (1ULL << kHeapRef)
-#define ENCODE_LITPOOL_REF (1ULL << kLitPoolRef)
+#define ENCODE_MUST_NOT_ALIAS (1ULL << kMustNotAlias)
#define ENCODE_ALL (~0ULL)
-#define ENCODE_MEM_DEF (ENCODE_FRAME_REF | ENCODE_HEAP_REF)
-#define ENCODE_MEM_USE (ENCODE_FRAME_REF | ENCODE_HEAP_REF \
- | ENCODE_LITPOOL_REF)
+#define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
+ ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
#define DECODE_ALIAS_INFO_REG(X) (X & 0xffff)
#define DECODE_ALIAS_INFO_WIDE(X) ((X & 0x80000000) ? 1 : 0)
diff --git a/vm/compiler/codegen/arm/CodegenCommon.c b/vm/compiler/codegen/arm/CodegenCommon.c
index f4ca95c..75134bf 100644
--- a/vm/compiler/codegen/arm/CodegenCommon.c
+++ b/vm/compiler/codegen/arm/CodegenCommon.c
@@ -35,14 +35,12 @@
static void setMemRefType(ArmLIR *lir, bool isLoad, int memType)
{
u8 *maskPtr;
- u8 mask;
- assert( EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE));
+ u8 mask = ENCODE_MEM;
+ assert(EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE));
if (isLoad) {
maskPtr = &lir->useMask;
- mask = ENCODE_MEM_USE;
} else {
maskPtr = &lir->defMask;
- mask = ENCODE_MEM_DEF;
}
/* Clear out the memref flags */
*maskPtr &= ~mask;
@@ -50,14 +48,19 @@
switch(memType) {
case kLiteral:
assert(isLoad);
- *maskPtr |= (ENCODE_LITERAL | ENCODE_LITPOOL_REF);
+ *maskPtr |= ENCODE_LITERAL;
break;
case kDalvikReg:
- *maskPtr |= (ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
+ *maskPtr |= ENCODE_DALVIK_REG;
break;
case kHeapRef:
*maskPtr |= ENCODE_HEAP_REF;
break;
+ case kMustNotAlias:
+ /* Currently only loads can be marked as kMustNotAlias */
+ assert(!(EncodingMap[lir->opcode].flags & IS_STORE));
+ *maskPtr |= ENCODE_MUST_NOT_ALIAS;
+ break;
default:
LOGE("Jit: invalid memref kind - %d", memType);
assert(0); // Bail if debug build, set worst-case in the field
@@ -138,9 +141,13 @@
setMemRefType(lir, flags & IS_LOAD, kHeapRef);
}
+ /*
+ * Conservatively assume the branch here will call out to a function that
+ * in turn will trash everything.
+ */
if (flags & IS_BRANCH) {
- lir->defMask |= ENCODE_REG_PC;
- lir->useMask |= ENCODE_REG_PC;
+ lir->defMask = lir->useMask = ENCODE_ALL;
+ return;
}
if (flags & REG_DEF0) {
@@ -176,11 +183,6 @@
lir->defMask = ENCODE_ALL;
}
- /* Set up the mask for resources that are used */
- if (flags & IS_BRANCH) {
- lir->useMask |= ENCODE_REG_PC;
- }
-
if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
int i;
@@ -225,6 +227,37 @@
}
/*
+ * Set up the accurate resource mask for branch instructions
+ */
+static void relaxBranchMasks(ArmLIR *lir)
+{
+ int flags = EncodingMap[lir->opcode].flags;
+
+ /* Make sure only branch instructions are passed here */
+ assert(flags & IS_BRANCH);
+
+ lir->useMask = lir->defMask = ENCODE_REG_PC;
+
+ if (flags & REG_DEF_LR) {
+ lir->defMask |= ENCODE_REG_LR;
+ }
+
+ if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (flags & (1 << (kRegUse0 + i))) {
+ setupRegMask(&lir->useMask, lir->operands[i]);
+ }
+ }
+ }
+
+ if (flags & USES_CCODES) {
+ lir->useMask |= ENCODE_CCODE;
+ }
+}
+
+/*
* The following are building blocks to construct low-level IRs with 0 - 4
* operands.
*/
@@ -407,5 +440,9 @@
}
/* Branch to the PC reconstruction code */
branch->generic.target = (LIR *) pcrLabel;
+
+ /* Clear the conservative flags for branches that punt to the interpreter */
+ relaxBranchMasks(branch);
+
return pcrLabel;
}
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 7049e49..ade4197 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1368,6 +1368,22 @@
#endif
/*
+ * Fetch *InterpState->pSelfSuspendCount. If the suspend count is non-zero,
+ * punt to the interpreter.
+ */
+static void genSuspendPoll(CompilationUnit *cUnit, MIR *mir)
+{
+ int rTemp = dvmCompilerAllocTemp(cUnit);
+ ArmLIR *ld;
+ ld = loadWordDisp(cUnit, rGLUE, offsetof(InterpState, pSelfSuspendCount),
+ rTemp);
+ setMemRefType(ld, true /* isLoad */, kMustNotAlias);
+ ld = loadWordDisp(cUnit, rTemp, 0, rTemp);
+ setMemRefType(ld, true /* isLoad */, kMustNotAlias);
+ genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
+}
+
+/*
* The following are the first-level codegen routines that analyze the format
* of each bytecode then either dispatch special purpose codegen routines
* or produce corresponding Thumb instructions directly.
@@ -1376,8 +1392,26 @@
static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir,
BasicBlock *bb, ArmLIR *labelList)
{
- /* For OP_GOTO, OP_GOTO_16, and OP_GOTO_32 */
- genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
+ /* backward branch? */
+ bool backwardBranch = (bb->taken->startOffset <= mir->offset);
+
+ if (backwardBranch && gDvmJit.genSuspendPoll) {
+ genSuspendPoll(cUnit, mir);
+ }
+
+ int numPredecessors = dvmCountSetBits(bb->taken->predecessors);
+ /*
+ * Things could be hoisted out of the taken block into the predecessor, so
+ * make sure it is dominated by the predecessor.
+ */
+ if (numPredecessors == 1 && bb->taken->visited == false &&
+ bb->taken->blockType == kDalvikByteCode &&
+ cUnit->methodJitMode == false) {
+ cUnit->nextCodegenBlock = bb->taken;
+ } else {
+ /* For OP_GOTO, OP_GOTO_16, and OP_GOTO_32 */
+ genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
+ }
return false;
}
@@ -1995,8 +2029,16 @@
{
Opcode dalvikOpcode = mir->dalvikInsn.opcode;
ArmConditionCode cond;
+ /* backward branch? */
+ bool backwardBranch = (bb->taken->startOffset <= mir->offset);
+
+ if (backwardBranch && gDvmJit.genSuspendPoll) {
+ genSuspendPoll(cUnit, mir);
+ }
+
RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+
opRegImm(cUnit, kOpCmp, rlSrc.lowReg, 0);
//TUNING: break this out to allow use of Thumb2 CB[N]Z
@@ -2509,11 +2551,19 @@
{
Opcode dalvikOpcode = mir->dalvikInsn.opcode;
ArmConditionCode cond;
+ /* backward branch? */
+ bool backwardBranch = (bb->taken->startOffset <= mir->offset);
+
+ if (backwardBranch && gDvmJit.genSuspendPoll) {
+ genSuspendPoll(cUnit, mir);
+ }
+
RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0);
RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1);
rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
+
opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
switch (dalvikOpcode) {
@@ -4094,6 +4144,10 @@
dvmInitGrowableList(&chainingListByType[i], 2);
}
+ /* Clear the visited flag for each block */
+ dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
+ kAllNodes, false /* isIterative */);
+
GrowableListIterator iterator;
dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
@@ -4105,6 +4159,7 @@
MIR *mir;
BasicBlock *bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
if (bb == NULL) break;
+ if (bb->visited == true) continue;
labelList[i].operands[0] = bb->startOffset;
@@ -4199,171 +4254,189 @@
}
ArmLIR *headLIR = NULL;
+ BasicBlock *nextBB = bb;
- for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+ /*
+ * Try to build a longer optimization unit. Currently if the previous
+ * block ends with a goto, we continue adding instructions and don't
+ * reset the register allocation pool.
+ */
+ for (; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
+ bb = nextBB;
+ bb->visited = true;
+ cUnit->nextCodegenBlock = NULL;
- dvmCompilerResetRegPool(cUnit);
- if (gDvmJit.disableOpt & (1 << kTrackLiveTemps)) {
- dvmCompilerClobberAllRegs(cUnit);
- }
+ for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
- if (gDvmJit.disableOpt & (1 << kSuppressLoads)) {
- dvmCompilerResetDefTracking(cUnit);
- }
-
- if (mir->dalvikInsn.opcode >= kMirOpFirst) {
- handleExtendedMIR(cUnit, mir);
- continue;
- }
-
-
- Opcode dalvikOpcode = mir->dalvikInsn.opcode;
- InstructionFormat dalvikFormat = dexGetFormatFromOpcode(dalvikOpcode);
- char *note;
- if (mir->OptimizationFlags & MIR_INLINED) {
- note = " (I)";
- } else if (mir->OptimizationFlags & MIR_INLINED_PRED) {
- note = " (PI)";
- } else if (mir->OptimizationFlags & MIR_CALLEE) {
- note = " (C)";
- } else {
- note = NULL;
- }
-
- ArmLIR *boundaryLIR;
-
- /*
- * Don't generate the boundary LIR unless we are debugging this
- * trace or we need a scheduling barrier.
- */
- if (headLIR == NULL || cUnit->printMe == true) {
- boundaryLIR =
- newLIR2(cUnit, kArmPseudoDalvikByteCodeBoundary,
- mir->offset,
- (int) dvmCompilerGetDalvikDisassembly(
- &mir->dalvikInsn, note));
- /* Remember the first LIR for this block */
- if (headLIR == NULL) {
- headLIR = boundaryLIR;
- /* Set the first boundaryLIR as a scheduling barrier */
- headLIR->defMask = ENCODE_ALL;
+ dvmCompilerResetRegPool(cUnit);
+ if (gDvmJit.disableOpt & (1 << kTrackLiveTemps)) {
+ dvmCompilerClobberAllRegs(cUnit);
}
- }
- /* Don't generate the SSA annotation unless verbose mode is on */
- if (cUnit->printMe && mir->ssaRep) {
- char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep);
- newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
- }
+ if (gDvmJit.disableOpt & (1 << kSuppressLoads)) {
+ dvmCompilerResetDefTracking(cUnit);
+ }
- bool notHandled;
- /*
- * Debugging: screen the opcode first to see if it is in the
- * do[-not]-compile list
- */
- bool singleStepMe = SINGLE_STEP_OP(dalvikOpcode);
+ if (mir->dalvikInsn.opcode >= kMirOpFirst) {
+ handleExtendedMIR(cUnit, mir);
+ continue;
+ }
+
+
+ Opcode dalvikOpcode = mir->dalvikInsn.opcode;
+ InstructionFormat dalvikFormat =
+ dexGetFormatFromOpcode(dalvikOpcode);
+ char *note;
+ if (mir->OptimizationFlags & MIR_INLINED) {
+ note = " (I)";
+ } else if (mir->OptimizationFlags & MIR_INLINED_PRED) {
+ note = " (PI)";
+ } else if (mir->OptimizationFlags & MIR_CALLEE) {
+ note = " (C)";
+ } else {
+ note = NULL;
+ }
+
+ ArmLIR *boundaryLIR;
+
+ /*
+ * Don't generate the boundary LIR unless we are debugging this
+ * trace or we need a scheduling barrier.
+ */
+ if (headLIR == NULL || cUnit->printMe == true) {
+ boundaryLIR =
+ newLIR2(cUnit, kArmPseudoDalvikByteCodeBoundary,
+ mir->offset,
+ (int) dvmCompilerGetDalvikDisassembly(
+ &mir->dalvikInsn, note));
+ /* Remember the first LIR for this block */
+ if (headLIR == NULL) {
+ headLIR = boundaryLIR;
+ /* Set the first boundaryLIR as a scheduling barrier */
+ headLIR->defMask = ENCODE_ALL;
+ }
+ }
+
+ /*
+ * Don't generate the SSA annotation unless verbose mode is on
+ */
+ if (cUnit->printMe && mir->ssaRep) {
+ char *ssaString = dvmCompilerGetSSAString(cUnit,
+ mir->ssaRep);
+ newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
+ }
+
+ bool notHandled;
+ /*
+ * Debugging: screen the opcode first to see if it is in the
+ * do[-not]-compile list
+ */
+ bool singleStepMe = SINGLE_STEP_OP(dalvikOpcode);
#if defined(WITH_SELF_VERIFICATION)
- if (singleStepMe == false) {
- singleStepMe = selfVerificationPuntOps(mir);
- }
+ if (singleStepMe == false) {
+ singleStepMe = selfVerificationPuntOps(mir);
+ }
#endif
- if (singleStepMe || cUnit->allSingleStep) {
- notHandled = false;
- genInterpSingleStep(cUnit, mir);
- } else {
- opcodeCoverage[dalvikOpcode]++;
- switch (dalvikFormat) {
- case kFmt10t:
- case kFmt20t:
- case kFmt30t:
- notHandled = handleFmt10t_Fmt20t_Fmt30t(cUnit,
- mir, bb, labelList);
- break;
- case kFmt10x:
- notHandled = handleFmt10x(cUnit, mir);
- break;
- case kFmt11n:
- case kFmt31i:
- notHandled = handleFmt11n_Fmt31i(cUnit, mir);
- break;
- case kFmt11x:
- notHandled = handleFmt11x(cUnit, mir);
- break;
- case kFmt12x:
- notHandled = handleFmt12x(cUnit, mir);
- break;
- case kFmt20bc:
- case kFmt40sc:
- notHandled = handleFmt20bc_Fmt40sc(cUnit, mir);
- break;
- case kFmt21c:
- case kFmt31c:
- case kFmt41c:
- notHandled = handleFmt21c_Fmt31c_Fmt41c(cUnit, mir);
- break;
- case kFmt21h:
- notHandled = handleFmt21h(cUnit, mir);
- break;
- case kFmt21s:
- notHandled = handleFmt21s(cUnit, mir);
- break;
- case kFmt21t:
- notHandled = handleFmt21t(cUnit, mir, bb, labelList);
- break;
- case kFmt22b:
- case kFmt22s:
- notHandled = handleFmt22b_Fmt22s(cUnit, mir);
- break;
- case kFmt22c:
- case kFmt52c:
- notHandled = handleFmt22c_Fmt52c(cUnit, mir);
- break;
- case kFmt22cs:
- notHandled = handleFmt22cs(cUnit, mir);
- break;
- case kFmt22t:
- notHandled = handleFmt22t(cUnit, mir, bb, labelList);
- break;
- case kFmt22x:
- case kFmt32x:
- notHandled = handleFmt22x_Fmt32x(cUnit, mir);
- break;
- case kFmt23x:
- notHandled = handleFmt23x(cUnit, mir);
- break;
- case kFmt31t:
- notHandled = handleFmt31t(cUnit, mir);
- break;
- case kFmt3rc:
- case kFmt35c:
- case kFmt5rc:
- notHandled = handleFmt35c_3rc_5rc(cUnit, mir, bb,
+ if (singleStepMe || cUnit->allSingleStep) {
+ notHandled = false;
+ genInterpSingleStep(cUnit, mir);
+ } else {
+ opcodeCoverage[dalvikOpcode]++;
+ switch (dalvikFormat) {
+ case kFmt10t:
+ case kFmt20t:
+ case kFmt30t:
+ notHandled = handleFmt10t_Fmt20t_Fmt30t(cUnit,
+ mir, bb, labelList);
+ break;
+ case kFmt10x:
+ notHandled = handleFmt10x(cUnit, mir);
+ break;
+ case kFmt11n:
+ case kFmt31i:
+ notHandled = handleFmt11n_Fmt31i(cUnit, mir);
+ break;
+ case kFmt11x:
+ notHandled = handleFmt11x(cUnit, mir);
+ break;
+ case kFmt12x:
+ notHandled = handleFmt12x(cUnit, mir);
+ break;
+ case kFmt20bc:
+ case kFmt40sc:
+ notHandled = handleFmt20bc_Fmt40sc(cUnit, mir);
+ break;
+ case kFmt21c:
+ case kFmt31c:
+ case kFmt41c:
+ notHandled = handleFmt21c_Fmt31c_Fmt41c(cUnit, mir);
+ break;
+ case kFmt21h:
+ notHandled = handleFmt21h(cUnit, mir);
+ break;
+ case kFmt21s:
+ notHandled = handleFmt21s(cUnit, mir);
+ break;
+ case kFmt21t:
+ notHandled = handleFmt21t(cUnit, mir, bb,
labelList);
- break;
- case kFmt3rms:
- case kFmt35ms:
- notHandled = handleFmt35ms_3rms(cUnit, mir, bb,
- labelList);
- break;
- case kFmt35mi:
- case kFmt3rmi:
- notHandled = handleExecuteInline(cUnit, mir);
- break;
- case kFmt51l:
- notHandled = handleFmt51l(cUnit, mir);
- break;
- default:
- notHandled = true;
- break;
+ break;
+ case kFmt22b:
+ case kFmt22s:
+ notHandled = handleFmt22b_Fmt22s(cUnit, mir);
+ break;
+ case kFmt22c:
+ case kFmt52c:
+ notHandled = handleFmt22c_Fmt52c(cUnit, mir);
+ break;
+ case kFmt22cs:
+ notHandled = handleFmt22cs(cUnit, mir);
+ break;
+ case kFmt22t:
+ notHandled = handleFmt22t(cUnit, mir, bb,
+ labelList);
+ break;
+ case kFmt22x:
+ case kFmt32x:
+ notHandled = handleFmt22x_Fmt32x(cUnit, mir);
+ break;
+ case kFmt23x:
+ notHandled = handleFmt23x(cUnit, mir);
+ break;
+ case kFmt31t:
+ notHandled = handleFmt31t(cUnit, mir);
+ break;
+ case kFmt3rc:
+ case kFmt35c:
+ case kFmt5rc:
+ notHandled = handleFmt35c_3rc_5rc(cUnit, mir, bb,
+ labelList);
+ break;
+ case kFmt3rms:
+ case kFmt35ms:
+ notHandled = handleFmt35ms_3rms(cUnit, mir, bb,
+ labelList);
+ break;
+ case kFmt35mi:
+ case kFmt3rmi:
+ notHandled = handleExecuteInline(cUnit, mir);
+ break;
+ case kFmt51l:
+ notHandled = handleFmt51l(cUnit, mir);
+ break;
+ default:
+ notHandled = true;
+ break;
+ }
}
- }
- if (notHandled) {
- LOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled\n",
- mir->offset,
- dalvikOpcode, dexGetOpcodeName(dalvikOpcode),
- dalvikFormat);
- dvmCompilerAbort(cUnit);
- break;
+ if (notHandled) {
+ LOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled\n",
+ mir->offset,
+ dalvikOpcode, dexGetOpcodeName(dalvikOpcode),
+ dalvikFormat);
+ dvmCompilerAbort(cUnit);
+ break;
+ }
}
}
@@ -4389,10 +4462,8 @@
* insert an unconditional branch to the chaining cell.
*/
if (bb->needFallThroughBranch) {
- genUnconditionalBranch(cUnit,
- &labelList[bb->fallThrough->id]);
+ genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
}
-
}
/* Handle the chaining cells in predefined order */
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index ae98a56..4c0354a 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -21,33 +21,23 @@
#define DEBUG_OPT(X)
-ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+/* Check RAW, WAR, and WAW dependencies on the register operands */
+#define CHECK_REG_DEP(use, def, check) ((def & check->useMask) || \
+ ((use | def) & check->defMask))
-/* Is this a Dalvik register access? */
-static inline bool isDalvikLoad(ArmLIR *lir)
-{
- return (lir->useMask != ENCODE_ALL) && (lir->useMask & ENCODE_DALVIK_REG);
-}
-
-/* Is this a load from the literal pool? */
-static inline bool isLiteralLoad(ArmLIR *lir)
-{
- return (lir->useMask != ENCODE_ALL) && (lir->useMask & ENCODE_LITERAL);
-}
-
-static inline bool isDalvikStore(ArmLIR *lir)
-{
- return (lir->defMask != ENCODE_ALL) && (lir->defMask & ENCODE_DALVIK_REG);
-}
+/* Scheduler heuristics */
+#define MAX_HOIST_DISTANCE 20
+#define LDLD_DISTANCE 4
+#define LD_LATENCY 2
static inline bool isDalvikRegisterClobbered(ArmLIR *lir1, ArmLIR *lir2)
{
- int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
- int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
- int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
- int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
+ int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
+ int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
+ int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
+ int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
- return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
+ return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
}
#if 0
@@ -61,10 +51,39 @@
}
#endif
+/* Convert a more expensive instruction (i.e. a load) into a move */
+static void convertMemOpIntoMove(CompilationUnit *cUnit, ArmLIR *origLIR,
+ int dest, int src)
+{
+ /* Insert a move to replace the load */
+ ArmLIR *moveLIR;
+ moveLIR = dvmCompilerRegCopyNoInsert(cUnit, dest, src);
+ /*
+ * Insert the converted instruction after the original since the
+ * optimization is scannng in the top-down order and the new instruction
+ * will need to be re-checked (eg the new dest clobbers the src used in
+ * thisLIR).
+ */
+ dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) moveLIR);
+}
+
/*
- * Perform a pass of top-down walk to
- * 1) Eliminate redundant loads and stores
- * 2) Sink stores to latest possible slot
+ * Perform a pass over the superblock, starting from the second-last
+ * instruction and walking backward, to eliminate redundant loads and stores.
+ *
+ * An earlier load can eliminate a later load iff
+ * 1) They are must-aliases
+ * 2) The native register is not clobbered in between
+ * 3) The memory location is not written to in between
+ *
+ * An earlier store can eliminate a later load iff
+ * 1) They are must-aliases
+ * 2) The native register is not clobbered in between
+ * 3) The memory location is not written to in between
+ *
+ * An earlier store can be eliminated by a later store iff
+ * 1) They are must-aliases
+ * 2) The memory location is not written to in between
*/
static void applyLoadStoreElimination(CompilationUnit *cUnit,
ArmLIR *headLIR,
@@ -72,428 +91,347 @@
{
ArmLIR *thisLIR;
- cUnit->optRound++;
- for (thisLIR = headLIR;
- thisLIR != tailLIR;
- thisLIR = NEXT_LIR(thisLIR)) {
- /* Skip newly added instructions */
- if (thisLIR->flags.age >= cUnit->optRound) {
+ if (headLIR == tailLIR) return;
+
+ for (thisLIR = PREV_LIR(tailLIR);
+ thisLIR != headLIR;
+ thisLIR = PREV_LIR(thisLIR)) {
+ int sinkDistance = 0;
+
+ /* Skip non-interesting instructions */
+ if ((thisLIR->flags.isNop == true) ||
+ isPseudoOpcode(thisLIR->opcode) ||
+ !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) {
continue;
}
- if (isDalvikStore(thisLIR)) {
- int nativeRegId = thisLIR->operands[0];
- ArmLIR *checkLIR;
- int sinkDistance = 0;
+
+ int nativeRegId = thisLIR->operands[0];
+ bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
+ ArmLIR *checkLIR;
+ /* Use the mem mask to determine the rough memory location */
+ u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM;
+
+ /*
+ * Currently only eliminate redundant ld/st for constant and Dalvik
+ * register accesses.
+ */
+ if (!(thisMemMask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue;
+
+ /*
+ * Add r15 (pc) to the resource mask to prevent this instruction
+ * from sinking past branch instructions. Also take out the memory
+ * region bits since stopMask is used to check data/control
+ * dependencies.
+ */
+ u8 stopUseRegMask = (ENCODE_REG_PC | thisLIR->useMask) &
+ ~ENCODE_MEM;
+ u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
+
+ for (checkLIR = NEXT_LIR(thisLIR);
+ checkLIR != tailLIR;
+ checkLIR = NEXT_LIR(checkLIR)) {
+
+ u8 checkMemMask = (checkLIR->useMask | checkLIR->defMask) &
+ ENCODE_MEM;
+ u8 aliasCondition = thisMemMask & checkMemMask;
+ bool stopHere = false;
+
/*
- * Add r15 (pc) to the mask to prevent this instruction
- * from sinking past branch instructions. Unset the Dalvik register
- * bit when checking with native resource constraints.
+ * Potential aliases seen - check the alias relations
*/
- u8 stopMask = (ENCODE_REG_PC | thisLIR->useMask) &
- ~ENCODE_DALVIK_REG;
-
- for (checkLIR = NEXT_LIR(thisLIR);
- checkLIR != tailLIR;
- checkLIR = NEXT_LIR(checkLIR)) {
-
- /* Check if a Dalvik register load is redundant */
- if (isDalvikLoad(checkLIR) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
- (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
- /* Insert a move to replace the load */
- if (checkLIR->operands[0] != nativeRegId) {
- ArmLIR *moveLIR;
- moveLIR = dvmCompilerRegCopyNoInsert(
- cUnit, checkLIR->operands[0], nativeRegId);
- /*
- * Insert the converted checkLIR instruction after the
- * the original checkLIR since the optimization is
- * scannng in the top-down order and the new instruction
- * will need to be checked.
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) moveLIR);
- }
- checkLIR->flags.isNop = true;
- continue;
-
- /*
- * Found a true output dependency - nuke the previous store.
- * The register type doesn't matter here.
- */
- } else if (isDalvikStore(checkLIR) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo)) {
- thisLIR->flags.isNop = true;
- break;
- /* Find out the latest slot that the store can be sunk into */
- } else {
- /* Last instruction reached */
- bool stopHere = (NEXT_LIR(checkLIR) == tailLIR);
-
- /* Store data is clobbered */
- stopHere |= ((stopMask & checkLIR->defMask) != 0);
-
- /* Store data partially clobbers the Dalvik register */
- if (stopHere == false &&
- ((checkLIR->useMask | checkLIR->defMask) &
- ENCODE_DALVIK_REG)) {
- stopHere = isDalvikRegisterClobbered(thisLIR, checkLIR);
- }
-
- /* Found a new place to put the store - move it here */
- if (stopHere == true) {
- DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
- "SINK STORE"));
- /* The store can be sunk for at least one cycle */
- if (sinkDistance != 0) {
- ArmLIR *newStoreLIR =
- (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- *newStoreLIR = *thisLIR;
- newStoreLIR->flags.age = cUnit->optRound;
- /*
- * Stop point found - insert *before* the checkLIR
- * since the instruction list is scanned in the
- * top-down order.
- */
- dvmCompilerInsertLIRBefore((LIR *) checkLIR,
- (LIR *) newStoreLIR);
- thisLIR->flags.isNop = true;
- }
- break;
- }
-
+ if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
+ bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags &
+ IS_LOAD;
+ if (aliasCondition == ENCODE_LITERAL) {
/*
- * Saw a real instruction that the store can be sunk after
+ * Should only see literal loads in the instruction
+ * stream.
*/
- if (!isPseudoOpcode(checkLIR->opcode)) {
- sinkDistance++;
+ assert(!(EncodingMap[checkLIR->opcode].flags &
+ IS_STORE));
+ /* Same value && same register type */
+ if (checkLIR->aliasInfo == thisLIR->aliasInfo &&
+ REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId)){
+ /*
+ * Different destination register - insert
+ * a move
+ */
+ if (checkLIR->operands[0] != nativeRegId) {
+ convertMemOpIntoMove(cUnit, checkLIR,
+ checkLIR->operands[0],
+ nativeRegId);
+ }
+ checkLIR->flags.isNop = true;
+ }
+ } else if (aliasCondition == ENCODE_DALVIK_REG) {
+ /* Must alias */
+ if (checkLIR->aliasInfo == thisLIR->aliasInfo) {
+ /* Only optimize compatible registers */
+ bool regCompatible =
+ REGTYPE(checkLIR->operands[0]) ==
+ REGTYPE(nativeRegId);
+ if ((isThisLIRLoad && isCheckLIRLoad) ||
+ (!isThisLIRLoad && isCheckLIRLoad)) {
+ /* RAR or RAW */
+ if (regCompatible) {
+ /*
+ * Different destination register -
+ * insert a move
+ */
+ if (checkLIR->operands[0] !=
+ nativeRegId) {
+ convertMemOpIntoMove(cUnit,
+ checkLIR,
+ checkLIR->operands[0],
+ nativeRegId);
+ }
+ checkLIR->flags.isNop = true;
+ } else {
+ /*
+ * Destinations are of different types -
+ * something complicated is going on, so
+ * stop looking now.
+ */
+ stopHere = true;
+ }
+ } else if (isThisLIRLoad && !isCheckLIRLoad) {
+ /* WAR - register value is killed */
+ stopHere = true;
+ } else if (!isThisLIRLoad && !isCheckLIRLoad) {
+ /* WAW - nuke the earlier store */
+ thisLIR->flags.isNop = true;
+ stopHere = true;
+ }
+ /* Partial overlap */
+ } else if (isDalvikRegisterClobbered(thisLIR, checkLIR)) {
+ /*
+ * It is actually ok to continue if checkLIR
+ * is a read. But it is hard to make a test
+ * case for this so we just stop here to be
+ * conservative.
+ */
+ stopHere = true;
}
}
+ /* Memory content may be updated. Stop looking now. */
+ if (stopHere) {
+ break;
+ /* The checkLIR has been transformed - check the next one */
+ } else if (checkLIR->flags.isNop) {
+ continue;
+ }
+ }
+
+
+ /*
+ * thisLIR and checkLIR have no memory dependency. Now check if
+ * their register operands have any RAW, WAR, or WAW
+ * dependencies. If so, stop looking.
+ */
+ if (stopHere == false) {
+ stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
+ checkLIR);
+ }
+
+ if (stopHere == true) {
+ DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+ "REG CLOBBERED"));
+ /* Only sink store instructions */
+ if (sinkDistance && !isThisLIRLoad) {
+ ArmLIR *newStoreLIR =
+ (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
+ *newStoreLIR = *thisLIR;
+ /*
+ * Stop point found - insert *before* the checkLIR
+ * since the instruction list is scanned in the
+ * top-down order.
+ */
+ dvmCompilerInsertLIRBefore((LIR *) checkLIR,
+ (LIR *) newStoreLIR);
+ thisLIR->flags.isNop = true;
+ }
+ break;
+ } else if (!checkLIR->flags.isNop) {
+ sinkDistance++;
}
}
}
}
+/*
+ * Perform a pass over the superblock, starting from the second instruction,
+ * to try to hoist loads to earlier slots.
+ */
static void applyLoadHoisting(CompilationUnit *cUnit,
ArmLIR *headLIR,
ArmLIR *tailLIR)
{
- ArmLIR *thisLIR;
+ ArmLIR *thisLIR, *checkLIR;
/*
- * Don't want to hoist in front of first load following a barrier (or
- * first instruction of the block.
+ * Store the list of independent instructions that can be hoisted past.
+ * Will decide the best place to insert later.
*/
- bool firstLoad = true;
- int maxHoist = dvmCompilerTargetOptHint(kMaxHoistDistance);
+ ArmLIR *prevInstList[MAX_HOIST_DISTANCE];
- cUnit->optRound++;
- for (thisLIR = headLIR;
+ /* Empty block */
+ if (headLIR == tailLIR) return;
+
+ /* Start from the second instruction */
+ for (thisLIR = NEXT_LIR(headLIR);
thisLIR != tailLIR;
thisLIR = NEXT_LIR(thisLIR)) {
- /* Skip newly added instructions */
- if (thisLIR->flags.age >= cUnit->optRound ||
- thisLIR->flags.isNop == true) {
+
+ /* Skip non-interesting instructions */
+ if ((thisLIR->flags.isNop == true) ||
+ isPseudoOpcode(thisLIR->opcode) ||
+ !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) {
continue;
}
- if (firstLoad && (EncodingMap[thisLIR->opcode].flags & IS_LOAD)) {
- /*
- * Ensure nothing will be hoisted in front of this load because
- * it's result will likely be needed soon.
- */
- thisLIR->defMask |= ENCODE_MEM_USE;
- firstLoad = false;
+ u8 stopUseAllMask = thisLIR->useMask;
+
+ /*
+ * Branches for null/range checks are marked with the true resource
+ * bits, and loads from Dalvik registers, constant pools, and non-aliasing
+ * locations are safe to hoist. So only mark the heap references
+ * conservatively here.
+ */
+ if (stopUseAllMask & ENCODE_HEAP_REF) {
+ stopUseAllMask |= ENCODE_REG_PC;
}
- firstLoad |= (thisLIR->defMask == ENCODE_ALL);
+ /* Similar to the above, but just check for pure register dependency */
+ u8 stopUseRegMask = stopUseAllMask & ~ENCODE_MEM;
+ u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
- if (isDalvikLoad(thisLIR)) {
- int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo);
- int nativeRegId = thisLIR->operands[0];
- ArmLIR *checkLIR;
- int hoistDistance = 0;
- u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask);
- u8 stopDefMask = thisLIR->defMask;
- u8 checkResult;
+ int nextSlot = 0;
+ bool stopHere;
- /* First check if the load can be completely elinimated */
- for (checkLIR = PREV_LIR(thisLIR);
- checkLIR != headLIR;
- checkLIR = PREV_LIR(checkLIR)) {
+ /* Try to hoist the load to a good spot */
+ for (checkLIR = PREV_LIR(thisLIR);
+ checkLIR != headLIR;
+ checkLIR = PREV_LIR(checkLIR)) {
- if (checkLIR->flags.isNop) continue;
+ if (checkLIR->flags.isNop) continue;
- /*
- * Check if the Dalvik register is previously accessed
- * with exactly the same type.
- */
- if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
- (checkLIR->operands[0] == nativeRegId)) {
- /*
- * If it is previously accessed but with a different type,
- * the search will terminate later at the point checking
- * for partially overlapping stores.
- */
- thisLIR->flags.isNop = true;
- break;
- }
+ u8 checkMemMask = checkLIR->defMask & ENCODE_MEM;
+ u8 aliasCondition = stopUseAllMask & checkMemMask;
+ stopHere = false;
- /*
- * No earlier use/def can reach this load if:
- * 1) Head instruction is reached
- */
- if (checkLIR == headLIR) {
- break;
- }
-
- checkResult = (stopUseMask | stopDefMask) & checkLIR->defMask;
-
- /*
- * If both instructions are verified Dalvik accesses, clear the
- * may- and must-alias bits to detect true resource
- * dependencies.
- */
- if (checkResult & ENCODE_DALVIK_REG) {
- checkResult &= ~(ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
- }
-
- /*
- * 2) load target register is clobbered
- * 3) A branch is seen (stopUseMask has the PC bit set).
- */
- if (checkResult) {
- break;
- }
-
- /* Store data partially clobbers the Dalvik register */
- if (isDalvikStore(checkLIR) &&
- isDalvikRegisterClobbered(thisLIR, checkLIR)) {
- break;
- }
- }
-
- /* The load has been eliminated */
- if (thisLIR->flags.isNop) continue;
-
- /*
- * The load cannot be eliminated. See if it can be hoisted to an
- * earlier spot.
- */
- for (checkLIR = PREV_LIR(thisLIR);
- /* empty by intention */;
- checkLIR = PREV_LIR(checkLIR)) {
-
- if (checkLIR->flags.isNop) continue;
-
- /*
- * Check if the "thisLIR" load is redundant
- * NOTE: At one point, we also triggered if the checkLIR
- * instruction was a load. However, that tended to insert
- * a load/use dependency because the full scheduler is
- * not yet complete. When it is, we chould also trigger
- * on loads.
- */
- if (isDalvikStore(checkLIR) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
- (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
- /* Insert a move to replace the load */
- if (checkLIR->operands[0] != nativeRegId) {
- ArmLIR *moveLIR;
- moveLIR = dvmCompilerRegCopyNoInsert(
- cUnit, nativeRegId, checkLIR->operands[0]);
- /*
- * Convert *thisLIR* load into a move
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) moveLIR);
+ /* Potential WAR alias seen - check the exact relation */
+ if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
+ /* We can fully disambiguate Dalvik references */
+ if (aliasCondition == ENCODE_DALVIK_REG) {
+ /* Must alias or partially overlap */
+ if ((checkLIR->aliasInfo == thisLIR->aliasInfo) ||
+ isDalvikRegisterClobbered(thisLIR, checkLIR)) {
+ stopHere = true;
}
- thisLIR->flags.isNop = true;
- break;
-
- /* Find out if the load can be yanked past the checkLIR */
+ /* Conservatively treat all heap refs as may-alias */
} else {
- /* Last instruction reached */
- bool stopHere = (checkLIR == headLIR);
-
- /* Base address is clobbered by checkLIR */
- checkResult = stopUseMask & checkLIR->defMask;
- if (checkResult & ENCODE_DALVIK_REG) {
- checkResult &= ~(ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
- }
- stopHere |= (checkResult != 0);
-
- /* Load target clobbers use/def in checkLIR */
- checkResult = stopDefMask &
- (checkLIR->useMask | checkLIR->defMask);
- if (checkResult & ENCODE_DALVIK_REG) {
- checkResult &= ~(ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
- }
- stopHere |= (checkResult != 0);
-
- /* Store data partially clobbers the Dalvik register */
- if (stopHere == false &&
- (checkLIR->defMask & ENCODE_DALVIK_REG)) {
- stopHere = isDalvikRegisterClobbered(thisLIR, checkLIR);
- }
-
- /*
- * Stop at an earlier Dalvik load if the offset of checkLIR
- * is not less than thisLIR
- *
- * Experiments show that doing
- *
- * ldr r1, [r5, #16]
- * ldr r0, [r5, #20]
- *
- * is much faster than
- *
- * ldr r0, [r5, #20]
- * ldr r1, [r5, #16]
- */
- if (isDalvikLoad(checkLIR)) {
- int dRegId2 =
- DECODE_ALIAS_INFO_REG(checkLIR->aliasInfo);
- if (dRegId2 <= dRegId) {
- stopHere = true;
- }
- }
-
- /* Don't go too far */
- stopHere |= (hoistDistance >= maxHoist);
-
- /* Found a new place to put the load - move it here */
- if (stopHere == true) {
- DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
- "HOIST LOAD"));
- /* The load can be hoisted for at least one cycle */
- if (hoistDistance != 0) {
- ArmLIR *newLoadLIR =
- (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- *newLoadLIR = *thisLIR;
- newLoadLIR->flags.age = cUnit->optRound;
- /*
- * Stop point found - insert *after* the checkLIR
- * since the instruction list is scanned in the
- * bottom-up order.
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) newLoadLIR);
- thisLIR->flags.isNop = true;
- }
- break;
- }
-
- /*
- * Saw a real instruction that hosting the load is
- * beneficial
- */
- if (!isPseudoOpcode(checkLIR->opcode)) {
- hoistDistance++;
- }
+ assert(aliasCondition == ENCODE_HEAP_REF);
+ stopHere = true;
}
- }
- } else if (isLiteralLoad(thisLIR)) {
- int litVal = thisLIR->aliasInfo;
- int nativeRegId = thisLIR->operands[0];
- ArmLIR *checkLIR;
- int hoistDistance = 0;
- u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask) &
- ~ENCODE_LITPOOL_REF;
- u8 stopDefMask = thisLIR->defMask & ~ENCODE_LITPOOL_REF;
-
- /* First check if the load can be completely elinimated */
- for (checkLIR = PREV_LIR(thisLIR);
- checkLIR != headLIR;
- checkLIR = PREV_LIR(checkLIR)) {
-
- if (checkLIR->flags.isNop) continue;
-
- /* Reloading same literal into same tgt reg? Eliminate if so */
- if (isLiteralLoad(checkLIR) &&
- (checkLIR->aliasInfo == litVal) &&
- (checkLIR->operands[0] == nativeRegId)) {
- thisLIR->flags.isNop = true;
- break;
- }
-
- /*
- * No earlier use/def can reach this load if:
- * 1) Head instruction is reached
- * 2) load target register is clobbered
- * 3) A branch is seen (stopUseMask has the PC bit set).
- */
- if ((checkLIR == headLIR) ||
- (stopUseMask | stopDefMask) & checkLIR->defMask) {
+ /* Memory content may be updated. Stop looking now. */
+ if (stopHere) {
+ prevInstList[nextSlot++] = checkLIR;
break;
}
}
- /* The load has been eliminated */
- if (thisLIR->flags.isNop) continue;
+ if (stopHere == false) {
+ stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
+ checkLIR);
+ }
/*
- * The load cannot be eliminated. See if it can be hoisted to an
- * earlier spot.
+ * Store the dependent or non-pseudo/independent instruction to the
+ * list.
*/
- for (checkLIR = PREV_LIR(thisLIR);
- /* empty by intention */;
- checkLIR = PREV_LIR(checkLIR)) {
+ if (stopHere || !isPseudoOpcode(checkLIR->opcode)) {
+ prevInstList[nextSlot++] = checkLIR;
+ if (nextSlot == MAX_HOIST_DISTANCE) break;
+ }
- if (checkLIR->flags.isNop) continue;
+ /* Found a new place to put the load - move it here */
+ if (stopHere == true) {
+ DEBUG_OPT(dumpDependentInsnPair(checkLIR, thisLIR,
+ "HOIST STOP"));
+ break;
+ }
+ }
+
+ /*
+ * Reached the top - use headLIR as the dependent marker as all labels
+ * are barriers.
+ */
+ if (stopHere == false && nextSlot < MAX_HOIST_DISTANCE) {
+ prevInstList[nextSlot++] = headLIR;
+ }
+
+ /*
+ * At least one independent instruction is found. Scan in the reverse
+ * direction to find a beneficial slot.
+ */
+ if (nextSlot >= 2) {
+ int firstSlot = nextSlot - 2;
+ int slot;
+ ArmLIR *depLIR = prevInstList[nextSlot-1];
+ /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */
+ if (!isPseudoOpcode(depLIR->opcode) &&
+ (EncodingMap[depLIR->opcode].flags & IS_LOAD)) {
+ firstSlot -= LDLD_DISTANCE;
+ }
+ /*
+ * Make sure we check slot >= 0 since firstSlot may be negative
+ * when the loop is first entered.
+ */
+ for (slot = firstSlot; slot >= 0; slot--) {
+ ArmLIR *curLIR = prevInstList[slot];
+ ArmLIR *prevLIR = prevInstList[slot+1];
+
+ /* Check the highest instruction */
+ if (prevLIR->defMask == ENCODE_ALL) {
+ /*
+ * If the first instruction is a load, don't hoist anything
+ * above it since it is unlikely to be beneficial.
+ */
+ if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue;
+ /*
+ * If the remaining number of slots is less than LD_LATENCY,
+ * insert the hoisted load here.
+ */
+ if (slot < LD_LATENCY) break;
+ }
/*
- * TUNING: once a full scheduler exists, check here
- * for conversion of a redundant load into a copy similar
- * to the way redundant loads are handled above.
+ * NOTE: now prevLIR is guaranteed to be a non-pseudo
+ * instruction (ie accessing EncodingMap[prevLIR->opcode] is
+ * safe).
+ *
+ * Try to find two instructions with load/use dependency until
+ * the remaining instructions are less than LD_LATENCY.
*/
-
- /* Find out if the load can be yanked past the checkLIR */
-
- /* Last instruction reached */
- bool stopHere = (checkLIR == headLIR);
-
- /* Base address is clobbered by checkLIR */
- stopHere |= ((stopUseMask & checkLIR->defMask) != 0);
-
- /* Load target clobbers use/def in checkLIR */
- stopHere |= ((stopDefMask &
- (checkLIR->useMask | checkLIR->defMask)) != 0);
-
- /* Avoid re-ordering literal pool loads */
- stopHere |= isLiteralLoad(checkLIR);
-
- /* Don't go too far */
- stopHere |= (hoistDistance >= maxHoist);
-
- /* Found a new place to put the load - move it here */
- if (stopHere == true) {
- DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
- "HOIST LOAD"));
- /* The store can be hoisted for at least one cycle */
- if (hoistDistance != 0) {
- ArmLIR *newLoadLIR =
- (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- *newLoadLIR = *thisLIR;
- newLoadLIR->flags.age = cUnit->optRound;
- /*
- * Insertion is guaranteed to succeed since checkLIR
- * is never the first LIR on the list
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) newLoadLIR);
- thisLIR->flags.isNop = true;
- }
+ if (((curLIR->useMask & prevLIR->defMask) &&
+ (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) ||
+ (slot < LD_LATENCY)) {
break;
}
+ }
+ /* Found a slot to hoist to */
+ if (slot >= 0) {
+ ArmLIR *curLIR = prevInstList[slot];
+ ArmLIR *newLoadLIR = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR),
+ true);
+ *newLoadLIR = *thisLIR;
/*
- * Saw a real instruction that hosting the load is
- * beneficial
+ * Insertion is guaranteed to succeed since checkLIR
+ * is never the first LIR on the list
*/
- if (!isPseudoOpcode(checkLIR->opcode)) {
- hoistDistance++;
- }
+ dvmCompilerInsertLIRBefore((LIR *) curLIR,
+ (LIR *) newLoadLIR);
+ thisLIR->flags.isNop = true;
}
}
}