Add support for suspend polling on backward branches in JIT'ed code.
The polling is expensive for now as it is done through three
instructions: ld/ld/branch (sketched after the list below). As a
result, a bunch of bonus stuff has been worked on to mitigate the
extra overhead:
- Cleaned up resource flags for memory disambiguation.
- Rewrote the load/store elimination and scheduler routines to hide
the ld/ld latency for the GC flag, separating the dependency checking
into a memory disambiguation part and a resource conflict part.
- Allowed Dalvik/constant/non-aliasing loads to be hoisted above
branches for null/range checks.
- Created extended basic blocks following goto instructions so that
longer instruction streams can be optimized as a whole.
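
For reference, the emitted poll behaves roughly like the following C
sketch (the field and helper names here are illustrative assumptions,
not necessarily the exact ones used by the codegen):

    /* Conceptual equivalent of the emitted ld/ld/branch sequence */
    static inline void jitSuspendPoll(Thread *self)
    {
        /* ld #1: fetch the pointer to the shared suspend-count word */
        volatile int *pSuspendCount = self->pSelfSuspendCount; /* assumed */
        /* ld #2: fetch the current count */
        int count = *pSuspendCount;
        /* branch: taken only when a suspend request is pending */
        if (count != 0)
            dvmCheckSuspendPending(self);   /* assumed handler entry */
    }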
Without the bonus stuff, performance dropped about 5-10% on some
benchmarks because of the lack of headroom to hide the polling latency
in tight loops. With the bonus stuff, the performance delta is within
+/-5% with polling code generated. With the bonus stuff but polling
disabled, it provides consistent performance improvements:
CaffeineMark 3.6%
Linpack 11.1%
Scimark 9.7%
Sieve 33.0%
Checkers 6.0%
As a result, GC polling is disabled by default but can be turned on
through the -Xjitsuspendpoll flag for experimental purposes.
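
To experiment with it (invocation sketch; the classpath and class name
are made up for illustration):

    dalvikvm -Xjitsuspendpoll -cp /data/app/test.jar TestMain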
Change-Id: Ia81fc85de3e2b70e6cc93bc37c2b845892003cdb
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index ae98a56..4c0354a 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -21,33 +21,23 @@
#define DEBUG_OPT(X)
-ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc);
+/* Check RAW, WAR, and WAW dependencies on the register operands */
+#define CHECK_REG_DEP(use, def, check) ((def & check->useMask) || \
+ ((use | def) & check->defMask))
-/* Is this a Dalvik register access? */
-static inline bool isDalvikLoad(ArmLIR *lir)
-{
- return (lir->useMask != ENCODE_ALL) && (lir->useMask & ENCODE_DALVIK_REG);
-}
-
-/* Is this a load from the literal pool? */
-static inline bool isLiteralLoad(ArmLIR *lir)
-{
- return (lir->useMask != ENCODE_ALL) && (lir->useMask & ENCODE_LITERAL);
-}
-
-static inline bool isDalvikStore(ArmLIR *lir)
-{
- return (lir->defMask != ENCODE_ALL) && (lir->defMask & ENCODE_DALVIK_REG);
-}
+/* Scheduler heuristics */
+#define MAX_HOIST_DISTANCE 20   /* max number of prior instructions examined */
+#define LDLD_DISTANCE 4         /* min separation kept below an earlier load */
+#define LD_LATENCY 2            /* assumed load-use latency in slots */
static inline bool isDalvikRegisterClobbered(ArmLIR *lir1, ArmLIR *lir2)
{
- int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
- int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
- int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
- int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
+ int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
+ int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
+ int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
+ int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
- return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
+ return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
}
#if 0
@@ -61,10 +51,39 @@
}
#endif
+/* Convert a more expensive instruction (e.g. a load) into a move */
+static void convertMemOpIntoMove(CompilationUnit *cUnit, ArmLIR *origLIR,
+ int dest, int src)
+{
+ /* Insert a move to replace the load */
+ ArmLIR *moveLIR;
+ moveLIR = dvmCompilerRegCopyNoInsert(cUnit, dest, src);
+ /*
+ * Insert the converted instruction after the original since the
+ * optimization is scanning in the top-down order and the new instruction
+ * will need to be re-checked (e.g. the new dest clobbers the src used in
+ * thisLIR).
+ */
+ dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) moveLIR);
+}
+
/*
- * Perform a pass of top-down walk to
- * 1) Eliminate redundant loads and stores
- * 2) Sink stores to latest possible slot
+ * Walk the superblock bottom-up, starting from the second-last instruction,
+ * and scan forward from each load/store to eliminate redundant loads and
+ * stores.
+ *
+ * An earlier load can eliminate a later load iff
+ * 1) They are must-aliases
+ * 2) The native register is not clobbered in between
+ * 3) The memory location is not written to in between
+ *
+ * An earlier store can eliminate a later load iff
+ * 1) They are must-aliases
+ * 2) The native register is not clobbered in between
+ * 3) The memory location is not written to in between
+ *
+ * A later store can be eliminated by an earlier store iff
+ * 1) They are must-aliases
+ * 2) The memory location is not written to in between
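+ *
+ * For example (illustrative; r5 is the Dalvik frame pointer):
+ *
+ *     str r0, [r5, #16]   @ store a Dalvik register
+ *     ... r0 not clobbered, [r5, #16] not written ...
+ *     ldr r1, [r5, #16]   @ redundant - rewritten as "mov r1, r0"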
*/
static void applyLoadStoreElimination(CompilationUnit *cUnit,
ArmLIR *headLIR,
@@ -72,428 +91,347 @@
{
ArmLIR *thisLIR;
- cUnit->optRound++;
- for (thisLIR = headLIR;
- thisLIR != tailLIR;
- thisLIR = NEXT_LIR(thisLIR)) {
- /* Skip newly added instructions */
- if (thisLIR->flags.age >= cUnit->optRound) {
+ if (headLIR == tailLIR) return;
+
+ for (thisLIR = PREV_LIR(tailLIR);
+ thisLIR != headLIR;
+ thisLIR = PREV_LIR(thisLIR)) {
+ int sinkDistance = 0;
+
+ /* Skip non-interesting instructions */
+ if ((thisLIR->flags.isNop == true) ||
+ isPseudoOpcode(thisLIR->opcode) ||
+ !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) {
continue;
}
- if (isDalvikStore(thisLIR)) {
- int nativeRegId = thisLIR->operands[0];
- ArmLIR *checkLIR;
- int sinkDistance = 0;
+
+ int nativeRegId = thisLIR->operands[0];
+ bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
+ ArmLIR *checkLIR;
+ /* Use the mem mask to determine the rough memory location */
+ u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM;
+
+ /*
+ * Currently only eliminate redundant ld/st for constant and Dalvik
+ * register accesses.
+ */
+ if (!(thisMemMask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue;
+
+ /*
+ * Add r15 (pc) to the resource mask to prevent this instruction
+ * from sinking past branch instructions. Also take out the memory
+ * region bits since stopMask is used to check data/control
+ * dependencies.
+ */
+ u8 stopUseRegMask = (ENCODE_REG_PC | thisLIR->useMask) &
+ ~ENCODE_MEM;
+ u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
+
+ for (checkLIR = NEXT_LIR(thisLIR);
+ checkLIR != tailLIR;
+ checkLIR = NEXT_LIR(checkLIR)) {
+
+ u8 checkMemMask = (checkLIR->useMask | checkLIR->defMask) &
+ ENCODE_MEM;
+ u8 aliasCondition = thisMemMask & checkMemMask;
+ bool stopHere = false;
+
/*
- * Add r15 (pc) to the mask to prevent this instruction
- * from sinking past branch instructions. Unset the Dalvik register
- * bit when checking with native resource constraints.
+ * Potential aliases seen - check the alias relations
*/
- u8 stopMask = (ENCODE_REG_PC | thisLIR->useMask) &
- ~ENCODE_DALVIK_REG;
-
- for (checkLIR = NEXT_LIR(thisLIR);
- checkLIR != tailLIR;
- checkLIR = NEXT_LIR(checkLIR)) {
-
- /* Check if a Dalvik register load is redundant */
- if (isDalvikLoad(checkLIR) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
- (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
- /* Insert a move to replace the load */
- if (checkLIR->operands[0] != nativeRegId) {
- ArmLIR *moveLIR;
- moveLIR = dvmCompilerRegCopyNoInsert(
- cUnit, checkLIR->operands[0], nativeRegId);
- /*
- * Insert the converted checkLIR instruction after the
- * the original checkLIR since the optimization is
- * scannng in the top-down order and the new instruction
- * will need to be checked.
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) moveLIR);
- }
- checkLIR->flags.isNop = true;
- continue;
-
- /*
- * Found a true output dependency - nuke the previous store.
- * The register type doesn't matter here.
- */
- } else if (isDalvikStore(checkLIR) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo)) {
- thisLIR->flags.isNop = true;
- break;
- /* Find out the latest slot that the store can be sunk into */
- } else {
- /* Last instruction reached */
- bool stopHere = (NEXT_LIR(checkLIR) == tailLIR);
-
- /* Store data is clobbered */
- stopHere |= ((stopMask & checkLIR->defMask) != 0);
-
- /* Store data partially clobbers the Dalvik register */
- if (stopHere == false &&
- ((checkLIR->useMask | checkLIR->defMask) &
- ENCODE_DALVIK_REG)) {
- stopHere = isDalvikRegisterClobbered(thisLIR, checkLIR);
- }
-
- /* Found a new place to put the store - move it here */
- if (stopHere == true) {
- DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
- "SINK STORE"));
- /* The store can be sunk for at least one cycle */
- if (sinkDistance != 0) {
- ArmLIR *newStoreLIR =
- (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- *newStoreLIR = *thisLIR;
- newStoreLIR->flags.age = cUnit->optRound;
- /*
- * Stop point found - insert *before* the checkLIR
- * since the instruction list is scanned in the
- * top-down order.
- */
- dvmCompilerInsertLIRBefore((LIR *) checkLIR,
- (LIR *) newStoreLIR);
- thisLIR->flags.isNop = true;
- }
- break;
- }
-
+ if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
+ bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags &
+ IS_LOAD;
+ if (aliasCondition == ENCODE_LITERAL) {
/*
- * Saw a real instruction that the store can be sunk after
+ * Should only see literal loads in the instruction
+ * stream.
*/
- if (!isPseudoOpcode(checkLIR->opcode)) {
- sinkDistance++;
+ assert(!(EncodingMap[checkLIR->opcode].flags &
+ IS_STORE));
+ /* Same value && same register type */
+ if (checkLIR->aliasInfo == thisLIR->aliasInfo &&
+ REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId)){
+ /*
+ * Different destination register - insert
+ * a move
+ */
+ if (checkLIR->operands[0] != nativeRegId) {
+ convertMemOpIntoMove(cUnit, checkLIR,
+ checkLIR->operands[0],
+ nativeRegId);
+ }
+ checkLIR->flags.isNop = true;
+ }
+ } else if (aliasCondition == ENCODE_DALVIK_REG) {
+ /* Must alias */
+ if (checkLIR->aliasInfo == thisLIR->aliasInfo) {
+ /* Only optimize compatible registers */
+ bool regCompatible =
+ REGTYPE(checkLIR->operands[0]) ==
+ REGTYPE(nativeRegId);
+ if ((isThisLIRLoad && isCheckLIRLoad) ||
+ (!isThisLIRLoad && isCheckLIRLoad)) {
+ /* RAR or RAW */
+ if (regCompatible) {
+ /*
+ * Different destination register -
+ * insert a move
+ */
+ if (checkLIR->operands[0] !=
+ nativeRegId) {
+ convertMemOpIntoMove(cUnit,
+ checkLIR,
+ checkLIR->operands[0],
+ nativeRegId);
+ }
+ checkLIR->flags.isNop = true;
+ } else {
+ /*
+ * Destinations are of different types -
+ * something complicated is going on so
+ * stop looking now.
+ */
+ stopHere = true;
+ }
+ } else if (isThisLIRLoad && !isCheckLIRLoad) {
+ /* WAR - register value is killed */
+ stopHere = true;
+ } else if (!isThisLIRLoad && !isCheckLIRLoad) {
+ /* WAW - nuke the earlier store */
+ thisLIR->flags.isNop = true;
+ stopHere = true;
+ }
+ /* Partial overlap */
+ } else if (isDalvikRegisterClobbered(thisLIR, checkLIR)) {
+ /*
+ * It is actually ok to continue if checkLIR
+ * is a read. But it is hard to make a test
+ * case for this so we just stop here to be
+ * conservative.
+ */
+ stopHere = true;
}
}
+ /* Memory content may be updated. Stop looking now. */
+ if (stopHere) {
+ break;
+ /* The checkLIR has been transformed - check the next one */
+ } else if (checkLIR->flags.isNop) {
+ continue;
+ }
+ }
+
+
+ /*
+ * thisLIR and checkLIR have no memory dependency. Now check if
+ * their register operands have any RAW, WAR, or WAW
+ * dependencies. If so, stop looking.
+ */
+ if (stopHere == false) {
+ stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
+ checkLIR);
+ }
+
+ if (stopHere == true) {
+ DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+ "REG CLOBBERED"));
+ /* Only sink store instructions */
+ if (sinkDistance && !isThisLIRLoad) {
+ ArmLIR *newStoreLIR =
+ (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
+ *newStoreLIR = *thisLIR;
+ /*
+ * Stop point found - insert *before* the checkLIR
+ * since the instruction list is scanned in the
+ * top-down order.
+ */
+ dvmCompilerInsertLIRBefore((LIR *) checkLIR,
+ (LIR *) newStoreLIR);
+ thisLIR->flags.isNop = true;
+ }
+ break;
+ } else if (!checkLIR->flags.isNop) {
+ sinkDistance++;
}
}
}
}
+/*
+ * Walk the superblock top-down, starting from the second instruction, and
+ * scan backward from each load to try to hoist it to an earlier slot.
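+ *
+ * For example (illustrative), in
+ *
+ *     ldr r0, [r5, #20]
+ *     add r2, r2, r0
+ *
+ * hoisting the ldr to an earlier slot hides its load-use latency from
+ * the add.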
+ */
static void applyLoadHoisting(CompilationUnit *cUnit,
ArmLIR *headLIR,
ArmLIR *tailLIR)
{
- ArmLIR *thisLIR;
+ ArmLIR *thisLIR, *checkLIR;
/*
- * Don't want to hoist in front of first load following a barrier (or
- * first instruction of the block.
+ * Record the instructions that the load may be hoisted past; the best
+ * insertion point is decided later.
*/
- bool firstLoad = true;
- int maxHoist = dvmCompilerTargetOptHint(kMaxHoistDistance);
+ ArmLIR *prevInstList[MAX_HOIST_DISTANCE];
- cUnit->optRound++;
- for (thisLIR = headLIR;
+ /* Empty block */
+ if (headLIR == tailLIR) return;
+
+ /* Start from the second instruction */
+ for (thisLIR = NEXT_LIR(headLIR);
thisLIR != tailLIR;
thisLIR = NEXT_LIR(thisLIR)) {
- /* Skip newly added instructions */
- if (thisLIR->flags.age >= cUnit->optRound ||
- thisLIR->flags.isNop == true) {
+
+ /* Skip non-interesting instructions */
+ if ((thisLIR->flags.isNop == true) ||
+ isPseudoOpcode(thisLIR->opcode) ||
+ !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) {
continue;
}
- if (firstLoad && (EncodingMap[thisLIR->opcode].flags & IS_LOAD)) {
- /*
- * Ensure nothing will be hoisted in front of this load because
- * it's result will likely be needed soon.
- */
- thisLIR->defMask |= ENCODE_MEM_USE;
- firstLoad = false;
+ u8 stopUseAllMask = thisLIR->useMask;
+
+ /*
+ * Branches for null/range checks are marked with the true resource
+ * bits, and loads from Dalvik registers, constant pools, and
+ * non-aliasing locations are safe to be hoisted. So only mark the heap
+ * references conservatively here.
+ */
+ if (stopUseAllMask & ENCODE_HEAP_REF) {
+ stopUseAllMask |= ENCODE_REG_PC;
}
- firstLoad |= (thisLIR->defMask == ENCODE_ALL);
+ /* Similar to the above, but only check for pure register dependencies */
+ u8 stopUseRegMask = stopUseAllMask & ~ENCODE_MEM;
+ u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
- if (isDalvikLoad(thisLIR)) {
- int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo);
- int nativeRegId = thisLIR->operands[0];
- ArmLIR *checkLIR;
- int hoistDistance = 0;
- u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask);
- u8 stopDefMask = thisLIR->defMask;
- u8 checkResult;
+ int nextSlot = 0;
+ bool stopHere = false;  /* initialized in case the scan loop never runs */
- /* First check if the load can be completely elinimated */
- for (checkLIR = PREV_LIR(thisLIR);
- checkLIR != headLIR;
- checkLIR = PREV_LIR(checkLIR)) {
+ /* Try to hoist the load to a good spot */
+ for (checkLIR = PREV_LIR(thisLIR);
+ checkLIR != headLIR;
+ checkLIR = PREV_LIR(checkLIR)) {
- if (checkLIR->flags.isNop) continue;
+ if (checkLIR->flags.isNop) continue;
- /*
- * Check if the Dalvik register is previously accessed
- * with exactly the same type.
- */
- if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
- (checkLIR->operands[0] == nativeRegId)) {
- /*
- * If it is previously accessed but with a different type,
- * the search will terminate later at the point checking
- * for partially overlapping stores.
- */
- thisLIR->flags.isNop = true;
- break;
- }
+ u8 checkMemMask = checkLIR->defMask & ENCODE_MEM;
+ u8 aliasCondition = stopUseAllMask & checkMemMask;
+ stopHere = false;
- /*
- * No earlier use/def can reach this load if:
- * 1) Head instruction is reached
- */
- if (checkLIR == headLIR) {
- break;
- }
-
- checkResult = (stopUseMask | stopDefMask) & checkLIR->defMask;
-
- /*
- * If both instructions are verified Dalvik accesses, clear the
- * may- and must-alias bits to detect true resource
- * dependencies.
- */
- if (checkResult & ENCODE_DALVIK_REG) {
- checkResult &= ~(ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
- }
-
- /*
- * 2) load target register is clobbered
- * 3) A branch is seen (stopUseMask has the PC bit set).
- */
- if (checkResult) {
- break;
- }
-
- /* Store data partially clobbers the Dalvik register */
- if (isDalvikStore(checkLIR) &&
- isDalvikRegisterClobbered(thisLIR, checkLIR)) {
- break;
- }
- }
-
- /* The load has been eliminated */
- if (thisLIR->flags.isNop) continue;
-
- /*
- * The load cannot be eliminated. See if it can be hoisted to an
- * earlier spot.
- */
- for (checkLIR = PREV_LIR(thisLIR);
- /* empty by intention */;
- checkLIR = PREV_LIR(checkLIR)) {
-
- if (checkLIR->flags.isNop) continue;
-
- /*
- * Check if the "thisLIR" load is redundant
- * NOTE: At one point, we also triggered if the checkLIR
- * instruction was a load. However, that tended to insert
- * a load/use dependency because the full scheduler is
- * not yet complete. When it is, we chould also trigger
- * on loads.
- */
- if (isDalvikStore(checkLIR) &&
- (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
- (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
- /* Insert a move to replace the load */
- if (checkLIR->operands[0] != nativeRegId) {
- ArmLIR *moveLIR;
- moveLIR = dvmCompilerRegCopyNoInsert(
- cUnit, nativeRegId, checkLIR->operands[0]);
- /*
- * Convert *thisLIR* load into a move
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) moveLIR);
+ /* Potential read-after-write alias seen - check the exact relation */
+ if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
+ /* We can fully disambiguate Dalvik references */
+ if (aliasCondition == ENCODE_DALVIK_REG) {
+ /* Must alias or partially overlap */
+ if ((checkLIR->aliasInfo == thisLIR->aliasInfo) ||
+ isDalvikRegisterClobbered(thisLIR, checkLIR)) {
+ stopHere = true;
}
- thisLIR->flags.isNop = true;
- break;
-
- /* Find out if the load can be yanked past the checkLIR */
+ /* Conservatively treat all heap refs as may-alias */
} else {
- /* Last instruction reached */
- bool stopHere = (checkLIR == headLIR);
-
- /* Base address is clobbered by checkLIR */
- checkResult = stopUseMask & checkLIR->defMask;
- if (checkResult & ENCODE_DALVIK_REG) {
- checkResult &= ~(ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
- }
- stopHere |= (checkResult != 0);
-
- /* Load target clobbers use/def in checkLIR */
- checkResult = stopDefMask &
- (checkLIR->useMask | checkLIR->defMask);
- if (checkResult & ENCODE_DALVIK_REG) {
- checkResult &= ~(ENCODE_DALVIK_REG | ENCODE_FRAME_REF);
- }
- stopHere |= (checkResult != 0);
-
- /* Store data partially clobbers the Dalvik register */
- if (stopHere == false &&
- (checkLIR->defMask & ENCODE_DALVIK_REG)) {
- stopHere = isDalvikRegisterClobbered(thisLIR, checkLIR);
- }
-
- /*
- * Stop at an earlier Dalvik load if the offset of checkLIR
- * is not less than thisLIR
- *
- * Experiments show that doing
- *
- * ldr r1, [r5, #16]
- * ldr r0, [r5, #20]
- *
- * is much faster than
- *
- * ldr r0, [r5, #20]
- * ldr r1, [r5, #16]
- */
- if (isDalvikLoad(checkLIR)) {
- int dRegId2 =
- DECODE_ALIAS_INFO_REG(checkLIR->aliasInfo);
- if (dRegId2 <= dRegId) {
- stopHere = true;
- }
- }
-
- /* Don't go too far */
- stopHere |= (hoistDistance >= maxHoist);
-
- /* Found a new place to put the load - move it here */
- if (stopHere == true) {
- DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
- "HOIST LOAD"));
- /* The load can be hoisted for at least one cycle */
- if (hoistDistance != 0) {
- ArmLIR *newLoadLIR =
- (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- *newLoadLIR = *thisLIR;
- newLoadLIR->flags.age = cUnit->optRound;
- /*
- * Stop point found - insert *after* the checkLIR
- * since the instruction list is scanned in the
- * bottom-up order.
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) newLoadLIR);
- thisLIR->flags.isNop = true;
- }
- break;
- }
-
- /*
- * Saw a real instruction that hosting the load is
- * beneficial
- */
- if (!isPseudoOpcode(checkLIR->opcode)) {
- hoistDistance++;
- }
+ assert(aliasCondition == ENCODE_HEAP_REF);
+ stopHere = true;
}
- }
- } else if (isLiteralLoad(thisLIR)) {
- int litVal = thisLIR->aliasInfo;
- int nativeRegId = thisLIR->operands[0];
- ArmLIR *checkLIR;
- int hoistDistance = 0;
- u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask) &
- ~ENCODE_LITPOOL_REF;
- u8 stopDefMask = thisLIR->defMask & ~ENCODE_LITPOOL_REF;
-
- /* First check if the load can be completely elinimated */
- for (checkLIR = PREV_LIR(thisLIR);
- checkLIR != headLIR;
- checkLIR = PREV_LIR(checkLIR)) {
-
- if (checkLIR->flags.isNop) continue;
-
- /* Reloading same literal into same tgt reg? Eliminate if so */
- if (isLiteralLoad(checkLIR) &&
- (checkLIR->aliasInfo == litVal) &&
- (checkLIR->operands[0] == nativeRegId)) {
- thisLIR->flags.isNop = true;
- break;
- }
-
- /*
- * No earlier use/def can reach this load if:
- * 1) Head instruction is reached
- * 2) load target register is clobbered
- * 3) A branch is seen (stopUseMask has the PC bit set).
- */
- if ((checkLIR == headLIR) ||
- (stopUseMask | stopDefMask) & checkLIR->defMask) {
+ /* Memory content may be updated. Stop looking now. */
+ if (stopHere) {
+ prevInstList[nextSlot++] = checkLIR;
break;
}
}
- /* The load has been eliminated */
- if (thisLIR->flags.isNop) continue;
+ if (stopHere == false) {
+ stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
+ checkLIR);
+ }
/*
- * The load cannot be eliminated. See if it can be hoisted to an
- * earlier spot.
+ * Store the dependent or non-pseudo/indepedent instruction to the
+ * list.
*/
- for (checkLIR = PREV_LIR(thisLIR);
- /* empty by intention */;
- checkLIR = PREV_LIR(checkLIR)) {
+ if (stopHere || !isPseudoOpcode(checkLIR->opcode)) {
+ prevInstList[nextSlot++] = checkLIR;
+ if (nextSlot == MAX_HOIST_DISTANCE) break;
+ }
- if (checkLIR->flags.isNop) continue;
+ /* Hit a dependency - stop the backward scan here */
+ if (stopHere == true) {
+ DEBUG_OPT(dumpDependentInsnPair(checkLIR, thisLIR,
+ "HOIST STOP"));
+ break;
+ }
+ }
+
+ /*
+ * Reached the top - use headLIR as the dependent marker since all
+ * labels are barriers.
+ */
+ if (stopHere == false && nextSlot < MAX_HOIST_DISTANCE) {
+ prevInstList[nextSlot++] = headLIR;
+ }
+
+ /*
+ * At least one independent instruction is found. Scan in the reverse
+ * direction to find a beneficial slot.
+ */
+ if (nextSlot >= 2) {
+ int firstSlot = nextSlot - 2;
+ int slot;
+ ArmLIR *depLIR = prevInstList[nextSlot-1];
+ /* If there is a ld-ld dependency, wait LDLD_DISTANCE cycles */
+ if (!isPseudoOpcode(depLIR->opcode) &&
+ (EncodingMap[depLIR->opcode].flags & IS_LOAD)) {
+ firstSlot -= LDLD_DISTANCE;
+ }
+ /*
+ * Make sure we check slot >= 0 since firstSlot may be negative
+ * when the loop is first entered.
+ */
+ for (slot = firstSlot; slot >= 0; slot--) {
+ ArmLIR *curLIR = prevInstList[slot];
+ ArmLIR *prevLIR = prevInstList[slot+1];
+
+ /* Check the highest instruction */
+ if (prevLIR->defMask == ENCODE_ALL) {
+ /*
+ * If the first instruction is a load, don't hoist anything
+ * above it since it is unlikely to be beneficial.
+ */
+ if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue;
+ /*
+ * If the remaining number of slots is less than LD_LATENCY,
+ * insert the hoisted load here.
+ */
+ if (slot < LD_LATENCY) break;
+ }
/*
- * TUNING: once a full scheduler exists, check here
- * for conversion of a redundant load into a copy similar
- * to the way redundant loads are handled above.
+ * NOTE: now prevLIR is guaranteed to be a non-pseudo
+ * instruction (i.e. accessing EncodingMap[prevLIR->opcode] is
+ * safe).
+ *
+ * Try to find two instructions with load/use dependency until
+ * the remaining instructions are less than LD_LATENCY.
*/
-
- /* Find out if the load can be yanked past the checkLIR */
-
- /* Last instruction reached */
- bool stopHere = (checkLIR == headLIR);
-
- /* Base address is clobbered by checkLIR */
- stopHere |= ((stopUseMask & checkLIR->defMask) != 0);
-
- /* Load target clobbers use/def in checkLIR */
- stopHere |= ((stopDefMask &
- (checkLIR->useMask | checkLIR->defMask)) != 0);
-
- /* Avoid re-ordering literal pool loads */
- stopHere |= isLiteralLoad(checkLIR);
-
- /* Don't go too far */
- stopHere |= (hoistDistance >= maxHoist);
-
- /* Found a new place to put the load - move it here */
- if (stopHere == true) {
- DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
- "HOIST LOAD"));
- /* The store can be hoisted for at least one cycle */
- if (hoistDistance != 0) {
- ArmLIR *newLoadLIR =
- (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- *newLoadLIR = *thisLIR;
- newLoadLIR->flags.age = cUnit->optRound;
- /*
- * Insertion is guaranteed to succeed since checkLIR
- * is never the first LIR on the list
- */
- dvmCompilerInsertLIRAfter((LIR *) checkLIR,
- (LIR *) newLoadLIR);
- thisLIR->flags.isNop = true;
- }
+ if (((curLIR->useMask & prevLIR->defMask) &&
+ (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) ||
+ (slot < LD_LATENCY)) {
break;
}
+ }
+ /* Found a slot to hoist to */
+ if (slot >= 0) {
+ ArmLIR *curLIR = prevInstList[slot];
+ ArmLIR *newLoadLIR = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR),
+ true);
+ *newLoadLIR = *thisLIR;
/*
- * Saw a real instruction that hosting the load is
- * beneficial
+ * Insertion is guaranteed to succeed since curLIR
+ * is never the first LIR on the list
*/
- if (!isPseudoOpcode(checkLIR->opcode)) {
- hoistDistance++;
- }
+ dvmCompilerInsertLIRBefore((LIR *) curLIR,
+ (LIR *) newLoadLIR);
+ thisLIR->flags.isNop = true;
}
}
}