Loop detection, improved reg allocation
Detect loops and loop nesting depth, and use the nesting depth to
weight register use counts (the weighted counts then determine
which registers are promoted).
Also:
o Fixed typo that prevented squashing of useless fp reg copies
o Rescheduled array access checks to hide latency of limit load.
o Added a basic-block optimization pass to remove duplicate range
checks.
o Fixed bug that prevented recognition of redundant null
checks following iput-wide and aput-wide.
Change-Id: Icfbae39e89b1d14b8703ad6bbb0b29c0635fed1e
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index afbefff..c08b2e8 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -1043,52 +1043,31 @@
void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb,
RefCounts* coreCounts, RefCounts* fpCounts)
{
- MIR* mir;
- if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
- bb->blockType != kExitBlock)
+ if ((cUnit->disableOpt & (1 << kPromoteRegs)) ||
+ !((bb->blockType == kEntryBlock) || (bb->blockType == kExitBlock) ||
+ (bb->blockType == kDalvikByteCode))) {
return;
-
- for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
- SSARepresentation *ssaRep = mir->ssaRep;
- if (ssaRep) {
- for (int i = 0; i < ssaRep->numDefs;) {
- RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
- RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = SRegToVReg(cUnit, ssaRep->defs[i]);
- if (loc.defined) {
- counts[vReg].count++;
- }
- if (loc.wide) {
- if (loc.defined) {
- if (loc.fp) {
- counts[vReg].doubleStart = true;
- }
- counts[vReg+1].count++;
- }
- i += 2;
- } else {
- i++;
+ }
+ for (int i = 0; i < cUnit->numSSARegs;) {
+ RegLocation loc = cUnit->regLocation[i];
+ RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+ int vReg = SRegToVReg(cUnit, loc.sRegLow);
+ if (vReg < 0) {
+ vReg = cUnit->numDalvikRegisters - (vReg + 1);
+ }
+ if (loc.defined) {
+ counts[vReg].count += cUnit->useCounts.elemList[i];
+ }
+ if (loc.wide) {
+ if (loc.defined) {
+ if (loc.fp) {
+ counts[vReg].doubleStart = true;
+ counts[vReg+1].count += cUnit->useCounts.elemList[i+1];
}
}
- for (int i = 0; i < ssaRep->numUses;) {
- RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
- RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = SRegToVReg(cUnit, ssaRep->uses[i]);
- if (loc.defined) {
- counts[vReg].count++;
- }
- if (loc.wide) {
- if (loc.defined) {
- if (loc.fp) {
- counts[vReg].doubleStart = true;
- }
- counts[vReg+1].count++;
- }
- i += 2;
- } else {
- i++;
- }
- }
+ i += 2;
+ } else {
+ i++;
}
}
}
@@ -1115,7 +1094,9 @@
*/
extern void oatDoPromotion(CompilationUnit* cUnit)
{
- int numRegs = cUnit->numDalvikRegisters;
+ int regBias = cUnit->numCompilerTemps + 1;
+ int dalvikRegs = cUnit->numDalvikRegisters;
+ int numRegs = dalvikRegs + regBias;
// Allow target code to add any special registers
oatAdjustSpillMask(cUnit);
@@ -1135,9 +1116,14 @@
oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
RefCounts *fpRegs = (RefCounts *)
oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
- for (int i = 0; i < numRegs; i++) {
+ // Set ssa names for original Dalvik registers
+ for (int i = 0; i < dalvikRegs; i++) {
coreRegs[i].sReg = fpRegs[i].sReg = i;
}
+ // Set ssa names for Method* and compiler temps
+ for (int i = 0; i < regBias; i++) {
+ coreRegs[dalvikRegs + i].sReg = fpRegs[dalvikRegs + i].sReg = (-1 - i);
+ }
GrowableListIterator iterator;
oatGrowableListIteratorInit(&cUnit->blockList, &iterator);
while (true) {