Loop detection, improved reg allocation
Detect loops and loop nesting depth, and use the latter to
weight register uses (which are then used to determine which
registers to promote).
Also:
o Fixed typo (`&&` used where `&` was intended when testing disableOpt) that prevented squashing of useless fp reg copies
o Rescheduled array access range checks to hide the latency of the array-length (limit) load.
o Added a basic-block optimization pass to remove duplicate range
checks.
o Fixed bug that prevented recognition of redundant null
checks following iput-wide and aput-wide.
Change-Id: Icfbae39e89b1d14b8703ad6bbb0b29c0635fed1e
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index 27433ca..cc31b29 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -371,6 +371,7 @@
LOG(INFO) << "Outs : " << cUnit->numOuts;
LOG(INFO) << "CoreSpills : " << cUnit->numCoreSpills;
LOG(INFO) << "FPSpills : " << cUnit->numFPSpills;
+ LOG(INFO) << "CompilerTemps : " << cUnit->numCompilerTemps;
LOG(INFO) << "Frame size : " << cUnit->frameSize;
LOG(INFO) << "code size is " << cUnit->totalSize <<
" bytes, Dalvik size is " << insnsSize * 2;
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index bfc5639..34d9fb9 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1341,22 +1341,23 @@
opRegCopy(cUnit, regPtr, rlArray.lowReg);
}
- if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = oatAllocTemp(cUnit);
+ bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+ int regLen = INVALID_REG;
+ if (needsRangeCheck) {
+ regLen = oatAllocTemp(cUnit);
//NOTE: max live temps(4) here.
/* Get len */
loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+ }
+ /* regPtr -> array data */
+ opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+ /* at this point, regPtr points to array, 2 live temps */
+ rlSrc = loadValue(cUnit, rlSrc, regClass);
+ if (needsRangeCheck) {
genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
kThrowArrayBounds);
oatFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
}
- /* at this point, regPtr points to array, 2 live temps */
- rlSrc = loadValue(cUnit, rlSrc, regClass);
storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
scale, kWord);
}
@@ -1406,21 +1407,15 @@
}
#else
int regPtr = oatAllocTemp(cUnit);
- if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = oatAllocTemp(cUnit);
+ bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+ int regLen = INVALID_REG;
+ if (needsRangeCheck) {
+ regLen = oatAllocTemp(cUnit);
/* Get len */
loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
- // TODO: change kCondCS to a more meaningful name, is the sense of
- // carry-set/clear flipped?
- genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
- kThrowArrayBounds);
- oatFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
}
+ /* regPtr -> array data */
+ opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
oatFreeTemp(cUnit, rlArray.lowReg);
if ((size == kLong) || (size == kDouble)) {
if (scale) {
@@ -1434,6 +1429,13 @@
oatFreeTemp(cUnit, rlIndex.lowReg);
rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+ if (needsRangeCheck) {
+ // TODO: change kCondCS to a more meaningful name, is the sense of
+ // carry-set/clear flipped?
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
oatFreeTemp(cUnit, regPtr);
@@ -1441,6 +1443,13 @@
} else {
rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+ if (needsRangeCheck) {
+ // TODO: change kCondCS to a more meaningful name, is the sense of
+ // carry-set/clear flipped?
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
scale, size);
@@ -1483,20 +1492,16 @@
/* null object? */
genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
- if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = oatAllocTemp(cUnit);
+ bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+ int regLen = INVALID_REG;
+ if (needsRangeCheck) {
+ regLen = oatAllocTemp(cUnit);
//NOTE: max live temps(4) here.
/* Get len */
loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
- genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
- kThrowArrayBounds);
- oatFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
}
+ /* regPtr -> array data */
+ opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
/* at this point, regPtr points to array, 2 live temps */
if ((size == kLong) || (size == kDouble)) {
//TUNING: specific wide routine that can handle fp regs
@@ -1510,12 +1515,22 @@
}
rlSrc = loadValueWide(cUnit, rlSrc, regClass);
+ if (needsRangeCheck) {
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
+
storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
oatFreeTemp(cUnit, regPtr);
} else {
rlSrc = loadValue(cUnit, rlSrc, regClass);
-
+ if (needsRangeCheck) {
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
scale, size);
}
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 64f55c6..45a0c75 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -757,9 +757,12 @@
newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
break;
}
- case kMirOpCopy:
- UNIMPLEMENTED(FATAL) << "Need kMirOpCopy";
+ case kMirOpCopy: {
+ RegLocation rlSrc = oatGetSrc(cUnit, mir, 0);
+ RegLocation rlDest = oatGetDest(cUnit, mir, 0);
+ storeValue(cUnit, rlDest, rlSrc);
break;
+ }
default:
break;
}
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index afbefff..c08b2e8 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -1043,52 +1043,31 @@
void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb,
RefCounts* coreCounts, RefCounts* fpCounts)
{
- MIR* mir;
- if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
- bb->blockType != kExitBlock)
+ if ((cUnit->disableOpt & (1 << kPromoteRegs)) ||
+ !((bb->blockType == kEntryBlock) || (bb->blockType == kExitBlock) ||
+ (bb->blockType == kDalvikByteCode))) {
return;
-
- for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
- SSARepresentation *ssaRep = mir->ssaRep;
- if (ssaRep) {
- for (int i = 0; i < ssaRep->numDefs;) {
- RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
- RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = SRegToVReg(cUnit, ssaRep->defs[i]);
- if (loc.defined) {
- counts[vReg].count++;
- }
- if (loc.wide) {
- if (loc.defined) {
- if (loc.fp) {
- counts[vReg].doubleStart = true;
- }
- counts[vReg+1].count++;
- }
- i += 2;
- } else {
- i++;
+ }
+ for (int i = 0; i < cUnit->numSSARegs;) {
+ RegLocation loc = cUnit->regLocation[i];
+ RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+ int vReg = SRegToVReg(cUnit, loc.sRegLow);
+ if (vReg < 0) {
+ vReg = cUnit->numDalvikRegisters - (vReg + 1);
+ }
+ if (loc.defined) {
+ counts[vReg].count += cUnit->useCounts.elemList[i];
+ }
+ if (loc.wide) {
+ if (loc.defined) {
+ if (loc.fp) {
+ counts[vReg].doubleStart = true;
+ counts[vReg+1].count += cUnit->useCounts.elemList[i+1];
}
}
- for (int i = 0; i < ssaRep->numUses;) {
- RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
- RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = SRegToVReg(cUnit, ssaRep->uses[i]);
- if (loc.defined) {
- counts[vReg].count++;
- }
- if (loc.wide) {
- if (loc.defined) {
- if (loc.fp) {
- counts[vReg].doubleStart = true;
- }
- counts[vReg+1].count++;
- }
- i += 2;
- } else {
- i++;
- }
- }
+ i += 2;
+ } else {
+ i++;
}
}
}
@@ -1115,7 +1094,9 @@
*/
extern void oatDoPromotion(CompilationUnit* cUnit)
{
- int numRegs = cUnit->numDalvikRegisters;
+ int regBias = cUnit->numCompilerTemps + 1;
+ int dalvikRegs = cUnit->numDalvikRegisters;
+ int numRegs = dalvikRegs + regBias;
// Allow target code to add any special registers
oatAdjustSpillMask(cUnit);
@@ -1135,9 +1116,14 @@
oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
RefCounts *fpRegs = (RefCounts *)
oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
- for (int i = 0; i < numRegs; i++) {
+ // Set ssa names for original Dalvik registers
+ for (int i = 0; i < dalvikRegs; i++) {
coreRegs[i].sReg = fpRegs[i].sReg = i;
}
+ // Set ssa names for Method* and compiler temps
+ for (int i = 0; i < regBias; i++) {
+ coreRegs[dalvikRegs + i].sReg = fpRegs[dalvikRegs + i].sReg = (-1 - i);
+ }
GrowableListIterator iterator;
oatGrowableListIteratorInit(&cUnit->blockList, &iterator);
while (true) {
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index fdf0ca2..c79f7c6 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -1034,7 +1034,7 @@
}
}
LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, rDest, rSrc);
- if (!(cUnit->disableOpt && (1 << kSafeOptimizations)) && rDest == rSrc) {
+ if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && rDest == rSrc) {
res->flags.isNop = true;
}
return res;
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index e86a942..dc98508 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -450,7 +450,7 @@
#endif
LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, kMipsMove,
rDest, rSrc);
- if (!(cUnit->disableOpt && (1 << kSafeOptimizations)) && rDest == rSrc) {
+ if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && rDest == rSrc) {
res->flags.isNop = true;
}
return res;