Register usage cleanup
I plan to enable some of the old-world basic block optimizations.
Those depend on accurate temp register status, so the temp tracking
needed some cleanup first.
Change-Id: I317bce1b91a73ec9589c20ed5bfe00d53994991a
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index be1ab1e..3ceffae 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -306,8 +306,6 @@
void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr)
{
ArmLIR* lir = (ArmLIR*) arg;
- if (lir->flags.isNop)
- return;
int offset = lir->generic.offset;
int dest = lir->operands[0];
const bool dumpNop = false;
@@ -374,8 +372,10 @@
buildInsnString(EncodingMap[lir->opcode].fmt, lir, opOperands,
baseAddr, 256);
char tBuf[256];
- snprintf(tBuf, 256, "%p (%04x): %-9s%s%s", baseAddr + offset, offset,
- opName, opOperands, lir->flags.isNop ? "(nop)" : "");
+ snprintf(tBuf, 256, "%p (%04x): %-9s%s%s%s",
+ baseAddr + offset, offset,
+ opName, opOperands, lir->flags.isNop ? "(nop)" : "",
+ lir->flags.squashed ? "(squashed)" : "");
LOG(INFO) << tBuf;
}
break;
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 5308f7c..20fa0a2 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -840,9 +840,10 @@
struct {
bool isNop:1; // LIR is optimized away
bool insertWrapper:1; // insert branch to emulate memory accesses
+ bool squashed:1; // Eliminated def
unsigned int age:4; // default is 0, set lazily by the optimizer
unsigned int size:3; // bytes (2 for thumb, 2/4 for thumb2)
- unsigned int unused:23;
+ unsigned int unused:22;
} flags;
int aliasInfo; // For Dalvik register & litpool disambiguation
u8 useMask; // Resource mask for use
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index 3de0e79..84c3792 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -277,7 +277,7 @@
}
/* Clobber all regs that might be used by an external C call */
-extern void oatClobberCallRegs(CompilationUnit *cUnit)
+extern void oatClobberCalleeSave(CompilationUnit *cUnit)
{
oatClobber(cUnit, r0);
oatClobber(cUnit, r1);
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index aeb0134..cfeb16a 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -20,8 +20,23 @@
INVALID_REG, INVALID_SREG, 0,
kLocDalvikFrame, INVALID_REG, INVALID_REG,
INVALID_OFFSET};
-STATIC const RegLocation retLoc = LOC_DALVIK_RETURN_VAL;
-STATIC const RegLocation retLocWide = LOC_DALVIK_RETURN_VAL_WIDE;
+
+/* Lock the wide return registers, mark them as a pair, and return the wide (long) return location */
+STATIC RegLocation getRetLocWide(CompilationUnit* cUnit)
+{
+ RegLocation res = LOC_DALVIK_RETURN_VAL_WIDE;
+ oatLockTemp(cUnit, res.lowReg);
+ oatLockTemp(cUnit, res.highReg);
+ oatMarkPair(cUnit, res.lowReg, res.highReg);
+ return res;
+}
+
+STATIC RegLocation getRetLoc(CompilationUnit* cUnit)
+{
+ RegLocation res = LOC_DALVIK_RETURN_VAL;
+ oatLockTemp(cUnit, res.lowReg);
+ return res;
+}
/*
* Let helper function take care of everything. Will call
@@ -38,7 +53,6 @@
loadConstant(cUnit, r0, mir->dalvikInsn.vC); // arg0 <- type_id
loadValueDirectFixed(cUnit, rlSrc, r2); // arg2 <- count
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
RegLocation rlResult = oatGetReturn(cUnit);
storeValue(cUnit, rlDest, rlResult);
}
@@ -170,19 +184,18 @@
int fieldIdx = mir->dalvikInsn.vB;
uint32_t typeIdx;
Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
+ oatFlushAllRegs(cUnit);
if (SLOW_FIELD_PATH || field == NULL) {
// Slow path
LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
<< " unresolved at compile time";
int funcOffset = isObject ? OFFSETOF_MEMBER(Thread, pSetObjStatic)
: OFFSETOF_MEMBER(Thread, pSet32Static);
- oatFlushAllRegs(cUnit);
loadWordDisp(cUnit, rSELF, funcOffset, rLR);
loadConstant(cUnit, r0, mir->dalvikInsn.vB);
loadCurrMethodDirect(cUnit, r1);
loadValueDirect(cUnit, rlSrc, r2);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
} else {
// fast path
int fieldOffset = field->GetOffset().Int32Value();
@@ -227,16 +240,15 @@
int fieldIdx = mir->dalvikInsn.vB;
uint32_t typeIdx;
Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
+ oatFlushAllRegs(cUnit);
if (SLOW_FIELD_PATH || field == NULL) {
LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
<< " unresolved at compile time";
- oatFlushAllRegs(cUnit);
loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pSet64Static), rLR);
loadConstant(cUnit, r0, mir->dalvikInsn.vB);
loadCurrMethodDirect(cUnit, r1);
loadValueDirectWideFixed(cUnit, rlSrc, r2, r3);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
} else {
// fast path
int fieldOffset = field->GetOffset().Int32Value();
@@ -280,10 +292,10 @@
int fieldIdx = mir->dalvikInsn.vB;
uint32_t typeIdx;
Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
+ oatFlushAllRegs(cUnit);
if (SLOW_FIELD_PATH || field == NULL) {
LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
<< " unresolved at compile time";
- oatFlushAllRegs(cUnit);
loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pGet64Static), rLR);
loadConstant(cUnit, r0, mir->dalvikInsn.vB);
loadCurrMethodDirect(cUnit, r1);
@@ -335,13 +347,13 @@
Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
bool isObject = ((mir->dalvikInsn.opcode == OP_SGET_OBJECT) ||
(mir->dalvikInsn.opcode == OP_SGET_OBJECT_VOLATILE));
+ oatFlushAllRegs(cUnit);
if (SLOW_FIELD_PATH || field == NULL) {
LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
<< " unresolved at compile time";
// Slow path
int funcOffset = isObject ? OFFSETOF_MEMBER(Thread, pGetObjStatic)
: OFFSETOF_MEMBER(Thread, pGet32Static);
- oatFlushAllRegs(cUnit);
loadWordDisp(cUnit, rSELF, funcOffset, rLR);
loadConstant(cUnit, r0, mir->dalvikInsn.vB);
loadCurrMethodDirect(cUnit, r1);
@@ -950,6 +962,7 @@
genShowTarget(cUnit);
#endif
opReg(cUnit, kOpBlx, rLR);
+ oatClobberCalleeSave(cUnit);
}
/*
@@ -982,6 +995,7 @@
genShowTarget(cUnit);
#endif
opReg(cUnit, kOpBlx, rLR);
+ oatClobberCalleeSave(cUnit);
}
STATIC void genInvokeSuper(CompilationUnit* cUnit, MIR* mir)
@@ -1034,6 +1048,7 @@
genShowTarget(cUnit);
#endif
opReg(cUnit, kOpBlx, rLR);
+ oatClobberCalleeSave(cUnit);
}
STATIC void genInvokeVirtual(CompilationUnit* cUnit, MIR* mir)
@@ -1073,6 +1088,7 @@
genShowTarget(cUnit);
#endif
opReg(cUnit, kOpBlx, rLR);
+ oatClobberCalleeSave(cUnit);
}
STATIC bool compileDalvikInstruction(CompilationUnit* cUnit, MIR* mir,
@@ -1140,44 +1156,25 @@
case OP_RETURN:
case OP_RETURN_OBJECT:
genSuspendPoll(cUnit, mir);
- storeValue(cUnit, retLoc, rlSrc[0]);
+ storeValue(cUnit, getRetLoc(cUnit), rlSrc[0]);
break;
case OP_RETURN_WIDE:
genSuspendPoll(cUnit, mir);
- rlDest = retLocWide;
- rlDest.fp = rlSrc[0].fp;
- storeValueWide(cUnit, rlDest, rlSrc[0]);
+ storeValueWide(cUnit, getRetLocWide(cUnit), rlSrc[0]);
break;
case OP_MOVE_RESULT_WIDE:
if (mir->optimizationFlags & MIR_INLINED)
break; // Nop - combined w/ previous invoke
- /*
- * Somewhat hacky here. Because we're now passing
- * return values in registers, we have to let the
- * register allocation utilities know that the return
- * registers are live and may not be used for address
- * formation in storeValueWide.
- */
- DCHECK(retLocWide.lowReg == r0);
- DCHECK(retLocWide.highReg == r1);
- oatLockTemp(cUnit, retLocWide.lowReg);
- oatLockTemp(cUnit, retLocWide.highReg);
- storeValueWide(cUnit, rlDest, retLocWide);
- oatFreeTemp(cUnit, retLocWide.lowReg);
- oatFreeTemp(cUnit, retLocWide.highReg);
+ storeValueWide(cUnit, rlDest, getRetLocWide(cUnit));
break;
case OP_MOVE_RESULT:
case OP_MOVE_RESULT_OBJECT:
if (mir->optimizationFlags & MIR_INLINED)
break; // Nop - combined w/ previous invoke
- /* See comment for OP_MOVE_RESULT_WIDE */
- DCHECK(retLoc.lowReg == r0);
- oatLockTemp(cUnit, retLoc.lowReg);
- storeValue(cUnit, rlDest, retLoc);
- oatFreeTemp(cUnit, retLoc.lowReg);
+ storeValue(cUnit, rlDest, getRetLoc(cUnit));
break;
case OP_MOVE:
@@ -1848,7 +1845,10 @@
labelList[blockId].opcode = kArmPseudoNormalBlockLabel;
oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
+ /* Reset local optimization data on block boundaries */
+ oatResetRegPool(cUnit);
oatClobberAllRegs(cUnit);
+ oatResetDefTracking(cUnit);
ArmLIR* headLIR = NULL;
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index e3893d3..5c83660 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -57,6 +57,7 @@
STATIC ArmLIR* callRuntimeHelper(CompilationUnit* cUnit, int reg)
{
+ oatClobberCalleeSave(cUnit);
return opReg(cUnit, kOpBlx, reg);
}
@@ -373,7 +374,6 @@
// Materialize a pointer to the fill data image
newLIR3(cUnit, kThumb2Adr, r1, 0, (intptr_t)tabRec);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
}
/*
@@ -406,6 +406,7 @@
STATIC void getFieldOffset(CompilationUnit* cUnit, MIR* mir)
{
int fieldIdx = mir->dalvikInsn.vC;
+ oatFlushAllRegs(cUnit);
LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
<< " unresolved at compile time";
oatLockCallTemps(cUnit); // Explicit register usage
@@ -607,6 +608,7 @@
storeValue(cUnit, rlDest, rlResult);
} else {
// Slow path. Must test at runtime
+ oatFlushAllRegs(cUnit);
ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, rlResult.lowReg,
0);
// Resolved, store and hop over following code
@@ -621,7 +623,6 @@
genRegCopy(cUnit, r1, mReg);
loadConstant(cUnit, r0, mir->dalvikInsn.vB);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
RegLocation rlResult = oatGetReturn(cUnit);
storeValue(cUnit, rlDest, rlResult);
// Rejoin code paths
@@ -663,13 +664,13 @@
loadCurrMethodDirect(cUnit, r1); // arg1 <= Method*
loadConstant(cUnit, r0, mir->dalvikInsn.vB); // arg0 <- type_id
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
RegLocation rlResult = oatGetReturn(cUnit);
storeValue(cUnit, rlDest, rlResult);
}
void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
{
+ oatFlushAllRegs(cUnit);
loadWordDisp(cUnit, rSELF,
OFFSETOF_MEMBER(Thread, pDeliverException), rLR);
loadValueDirectFixed(cUnit, rlSrc, r0); // Get exception object
@@ -679,6 +680,7 @@
STATIC void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
RegLocation rlSrc)
{
+ oatFlushAllRegs(cUnit);
// May generate a call - use explicit registers
oatLockCallTemps(cUnit);
art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
@@ -721,7 +723,6 @@
genRegCopy(cUnit, r0, r3);
genRegCopy(cUnit, r1, r2);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
/* branch target here */
ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
target->defMask = ENCODE_ALL;
@@ -733,6 +734,7 @@
STATIC void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
{
+ oatFlushAllRegs(cUnit);
// May generate a call - use explicit registers
oatLockCallTemps(cUnit);
art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
@@ -773,7 +775,6 @@
genRegCopy(cUnit, r0, r1);
genRegCopy(cUnit, r1, r2);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
/* branch target here */
ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
target->defMask = ENCODE_ALL;
@@ -805,10 +806,12 @@
STATIC void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
RegLocation rlFree)
{
- if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
+ if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg) &&
+ (rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg)) {
+ // No overlap, free both
oatFreeTemp(cUnit, rlFree.lowReg);
- if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
- oatFreeTemp(cUnit, rlFree.lowReg);
+ oatFreeTemp(cUnit, rlFree.highReg);
+ }
}
STATIC void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
@@ -1108,7 +1111,6 @@
loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
}
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
if (tgtSize == 1) {
RegLocation rlResult;
rlDest = oatGetDest(cUnit, mir, 0);
@@ -1163,7 +1165,6 @@
loadValueDirectFixed(cUnit, rlSrc1, r0);
loadValueDirectFixed(cUnit, rlSrc2, r1);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
rlResult = oatGetReturn(cUnit);
storeValue(cUnit, rlDest, rlResult);
return false;
@@ -1209,7 +1210,6 @@
loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
rlResult = oatGetReturnWide(cUnit);
storeValueWide(cUnit, rlDest, rlResult);
return false;
@@ -1278,6 +1278,7 @@
int lenOffset = Array::LengthOffset().Int32Value();
int dataOffset = Array::DataOffset().Int32Value();
+ oatFlushAllRegs(cUnit);
/* Make sure it's a legal object Put. Use direct regs at first */
loadValueDirectFixed(cUnit, rlArray, r1);
loadValueDirectFixed(cUnit, rlSrc, r0);
@@ -1289,7 +1290,8 @@
/* Get the array's clazz */
loadWordDisp(cUnit, r1, Object::ClassOffset().Int32Value(), r1);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
+ oatFreeTemp(cUnit, r0);
+ oatFreeTemp(cUnit, r1);
// Now, redo loadValues in case they didn't survive the call
@@ -1479,7 +1481,6 @@
loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
loadValueDirect(cUnit, rlShift, r2);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
RegLocation rlResult = oatGetReturnWide(cUnit);
storeValueWide(cUnit, rlDest, rlResult);
return false;
@@ -1593,7 +1594,6 @@
loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
if (retReg == r0)
rlResult = oatGetReturnWide(cUnit);
else
@@ -1715,7 +1715,6 @@
genImmedCheck(cUnit, kArmCondEq, r1, 0, mir, kArmThrowDivZero);
}
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
if (retReg == r0)
rlResult = oatGetReturn(cUnit);
else
@@ -1731,6 +1730,7 @@
if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
return;
}
+ oatFlushAllRegs(cUnit);
newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
ArmLIR* branch = opCondBranch(cUnit, kArmCondEq);
ArmLIR* retLab = newLIR0(cUnit, kArmPseudoTargetLabel);
@@ -1750,6 +1750,7 @@
if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
return;
}
+ oatFlushAllRegs(cUnit);
oatLockCallTemps(cUnit); // Explicit register usage
int rSuspendCount = r1;
ArmLIR* ld;
@@ -2003,7 +2004,6 @@
loadWordDisp(cUnit, rSELF, funcOffset, rLR);
loadConstant(cUnit, r1, lit);
callRuntimeHelper(cUnit, rLR);
- oatClobberCallRegs(cUnit);
if (isDiv)
rlResult = oatGetReturn(cUnit);
else