Move 64-bit multiplication to helper
We're right on the edge of what our current temp register pool allocation
can support for inline 64-bit arithmetic. Move 64-bit multiplication
out of line to sidestep the problem, and add explicit temp frees to
3-operand long ops. In the latter case there was a potential problem
if the result long was located in a part of the frame outside the range
of a single base+displacement store: the store might then need to
allocate an extra temp register, and none might be available.
Change-Id: I6f8e0a11b440ed35e08f2e3457de6cbea89cfccc
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 992eca6..2bdeee6 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -556,6 +556,10 @@
Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
GetResolvedType(mir->dalvikInsn.vB);
+ /*
+ * Need new routine that passes Method*, type index.
+ * Call unconditionally.
+ */
if (classPtr == NULL) {
/* Shouldn't happen */
LOG(FATAL) << "Unexpected null class pointer";
@@ -678,32 +682,13 @@
storeValueWide(cUnit, rlDest, rlResult);
}
-/*
- * To avoid possible conflicts, we use a lot of temps here. Note that
- * our usage of Thumb2 instruction forms avoids the problems with register
- * reuse for multiply instructions prior to arm6.
- */
-static void genMulLong(CompilationUnit* cUnit, RegLocation rlDest,
- RegLocation rlSrc1, RegLocation rlSrc2)
+static void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
+ RegLocation rlFree)  // Free rlFree's low/high regs unless rlKeep also uses them
{
- RegLocation rlResult;
- int resLo = oatAllocTemp(cUnit);
- int resHi = oatAllocTemp(cUnit);
- int tmp1 = oatAllocTemp(cUnit);
-
- rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
- rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-
- newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
- newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
- newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
- newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
- oatFreeTemp(cUnit, tmp1);
-
- rlResult = oatGetReturnWide(cUnit);
- rlResult.lowReg = resLo;
- rlResult.highReg = resHi;
- storeValueWide(cUnit, rlDest, rlResult);
+ if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
+ oatFreeTemp(cUnit, rlFree.lowReg);  // low half is not aliased by rlKeep
+ if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
+ oatFreeTemp(cUnit, rlFree.highReg);  // free the HIGH half (was lowReg, so highReg was never freed)
}
static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
@@ -727,6 +712,15 @@
opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
rlSrc2.highReg);
+ /*
+ * NOTE: If rlDest refers to a frame variable in a large frame, the
+ * following storeValueWide might need to allocate a temp register.
+ * To further work around the lack of a spill capability, explicitly
+ * free any temps from rlSrc1 & rlSrc2 that aren't still live in rlResult.
+ * Remove when spill is functional.
+ */
+ freeRegLocTemps(cUnit, rlResult, rlSrc1);
+ freeRegLocTemps(cUnit, rlResult, rlSrc2);
storeValueWide(cUnit, rlDest, rlResult);
oatClobber(cUnit, rLR);
oatUnmarkTemp(cUnit, rLR); // Remove lr from the temp pool
@@ -1398,8 +1392,10 @@
break;
case OP_MUL_LONG:
case OP_MUL_LONG_2ADDR:
- genMulLong(cUnit, rlDest, rlSrc1, rlSrc2);
- return false;
+ callOut = true;
+ retReg = r0;
+ funcOffset = OFFSETOF_MEMBER(Thread, pLmul);
+ break;
case OP_DIV_LONG:
case OP_DIV_LONG_2ADDR:
callOut = true;