Optimize idiv-int/lit for powers of 2.
before:
benchmark ns logarithmic runtime
DivideIntByConstant10 32 XXXXXXXXXXXXXXXXXXXXXXXXXXXX|
DivideIntByConstant100 32 XXXXXXXXXXXXXXXXXXXXXXXXXXXX|
DivideIntByConstant100_HandOptimized 34 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
DivideIntByConstant2 32 XXXXXXXXXXXXXXXXXXXXXXXXXXXX|
DivideIntByConstant2048 22 XXXXXXXXXXXXXXXXXXX|||||||
DivideIntByConstant8 20 XXXXXXXXXXXXXXXXX||||||||
DivideIntByVariable10 21 XXXXXXXXXXXXXXXXXX|||||||
DivideIntByVariable2 21 XXXXXXXXXXXXXXXXXX|||||||
after:
benchmark ns logarithmic runtime
DivideIntByConstant10 32 XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
DivideIntByConstant100 32 XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
DivideIntByConstant100_HandOptimized 33 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
DivideIntByConstant2 11 XXXXXXXXX|||||||||||
DivideIntByConstant2048 13 XXXXXXXXXXX|||||||||||
DivideIntByConstant8 13 XXXXXXXXXXX|||||||||||
DivideIntByVariable10 21 XXXXXXXXXXXXXXXXXXX|||||||
DivideIntByVariable2 22 XXXXXXXXXXXXXXXXXXXX||||||
Bug: 2614702
Change-Id: I9dde73d80580446a362cdcc9b82959a4b6bfb384
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 8d63c66..ef7de28 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1898,6 +1898,37 @@
return bit_posn;
}
+// Returns true after emitting a shift/add based divide of 'rlSrc' by 'lit' into
+// 'rlDest'. Returns false, emitting nothing, unless lit >= 2 is a power of two with k < 30.
+static bool handleEasyDivide(CompilationUnit *cUnit,
+ RegLocation rlSrc, RegLocation rlDest, int lit)
+{
+ if (lit < 2 || !isPowerOfTwo(lit)) {
+ return false; // caller falls back to its generic divide path
+ }
+ int k = lowestSetBit(lit); // presumably log2(lit), since lit is a power of two
+ if (k >= 30) {
+ // Avoid special cases.
+ return false;
+ }
+ rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+ RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
+ int tReg = dvmCompilerAllocTemp(cUnit); // scratch register for the rounding bias
+ if (lit == 2) {
+ // Division by 2 is by far the most common division by constant.
+ opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k); // presumably k == 1: tReg = sign bit
+ opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg); // add bias so negatives round toward 0
+ opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k); // arithmetic shift does the divide
+ } else {
+ opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31); // presumably tReg = 0 or -1 (sign fill)
+ opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k); // keep low k bits: bias is 0 or 2^k - 1
+ opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg); // add bias so negatives round toward 0
+ opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k); // arithmetic shift does the divide
+ }
+ storeValue(cUnit, rlDest, rlResult);
+ return true;
+}
+
// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
// and store the result in 'rlDest'.
static bool handleEasyMultiply(CompilationUnit *cUnit,
@@ -2019,6 +2050,9 @@
genInterpSingleStep(cUnit, mir);
return false;
}
+ if (handleEasyDivide(cUnit, rlSrc, rlDest, lit)) {
+ return false;
+ }
dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */
loadValueDirectFixed(cUnit, rlSrc, r0);
dvmCompilerClobber(cUnit, r0);
@@ -3947,13 +3981,13 @@
/* Start compilation with maximally allowed trace length */
res = dvmCompileTrace(work->info, JIT_MAX_TRACE_LEN, &work->result,
work->bailPtr);
- gDvmJit.printMe = oldPrintMe;;
+ gDvmJit.printMe = oldPrintMe;
break;
}
default:
res = false;
LOGE("Jit: unknown work order type");
- assert(0); // Bail if debug build, discard oteherwise
+ assert(0); // Bail if debug build, discard otherwise
}
return res;
}