Optimize rem-int/lit too.
Bryan hitting the bug in my div-int/lit optimization (that caused it to
try to rewrite rem-int/lit too) shows that I was wrong in assuming % wasn't
worth doing because it wouldn't be hot enough.
Before:
benchmark ns logarithmic runtime
RemainderIntByConstant2 44 XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
RemainderIntByConstant2048 34 XXXXXXXXXXXXXXXXXXXXXX|||||
RemainderIntByConstant8 44 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
RemainderIntByVariable2 40 XXXXXXXXXXXXXXXXXXXXXXXXXXX||
After:
benchmark ns logarithmic runtime
RemainderIntByConstant2 13 XXXXXXXXX|||||||||||
RemainderIntByConstant2048 16 XXXXXXXXXXXX||||||||||
RemainderIntByConstant8 16 XXXXXXXXXXXX||||||||||
RemainderIntByVariable2 40 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Bug: 2614702
Change-Id: I719fc8765feececd5b73c3cb2e44dd3cf20c45ce
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index c691d15..68d5b84 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1903,9 +1903,6 @@
static bool handleEasyDivide(CompilationUnit *cUnit, OpCode dalvikOpCode,
RegLocation rlSrc, RegLocation rlDest, int lit)
{
- if (dalvikOpCode != OP_DIV_INT_LIT8 && dalvikOpCode != OP_DIV_INT_LIT16) {
- return false;
- }
if (lit < 2 || !isPowerOfTwo(lit)) {
return false;
}
@@ -1914,19 +1911,39 @@
// Avoid special cases.
return false;
}
+ bool div = (dalvikOpCode == OP_DIV_INT_LIT8 || dalvikOpCode == OP_DIV_INT_LIT16);
rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
- int tReg = dvmCompilerAllocTemp(cUnit);
- if (lit == 2) {
- // Division by 2 is by far the most common division by constant.
- opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
- opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
- opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
+ if (div) {
+ int tReg = dvmCompilerAllocTemp(cUnit);
+ if (lit == 2) {
+ // Division by 2 is by far the most common division by constant.
+ opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
+ opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
+ opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
+ } else {
+ opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
+ opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
+ opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
+ opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
+ }
} else {
- opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
- opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
- opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
- opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
+ int cReg = dvmCompilerAllocTemp(cUnit);
+ loadConstant(cUnit, cReg, lit - 1);
+ int tReg1 = dvmCompilerAllocTemp(cUnit);
+ int tReg2 = dvmCompilerAllocTemp(cUnit);
+ if (lit == 2) {
+ opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
+ opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
+ opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
+ opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
+ } else {
+ opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
+ opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
+ opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
+ opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
+ opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
+ }
}
storeValue(cUnit, rlDest, rlResult);
return true;