Improved method invocation performance: 1.5x for virtual and 2.8x for interface.
- Implemented predicted chaining for invoke virtual and interface.
- Eliminated a little bit of fat for invoke native.
- Added 078-polymorphic-virtual for stress tests.
diff --git a/tests/078-polymorphic-virtual/expected.txt b/tests/078-polymorphic-virtual/expected.txt
new file mode 100644
index 0000000..0d29728
--- /dev/null
+++ b/tests/078-polymorphic-virtual/expected.txt
@@ -0,0 +1,3 @@
+10000000
+20000000
+30000000
diff --git a/tests/078-polymorphic-virtual/info.txt b/tests/078-polymorphic-virtual/info.txt
new file mode 100644
index 0000000..7c8a561
--- /dev/null
+++ b/tests/078-polymorphic-virtual/info.txt
@@ -0,0 +1,2 @@
+Stress test predicted chaining for an overridden virtual callsite with 3 resolved
+callees invoked 10,000,000 times each in three threads.
diff --git a/tests/078-polymorphic-virtual/src/Base.java b/tests/078-polymorphic-virtual/src/Base.java
new file mode 100644
index 0000000..ec3aadd
--- /dev/null
+++ b/tests/078-polymorphic-virtual/src/Base.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Base extends Thread { // worker thread that calls incrimentValue() 10,000,000 times
+ int value; // package-visible so the Derived* overrides can update it
+
+ @Override public void run() { // thread entry point (overrides Thread.run)
+ for (int i = 0; i < 10000000; i++) {
+ incrimentValue(); // virtual call: the receiver's runtime class selects the override
+ }
+ }
+
+ public void incrimentValue() { // no-op in the base class; Derived1..3 override it
+ }
+
+ public int getValue() { // current total held in value
+ return value;
+ }
+}
diff --git a/tests/078-polymorphic-virtual/src/Derived1.java b/tests/078-polymorphic-virtual/src/Derived1.java
new file mode 100644
index 0000000..57bd3b0
--- /dev/null
+++ b/tests/078-polymorphic-virtual/src/Derived1.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Derived1 extends Base { // callee #1 for the polymorphic callsite in Base.run()
+ @Override public void incrimentValue() { // adds 1 per call
+ value += 1;
+ }
+}
diff --git a/tests/078-polymorphic-virtual/src/Derived2.java b/tests/078-polymorphic-virtual/src/Derived2.java
new file mode 100644
index 0000000..1d7de57
--- /dev/null
+++ b/tests/078-polymorphic-virtual/src/Derived2.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Derived2 extends Base { // callee #2 for the polymorphic callsite in Base.run()
+ @Override public void incrimentValue() { // adds 2 per call
+ value += 2;
+ }
+}
diff --git a/tests/078-polymorphic-virtual/src/Derived3.java b/tests/078-polymorphic-virtual/src/Derived3.java
new file mode 100644
index 0000000..c2594d2
--- /dev/null
+++ b/tests/078-polymorphic-virtual/src/Derived3.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Derived3 extends Base { // callee #3 for the polymorphic callsite in Base.run()
+ @Override public void incrimentValue() { // adds 3 per call
+ value += 3;
+ }
+}
diff --git a/tests/078-polymorphic-virtual/src/Main.java b/tests/078-polymorphic-virtual/src/Main.java
new file mode 100644
index 0000000..0514e53
--- /dev/null
+++ b/tests/078-polymorphic-virtual/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main { // runs three Base subclasses concurrently and prints each total
+ public static void main(String[] args) {
+ Derived1 derived1 = new Derived1();
+ Derived2 derived2 = new Derived2();
+ Derived3 derived3 = new Derived3();
+
+ derived1.start(); // each thread executes Base.run() against its own receiver
+ derived2.start();
+ derived3.start();
+
+ try {
+ derived1.join(); // wait for all three loops to finish before reading totals
+ derived2.join();
+ derived3.join();
+ } catch (InterruptedException e) { // the only checked exception join() can throw
+ System.out.println(e);
+ return;
+ }
+
+ System.out.println(derived1.getValue()); // 10,000,000 calls * 1
+ System.out.println(derived2.getValue()); // 10,000,000 calls * 2
+ System.out.println(derived3.getValue()); // 10,000,000 calls * 3
+ }
+}
diff --git a/tests/etc/local-run-test-jar b/tests/etc/local-run-test-jar
index ee3f856..6155e3f 100755
--- a/tests/etc/local-run-test-jar
+++ b/tests/etc/local-run-test-jar
@@ -6,6 +6,7 @@
# Options:
# --quiet -- don't chatter
# --fast -- use the fast interpreter (the default)
+# --jit -- use the jit
# --portable -- use the portable interpreter
# --debug -- wait for debugger to attach
# --valgrind -- use valgrind
@@ -32,6 +33,10 @@
if [ "x$1" = "x--quiet" ]; then
QUIET="y"
shift
+ elif [ "x$1" = "x--jit" ]; then
+ INTERP="jit"
+ msg "Using jit"
+ shift
elif [ "x$1" = "x--fast" ]; then
INTERP="fast"
msg "Using fast interpreter"
diff --git a/tests/etc/push-and-run-test-jar b/tests/etc/push-and-run-test-jar
index db7addc..df66a8e 100755
--- a/tests/etc/push-and-run-test-jar
+++ b/tests/etc/push-and-run-test-jar
@@ -6,6 +6,7 @@
# Options:
# --quiet -- don't chatter
# --fast -- use the fast interpreter (the default)
+# --jit -- use the jit
# --portable -- use the portable interpreter
# --debug -- wait for debugger to attach
# --zygote -- use the zygote (if so, all other options are ignored)
@@ -36,6 +37,10 @@
INTERP="fast"
msg "Using fast interpreter"
shift
+ elif [ "x$1" = "x--jit" ]; then
+ INTERP="jit"
+ msg "Using jit"
+ shift
elif [ "x$1" = "x--portable" ]; then
INTERP="portable"
msg "Using portable interpreter"
diff --git a/tests/run-test b/tests/run-test
index 25bfb4e..c5e2090 100755
--- a/tests/run-test
+++ b/tests/run-test
@@ -57,6 +57,9 @@
elif [ "x$1" = "x--reference" ]; then
RUN="${progdir}/etc/reference-run-test-classes"
shift
+ elif [ "x$1" = "x--jit" ]; then
+ run_args="${run_args} --jit"
+ shift
elif [ "x$1" = "x--fast" ]; then
run_args="${run_args} --fast"
shift
@@ -144,6 +147,7 @@
"current directory."
echo " Runtime Options:"
echo " --fast Use the fast interpreter (the default)."
+ echo " --jit Use the jit."
echo " --portable Use the portable interpreter."
echo " --debug Wait for a debugger to attach."
#echo " --gdb Run under gdb; incompatible with some tests."
diff --git a/vm/Globals.h b/vm/Globals.h
index d43a77e..e4ad4c6 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -621,6 +621,7 @@
extern struct DvmGlobals gDvm;
#if defined(WITH_JIT)
+
/*
* JIT-specific global state
*/
@@ -683,8 +684,9 @@
int normalExit;
int puntExit;
int translationChains;
- int invokeNoOpt;
- int InvokeChain;
+ int invokeChain;
+ int invokePredictedChain;
+ int invokeNative;
int returnOp;
/* Compiled code cache */
diff --git a/vm/SignalCatcher.c b/vm/SignalCatcher.c
index 495fbf2..309ad5d 100644
--- a/vm/SignalCatcher.c
+++ b/vm/SignalCatcher.c
@@ -264,6 +264,8 @@
} else if (rcvd == SIGUSR2) {
gDvmJit.printMe ^= true;
dvmCompilerDumpStats();
+ /* Stress-test unchain all */
+ dvmJitUnchainAll();
#endif
} else {
LOGE("unexpected signal %d\n", rcvd);
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index 8a2028a..201b0cc 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -23,7 +23,8 @@
/* For coding convenience reasons chaining cell types should appear first */
CHAINING_CELL_NORMAL = 0,
CHAINING_CELL_HOT,
- CHAINING_CELL_INVOKE,
+ CHAINING_CELL_INVOKE_SINGLETON,
+ CHAINING_CELL_INVOKE_PREDICTED,
CHAINING_CELL_LAST,
DALVIK_BYTECODE,
PC_RECONSTRUCTION,
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index 76d9312..77548d9 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -433,24 +433,31 @@
kInstrInvoke)) == 0;
/* Target block not included in the trace */
- if (targetOffset != curOffset && curBB->taken == NULL) {
+ if (curBB->taken == NULL &&
+ (isInvoke || (targetOffset != curOffset))) {
+ BasicBlock *newBB;
if (isInvoke) {
- lastBB->next = dvmCompilerNewBB(CHAINING_CELL_INVOKE);
+ /* Monomorphic callee */
+ if (callee) {
+ newBB = dvmCompilerNewBB(CHAINING_CELL_INVOKE_SINGLETON);
+ newBB->startOffset = 0;
+ newBB->containingMethod = callee;
+ /* Will resolve at runtime */
+ } else {
+ newBB = dvmCompilerNewBB(CHAINING_CELL_INVOKE_PREDICTED);
+ newBB->startOffset = 0;
+ }
/* For unconditional branches, request a hot chaining cell */
} else {
- lastBB->next = dvmCompilerNewBB(flags & kInstrUnconditional ?
+ newBB = dvmCompilerNewBB(flags & kInstrUnconditional ?
CHAINING_CELL_HOT :
CHAINING_CELL_NORMAL);
+ newBB->startOffset = targetOffset;
}
- lastBB = lastBB->next;
- lastBB->id = numBlocks++;
- if (isInvoke) {
- lastBB->startOffset = 0;
- lastBB->containingMethod = callee;
- } else {
- lastBB->startOffset = targetOffset;
- }
- curBB->taken = lastBB;
+ newBB->id = numBlocks++;
+ curBB->taken = newBB;
+ lastBB->next = newBB;
+ lastBB = newBB;
}
/* Fallthrough block not included in the trace */
diff --git a/vm/compiler/codegen/armv5te/ArchUtility.c b/vm/compiler/codegen/armv5te/ArchUtility.c
index 7d7f119..e45c572 100644
--- a/vm/compiler/codegen/armv5te/ArchUtility.c
+++ b/vm/compiler/codegen/armv5te/ArchUtility.c
@@ -167,8 +167,11 @@
case ARMV5TE_PSEUDO_CHAINING_CELL_HOT:
LOGD("-------- chaining cell (hot): 0x%04x\n", dest);
break;
- case ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE:
- LOGD("-------- chaining cell (invoke): %s/%p\n",
+ case ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED:
+ LOGD("-------- chaining cell (predicted)\n");
+ break;
+ case ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON:
+ LOGD("-------- chaining cell (invoke singleton): %s/%p\n",
((Method *)dest)->name,
((Method *)dest)->insns);
break;
diff --git a/vm/compiler/codegen/armv5te/Armv5teLIR.h b/vm/compiler/codegen/armv5te/Armv5teLIR.h
index f0a3f42..c1dbf33 100644
--- a/vm/compiler/codegen/armv5te/Armv5teLIR.h
+++ b/vm/compiler/codegen/armv5te/Armv5teLIR.h
@@ -69,9 +69,10 @@
* Assemble.c.
*/
typedef enum Armv5teOpCode {
- ARMV5TE_PSEUDO_TARGET_LABEL = -10,
- ARMV5TE_PSEUDO_CHAINING_CELL_HOT = -9,
- ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE = -8,
+ ARMV5TE_PSEUDO_TARGET_LABEL = -11,
+ ARMV5TE_PSEUDO_CHAINING_CELL_HOT = -10,
+ ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED = -9,
+ ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON = -8,
ARMV5TE_PSEUDO_CHAINING_CELL_NORMAL = -7,
ARMV5TE_PSEUDO_DALVIK_BYTECODE_BOUNDARY = -6,
ARMV5TE_PSEUDO_ALIGN4 = -5,
@@ -128,9 +129,9 @@
ARMV5TE_LSRV, /* lsr(2) [0100000011] rs[5..3] rd[2..0] */
ARMV5TE_MOV_IMM, /* mov(1) [00100] rd[10..8] imm_8[7..0] */
ARMV5TE_MOV_RR, /* mov(2) [0001110000] rn[5..3] rd[2..0] */
- ARMV5TE_MOV_RR_HL, /* mov(3) [01000110] H12[10] rm[5..3] rd[2..0] */
- ARMV5TE_MOV_RR_LH, /* mov(3) [01000101] H12[01] rm[5..3] rd[2..0] */
- ARMV5TE_MOV_RR_HH, /* mov(3) [01000111] H12[11] rm[5..3] rd[2..0] */
+ ARMV5TE_MOV_RR_H2H, /* mov(3) [01000110] H12[11] rm[5..3] rd[2..0] */
+ ARMV5TE_MOV_RR_H2L, /* mov(3) [01000110] H12[01] rm[5..3] rd[2..0] */
+ ARMV5TE_MOV_RR_L2H, /* mov(3) [01000110] H12[10] rm[5..3] rd[2..0] */
ARMV5TE_MUL, /* mul [0100001101] rm[5..3] rd[2..0] */
ARMV5TE_MVN, /* mvn [0100001111] rm[5..3] rd[2..0] */
ARMV5TE_NEG, /* neg [0100001001] rm[5..3] rd[2..0] */
@@ -195,6 +196,29 @@
int age; // default is 0, set lazily by the optimizer
} Armv5teLIR;
+/* Chain cell for predicted method invocation; the unchaining code treats each cell as 4 words */
+typedef struct PredictedChainingCell {
+ u4 branch; /* Branch to chained destination (Thumb pair from assembleBXPair) */
+ const ClassObject *clazz; /* key #1 for prediction: class the chain was patched for */
+ const Method *method; /* key #2 to lookup native PC from dalvik PC: the chained callee */
+ u4 counter; /* counter to patch the chaining cell; set to a PREDICTED_CHAIN_COUNTER_* value */
+} PredictedChainingCell;
+
+/* Init values when a predicted chain is initially assembled */
+#define PREDICTED_CHAIN_BX_PAIR_INIT 0
+#define PREDICTED_CHAIN_CLAZZ_INIT 0
+#define PREDICTED_CHAIN_METHOD_INIT 0
+#define PREDICTED_CHAIN_COUNTER_INIT 0
+
+/* Used when the callee is not compiled yet */
+#define PREDICTED_CHAIN_COUNTER_DELAY 16
+
+/* Rechain after this many mis-predictions have happened */
+#define PREDICTED_CHAIN_COUNTER_RECHAIN 1024
+
+/* Used if the resolved callee is a native method */
+#define PREDICTED_CHAIN_COUNTER_AVOID 0x7fffffff
+
/* Utility macros to traverse the LIR/Armv5teLIR list */
#define NEXT_LIR(lir) ((Armv5teLIR *) lir->generic.next)
#define PREV_LIR(lir) ((Armv5teLIR *) lir->generic.prev)
diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c
index 9b4595d..9642c7d 100644
--- a/vm/compiler/codegen/armv5te/Assemble.c
+++ b/vm/compiler/codegen/armv5te/Assemble.c
@@ -91,7 +91,7 @@
IS_BINARY_OP | CLOBBER_DEST,
"add", "r!0d, r!1d"),
ENCODING_MAP(ARMV5TE_ADD_PC_REL, 0xa000, 10, 8, 7, 0, -1, -1,
- IS_BINARY_OP | CLOBBER_DEST,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"add", "r!0d, pc, #!1E"),
ENCODING_MAP(ARMV5TE_ADD_SP_REL, 0xa800, 10, 8, 7, 0, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
@@ -169,7 +169,7 @@
IS_TERTIARY_OP | CLOBBER_DEST,
"ldr", "r!0d, [r!1d, r!2d]"),
ENCODING_MAP(ARMV5TE_LDR_PC_REL, 0x4800, 10, 8, 7, 0, -1, -1,
- IS_BINARY_OP | CLOBBER_DEST,
+ IS_TERTIARY_OP | CLOBBER_DEST,
"ldr", "r!0d, [pc, #!1E]"),
ENCODING_MAP(ARMV5TE_LDR_SP_REL, 0x9800, 10, 8, 7, 0, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
@@ -210,15 +210,15 @@
ENCODING_MAP(ARMV5TE_MOV_RR, 0x1c00, 2, 0, 5, 3, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
"mov", "r!0d, r!1d"),
- ENCODING_MAP(ARMV5TE_MOV_RR_LH, 0x4640, 2, 0, 5, 3, -1, -1,
- IS_BINARY_OP | CLOBBER_DEST,
- "mov", "r!0D, r!1d"),
- ENCODING_MAP(ARMV5TE_MOV_RR_HL, 0x4680, 2, 0, 5, 3, -1, -1,
- IS_BINARY_OP | CLOBBER_DEST,
- "mov", "r!0d, r!1D"),
- ENCODING_MAP(ARMV5TE_MOV_RR_HH, 0x46c0, 2, 0, 5, 3, -1, -1,
+ ENCODING_MAP(ARMV5TE_MOV_RR_H2H, 0x46c0, 2, 0, 5, 3, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
"mov", "r!0D, r!1D"),
+ ENCODING_MAP(ARMV5TE_MOV_RR_H2L, 0x4640, 2, 0, 5, 3, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0d, r!1D"),
+ ENCODING_MAP(ARMV5TE_MOV_RR_L2H, 0x4680, 2, 0, 5, 3, -1, -1,
+ IS_BINARY_OP | CLOBBER_DEST,
+ "mov", "r!0D, r!1d"),
ENCODING_MAP(ARMV5TE_MUL, 0x4340, 2, 0, 5, 3, -1, -1,
IS_BINARY_OP | CLOBBER_DEST,
"mul", "r!0d, r!1d"),
@@ -335,7 +335,12 @@
lir->opCode == ARMV5TE_ADD_PC_REL) {
Armv5teLIR *lirTarget = (Armv5teLIR *) lir->generic.target;
intptr_t pc = (lir->generic.offset + 4) & ~3;
- intptr_t target = lirTarget->generic.offset;
+ /*
+ * Allow an offset (stored in operands[2]) to be added to the
+ * PC-relative target. Useful to get to a fixed field inside a
+ * chaining cell.
+ */
+ intptr_t target = lirTarget->generic.offset + lir->operands[2];
int delta = target - pc;
if (delta & 0x3) {
LOGE("PC-rel distance is not multiples of 4: %d\n", delta);
@@ -468,7 +473,7 @@
/* Add space for chain cell counts & trace description */
u4 chainCellOffset = offset;
- Armv5teLIR *chainCellOffsetLIR = cUnit->chainCellOffsetLIR;
+ Armv5teLIR *chainCellOffsetLIR = (Armv5teLIR *) cUnit->chainCellOffsetLIR;
assert(chainCellOffsetLIR);
assert(chainCellOffset < 0x10000);
assert(chainCellOffsetLIR->opCode == ARMV5TE_16BIT_DATA &&
@@ -544,6 +549,21 @@
(long)(cUnit->baseAddr + offset), 0);
}
+static u4 assembleBXPair(int branchOffset)
+{
+ u4 lowHalf, highHalf; /* first and second 16-bit Thumb instruction of the pair */
+ /* Offsets within [-2048, 2046] fit a single branch; pad it with a nop */
+ if ((branchOffset >= -2048) && (branchOffset <= 2046)) {
+ lowHalf = (0xe000 | ((branchOffset>> 1) & 0x7ff));
+ highHalf = 0x4300; /* nop -> or r0, r0 */
+ } else {
+ lowHalf = (0xf000 | ((branchOffset>>12) & 0x7ff));
+ highHalf = (0xf800 | ((branchOffset>> 1) & 0x7ff));
+ }
+
+ return highHalf<<16 | lowHalf; /* second instruction lives in the upper halfword */
+}
+
/*
* Perform translation chain operation.
* For ARM, we'll use a pair of thumb instructions to generate
@@ -560,8 +580,6 @@
{
int baseAddr = (u4) branchAddr + 4;
int branchOffset = (int) tgtAddr - baseAddr;
- u4 thumb1;
- u4 thumb2;
u4 newInst;
if (gDvm.sumThreadSuspendCount == 0) {
@@ -572,15 +590,9 @@
COMPILER_TRACE_CHAINING(
LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
(int) branchAddr, (int) tgtAddr & -2));
- if ((branchOffset < -2048) | (branchOffset > 2046)) {
- thumb1 = (0xf000 | ((branchOffset>>12) & 0x7ff));
- thumb2 = (0xf800 | ((branchOffset>> 1) & 0x7ff));
- } else {
- thumb1 = (0xe000 | ((branchOffset>> 1) & 0x7ff));
- thumb2 = 0x4300; /* nop -> or r0, r0 */
- }
- newInst = thumb2<<16 | thumb1;
+ newInst = assembleBXPair(branchOffset);
+
*branchAddr = newInst;
cacheflush((long)branchAddr, (long)branchAddr + 4, 0);
}
@@ -589,6 +601,83 @@
}
/*
+ * This method is called from the invoke templates for virtual and interface
+ * methods to speculatively setup a chain to the callee. The templates are
+ * written in assembly and have set up method, cell, and clazz at r0, r2, and
+ * r3 respectively, so there is an unused argument in the list. Upon return one
+ * of the following three results may happen:
+ * 1) Chain is not setup because the callee is native. Reset the rechain
+ * count to a big number so that it will take a long time before the next
+ * rechain attempt to happen.
+ * 2) Chain is not setup because the callee has not been created yet. Reset
+ * the rechain count to a small number and retry in the near future.
+ * 3) Ask all other threads to stop before patching this chaining cell.
+ * This is required because another thread may have passed the class check
+ * but hasn't reached the chaining cell yet to follow the chain. If we
+ * patch the content before halting the other thread, there could be a
+ * small window for race conditions to happen that it may follow the new
+ * but wrong chain to invoke a different method.
+ */
+const Method *dvmJitToPatchPredictedChain(const Method *method, /* resolved callee */
+ void *unused, /* placeholder argument; never read here */
+ PredictedChainingCell *cell, /* cell to (re)patch */
+ const ClassObject *clazz) /* class recorded as the new prediction */
+{
+ /* Don't come back here for a long time if the method is native */
+ if (dvmIsNativeMethod(method)) {
+ cell->counter = PREDICTED_CHAIN_COUNTER_AVOID;
+ cacheflush((long) cell, (long) (cell+1), 0); /* flush the whole cell */
+ COMPILER_TRACE_CHAINING(
+ LOGD("Jit Runtime: predicted chain %p to native method %s ignored",
+ cell, method->name));
+ goto done;
+ }
+ int tgtAddr = (int) dvmJitGetCodeAddr(method->insns); /* 0 means no translation yet */
+
+ /*
+ * Compilation not made yet for the callee. Reset the counter to a small
+ * value and come back to check soon.
+ */
+ if (tgtAddr == 0) {
+ /*
+ * Wait for a few invocations (currently set to be 16) before trying
+ * to setup the chain again.
+ */
+ cell->counter = PREDICTED_CHAIN_COUNTER_DELAY;
+ cacheflush((long) cell, (long) (cell+1), 0);
+ COMPILER_TRACE_CHAINING(
+ LOGD("Jit Runtime: predicted chain %p to method %s delayed",
+ cell, method->name));
+ goto done;
+ }
+
+ /* Stop the world so no thread is between the class check and the cell */
+ dvmSuspendAllThreads(SUSPEND_FOR_JIT);
+
+ int baseAddr = (int) cell + 4; // PC is cur_addr + 4
+ int branchOffset = tgtAddr - baseAddr; /* relative to the cell's first word */
+
+ COMPILER_TRACE_CHAINING(
+ LOGD("Jit Runtime: predicted chain %p from %s to %s (%s) patched",
+ cell, cell->clazz ? cell->clazz->descriptor : "NULL",
+ clazz->descriptor,
+ method->name));
+
+ cell->branch = assembleBXPair(branchOffset); /* branch to the callee's translation */
+ cell->clazz = clazz; /* new predicted class */
+ cell->method = method; /* new predicted callee */
+ cell->counter = PREDICTED_CHAIN_COUNTER_RECHAIN; /* restart the rechain counter */
+
+ cacheflush((long) cell, (long) (cell+1), 0);
+
+ /* All done - resume all other threads */
+ dvmResumeAllThreads(SUSPEND_FOR_JIT);
+
+done:
+ return method; /* every path hands the callee back unchanged */
+}
+
+/*
* Unchain a trace given the starting address of the translation
* in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
* Returns the address following the last cell unchained. Note that
@@ -601,24 +690,34 @@
u2 chainCellOffset = *pChainCellOffset;
ChainCellCounts *pChainCellCounts =
(ChainCellCounts*)((char*)codeAddr + chainCellOffset -3);
- int cellCount;
+ int cellSize;
u4* pChainCells;
u4* pStart;
u4 thumb1;
u4 thumb2;
u4 newInst;
int i,j;
+ PredictedChainingCell *predChainCell;
/* Get total count of chain cells */
- for (i = 0, cellCount = 0; i < CHAINING_CELL_LAST; i++) {
- cellCount += pChainCellCounts->u.count[i];
+ for (i = 0, cellSize = 0; i < CHAINING_CELL_LAST; i++) {
+ if (i != CHAINING_CELL_INVOKE_PREDICTED) {
+ cellSize += pChainCellCounts->u.count[i] * 2;
+ } else {
+ cellSize += pChainCellCounts->u.count[i] * 4;
+ }
}
/* Locate the beginning of the chain cell region */
- pStart = pChainCells = (u4*)((char*)pChainCellCounts - (cellCount * 8));
+ pStart = pChainCells = ((u4 *) pChainCellCounts) - cellSize;
/* The cells are sorted in order - walk through them and reset */
for (i = 0; i < CHAINING_CELL_LAST; i++) {
+ int elemSize = 2; /* Most chaining cell has two words */
+ if (i == CHAINING_CELL_INVOKE_PREDICTED) {
+ elemSize = 4;
+ }
+
for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
int targetOffset;
switch(i) {
@@ -627,26 +726,38 @@
jitToInterpEntries.dvmJitToInterpNormal);
break;
case CHAINING_CELL_HOT:
- case CHAINING_CELL_INVOKE:
+ case CHAINING_CELL_INVOKE_SINGLETON:
targetOffset = offsetof(InterpState,
jitToInterpEntries.dvmJitToTraceSelect);
break;
+ case CHAINING_CELL_INVOKE_PREDICTED:
+ targetOffset = 0;
+ predChainCell = (PredictedChainingCell *) pChainCells;
+ /* Reset the cell to the init state */
+ predChainCell->branch = PREDICTED_CHAIN_BX_PAIR_INIT;
+ predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
+ predChainCell->method = PREDICTED_CHAIN_METHOD_INIT;
+ predChainCell->counter = PREDICTED_CHAIN_COUNTER_INIT;
+ break;
default:
dvmAbort();
}
+ COMPILER_TRACE_CHAINING(
+ LOGD("Jit Runtime: unchaining 0x%x", (int)pChainCells));
/*
- * Arm code sequence for a chaining cell is:
+ * Thumb code sequence for a chaining cell is:
* ldr r0, rGLUE, #<word offset>
* blx r0
*/
- COMPILER_TRACE_CHAINING(
- LOGD("Jit Runtime: unchaining 0x%x", (int)pChainCells));
- targetOffset = targetOffset >> 2; /* convert to word offset */
- thumb1 = 0x6800 | (targetOffset << 6) | (rGLUE << 3) | (r0 << 0);
- thumb2 = 0x4780 | (r0 << 3);
- newInst = thumb2<<16 | thumb1;
- *pChainCells = newInst;
- pChainCells += 2; /* Advance by 2 words */
+ if (i != CHAINING_CELL_INVOKE_PREDICTED) {
+ targetOffset = targetOffset >> 2; /* convert to word offset */
+ thumb1 = 0x6800 | (targetOffset << 6) |
+ (rGLUE << 3) | (r0 << 0);
+ thumb2 = 0x4780 | (r0 << 3);
+ newInst = thumb2<<16 | thumb1;
+ *pChainCells = newInst;
+ }
+ pChainCells += elemSize; /* Advance by a fixed number of words */
}
}
return pChainCells;
diff --git a/vm/compiler/codegen/armv5te/Codegen.c b/vm/compiler/codegen/armv5te/Codegen.c
index 10589e1..3ba3cc6 100644
--- a/vm/compiler/codegen/armv5te/Codegen.c
+++ b/vm/compiler/codegen/armv5te/Codegen.c
@@ -325,7 +325,7 @@
{
genDispatchToHandler(cUnit, TEMPLATE_RETURN);
#if defined(INVOKE_STATS)
- gDvmJit.jitReturn++;
+ gDvmJit.returnOp++;
#endif
int dPC = (int) (cUnit->method->insns + mir->offset);
Armv5teLIR *branch = newLIR0(cUnit, ARMV5TE_B_UNCOND);
@@ -355,7 +355,7 @@
if (vSrc <= 64) {
/* Sneak 4 into the base address first */
newLIR3(cUnit, ARMV5TE_ADD_RRI3, rDestLo, rFP, 4);
- newLIR2(cUnit, ARMV5TE_ADD_RI8, rDestHi, (vSrc-1)*4);
+ newLIR2(cUnit, ARMV5TE_ADD_RI8, rDestLo, (vSrc-1)*4);
} else {
/* Offset too far from rFP */
loadConstant(cUnit, rDestLo, vSrc*4);
@@ -988,7 +988,7 @@
loadConstant(cUnit, reg3, 0);
newLIR3(cUnit, ARMV5TE_SUB_RRR, reg2, reg3, reg0);
newLIR2(cUnit, ARMV5TE_SBC, reg3, reg1);
- storeValuePair(cUnit, r0, reg3, vDest, reg0);
+ storeValuePair(cUnit, reg2, reg3, vDest, reg0);
return false;
}
default:
@@ -1323,40 +1323,225 @@
}
}
-static void genInvokeCommon(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
- Armv5teLIR *labelList, Armv5teLIR *pcrLabel,
- const Method *calleeMethod)
+/*
+ * Generate code to setup the call stack then jump to the chaining cell if it
+ * is not a native method.
+ */
+static void genInvokeSingletonCommon(CompilationUnit *cUnit, MIR *mir,
+ BasicBlock *bb, Armv5teLIR *labelList,
+ Armv5teLIR *pcrLabel,
+ const Method *calleeMethod)
{
Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id];
/* r1 = &retChainingCell */
- Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
- r1, 0);
+ Armv5teLIR *addrRetChain = newLIR3(cUnit, ARMV5TE_ADD_PC_REL,
+ r1, 0, 0);
/* r4PC = dalvikCallsite */
loadConstant(cUnit, r4PC,
(int) (cUnit->method->insns + mir->offset));
addrRetChain->generic.target = (LIR *) retChainingCell;
/*
- * r0 = calleeMethod (loaded upon calling genInvokeCommon)
+ * r0 = calleeMethod (loaded upon calling genInvokeSingletonCommon)
* r1 = &ChainingCell
* r4PC = callsiteDPC
*/
if (dvmIsNativeMethod(calleeMethod)) {
- genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
+ genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NATIVE);
#if defined(INVOKE_STATS)
- gDvmJit.invokeNoOpt++;
+ gDvmJit.invokeNative++;
#endif
} else {
genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_CHAIN);
#if defined(INVOKE_STATS)
gDvmJit.invokeChain++;
#endif
+ /* Branch to the chaining cell */
genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
}
/* Handle exceptions using the interpreter */
genTrap(cUnit, mir->offset, pcrLabel);
}
+/*
+ * Generate code to check the validity of a predicted chain and take actions
+ * based on the result.
+ *
+ * 0x426a99aa : ldr r4, [pc, #72] --> r4 <- dalvikPC of this invoke
+ * 0x426a99ac : add r1, pc, #32 --> r1 <- &retChainingCell
+ * 0x426a99ae : add r2, pc, #40 --> r2 <- &predictedChainingCell
+ * 0x426a99b0 : blx_1 0x426a918c --+ TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
+ * 0x426a99b2 : blx_2 see above --+
+ * 0x426a99b4 : b 0x426a99d8 --> off to the predicted chain
+ * 0x426a99b6 : b 0x426a99c8 --> punt to the interpreter
+ * 0x426a99b8 : ldr r0, [r7, #44] --> r0 <- this->class->vtable[methodIdx]
+ * 0x426a99ba : cmp r1, #0 --> compare r1 (rechain count) against 0
+ * 0x426a99bc : bgt 0x426a99c2 --> >0? don't rechain
+ * 0x426a99be : ldr r7, [r6, #96] --+ dvmJitToPatchPredictedChain
+ * 0x426a99c0 : blx r7 --+
+ * 0x426a99c2 : add r1, pc, #12 --> r1 <- &retChainingCell
+ * 0x426a99c4 : blx_1 0x426a9098 --+ TEMPLATE_INVOKE_METHOD_NO_OPT
+ * 0x426a99c6 : blx_2 see above --+
+ */
+static void genInvokeVirtualCommon(CompilationUnit *cUnit, MIR *mir,
+ int methodIndex, /* vtable slot of the callee */
+ Armv5teLIR *retChainingCell, /* label of the return chaining cell */
+ Armv5teLIR *predChainingCell, /* label of the predicted chaining cell */
+ Armv5teLIR *pcrLabel) /* PC-reconstruction label, or NULL */
+{
+ /* "this" is already left in r0 by genProcessArgs* */
+
+ /* r4PC = dalvikCallsite */
+ loadConstant(cUnit, r4PC,
+ (int) (cUnit->method->insns + mir->offset));
+
+ /* r1 = &retChainingCell (ADD_PC_REL is tertiary: operands[2] is an extra target offset, 0 here) */
+ Armv5teLIR *addrRetChain = newLIR3(cUnit, ARMV5TE_ADD_PC_REL,
+ r1, 0, 0);
+ addrRetChain->generic.target = (LIR *) retChainingCell;
+
+ /* r2 = &predictedChainingCell (offset 0: start of the cell) */
+ Armv5teLIR *predictedChainingCell =
+ newLIR3(cUnit, ARMV5TE_ADD_PC_REL, r2, 0, 0);
+ predictedChainingCell->generic.target = (LIR *) predChainingCell;
+
+ genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN);
+
+ /* return through lr - jump to the chaining cell */
+ genUnconditionalBranch(cUnit, predChainingCell);
+
+ /*
+ * null-check on "this" may have been eliminated, but we still need a PC-
+ * reconstruction label for stack overflow bailout.
+ */
+ if (pcrLabel == NULL) {
+ int dPC = (int) (cUnit->method->insns + mir->offset);
+ pcrLabel = dvmCompilerNew(sizeof(Armv5teLIR), true);
+ pcrLabel->opCode = ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL;
+ pcrLabel->operands[0] = dPC;
+ pcrLabel->operands[1] = mir->offset;
+ /* Insert the place holder to the growable list */
+ dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel);
+ }
+
+ /* return through lr+2 - punt to the interpreter */
+ genUnconditionalBranch(cUnit, pcrLabel);
+
+ /*
+ * return through lr+4 - fully resolve the callee method.
+ * r1 <- count
+ * r2 <- &predictedChainCell
+ * r3 <- this->class
+ * r4 <- dPC
+ * r7 <- this->class->vtable
+ */
+
+ /* r0 <- calleeMethod */
+ if (methodIndex < 32) {
+ newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r7, methodIndex);
+ } else {
+ loadConstant(cUnit, r0, methodIndex<<2);
+ newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r7, r0);
+ }
+
+ /* Check if rechain limit is reached */
+ newLIR2(cUnit, ARMV5TE_CMP_RI8, r1, 0);
+
+ Armv5teLIR *bypassRechaining =
+ newLIR2(cUnit, ARMV5TE_B_COND, 0, ARM_COND_GT);
+
+ newLIR3(cUnit, ARMV5TE_LDR_RRI5, r7, rGLUE,
+ offsetof(InterpState,
+ jitToInterpEntries.dvmJitToPatchPredictedChain)
+ >> 2);
+
+ /*
+ * r0 = calleeMethod
+ * r2 = &predictedChainingCell
+ * r3 = class
+ *
+ * &returnChainingCell has been loaded into r1 but is not needed
+ * when patching the chaining cell and will be clobbered upon
+ * returning so it will be reconstructed again.
+ */
+ newLIR1(cUnit, ARMV5TE_BLX_R, r7);
+
+ /* r1 = &retChainingCell */
+ addrRetChain = newLIR3(cUnit, ARMV5TE_ADD_PC_REL, r1, 0, 0);
+ addrRetChain->generic.target = (LIR *) retChainingCell;
+
+ bypassRechaining->generic.target = (LIR *) addrRetChain;
+ /*
+ * r0 = calleeMethod,
+ * r1 = &ChainingCell,
+ * r4PC = callsiteDPC,
+ */
+ genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
+#if defined(INVOKE_STATS)
+ gDvmJit.invokePredictedChain++;
+#endif
+ /* Handle exceptions using the interpreter */
+ genTrap(cUnit, mir->offset, pcrLabel);
+}
+
+/*
+ * Upon calling this function, "this" is stored in r0. The actual class will be
+ * chased down off r0 and the predicted one will be retrieved through
+ * predictedChainingCell then a comparison is performed to see whether the
+ * previously established chaining is still valid.
+ *
+ * The return LIR is a branch based on the comparison result. The actual branch
+ * target will be setup in the caller.
+ */
+static Armv5teLIR *genCheckPredictedChain(CompilationUnit *cUnit,
+ Armv5teLIR *predChainingCell,
+ Armv5teLIR *retChainingCell,
+ MIR *mir)
+{
+ /* r3 now contains this->clazz */
+ newLIR3(cUnit, ARMV5TE_LDR_RRI5, r3, r0,
+ offsetof(Object, clazz) >> 2);
+
+ /*
+ * r2 now contains predicted class. The cached value is read from the
+ * clazz field of the chaining cell (offset passed as the third operand).
+ */
+ Armv5teLIR *getPredictedClass =
+ newLIR3(cUnit, ARMV5TE_LDR_PC_REL, r2, 0,
+ offsetof(PredictedChainingCell, clazz));
+ getPredictedClass->generic.target = (LIR *) predChainingCell;
+
+ /*
+ * r0 now contains predicted method ("this" in r0 is overwritten). The
+ * cached value is read from the method field of the chaining cell.
+ */
+ Armv5teLIR *getPredictedMethod =
+ newLIR3(cUnit, ARMV5TE_LDR_PC_REL, r0, 0,
+ offsetof(PredictedChainingCell, method));
+ getPredictedMethod->generic.target = (LIR *) predChainingCell;
+
+ /* Load the cell's counter into r7 to see if it is time to unchain and refresh */
+ Armv5teLIR *getRechainingRequestCount =
+ newLIR3(cUnit, ARMV5TE_LDR_PC_REL, r7, 0,
+ offsetof(PredictedChainingCell, counter));
+ getRechainingRequestCount->generic.target =
+ (LIR *) predChainingCell;
+
+ /* r4PC = dalvikCallsite */
+ loadConstant(cUnit, r4PC,
+ (int) (cUnit->method->insns + mir->offset));
+
+ /* r1 = &retChainingCell */
+ Armv5teLIR *addrRetChain = newLIR3(cUnit, ARMV5TE_ADD_PC_REL,
+ r1, 0, 0);
+ addrRetChain->generic.target = (LIR *) retChainingCell;
+
+ /* Check if r2 (predicted class) == r3 (actual class) */
+ newLIR2(cUnit, ARMV5TE_CMP_RR, r2, r3);
+
+ /* Branch taken when the classes match; the caller fills in the target */
+ return newLIR2(cUnit, ARMV5TE_B_COND, 0, ARM_COND_EQ);
+}
+
/* Geneate a branch to go back to the interpreter */
static void genPuntToInterp(CompilationUnit *cUnit, unsigned int offset)
{
@@ -2449,6 +2634,7 @@
*/
case OP_INVOKE_VIRTUAL:
case OP_INVOKE_VIRTUAL_RANGE: {
+ Armv5teLIR *predChainingCell = &labelList[bb->taken->id];
int methodIndex =
cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]->
methodIndex;
@@ -2458,39 +2644,10 @@
else
genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);
- /* r0 now contains this->clazz */
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
- offsetof(Object, clazz) >> 2);
- /* r1 = &retChainingCell */
- Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
- r1, 0);
- /* r4PC = dalvikCallsite */
- loadConstant(cUnit, r4PC,
- (int) (cUnit->method->insns + mir->offset));
-
- /* r0 now contains this->clazz->vtable */
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
- offsetof(ClassObject, vtable) >> 2);
- addrRetChain->generic.target = (LIR *) retChainingCell;
-
- if (methodIndex < 32) {
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, methodIndex);
- } else {
- loadConstant(cUnit, r7, methodIndex<<2);
- newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r7);
- }
-
- /*
- * r0 = calleeMethod,
- * r1 = &ChainingCell,
- * r4PC = callsiteDPC,
- */
- genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
-#if defined(INVOKE_STATS)
- gDvmJit.invokeNoOpt++;
-#endif
- /* Handle exceptions using the interpreter */
- genTrap(cUnit, mir->offset, pcrLabel);
+ genInvokeVirtualCommon(cUnit, mir, methodIndex,
+ retChainingCell,
+ predChainingCell,
+ pcrLabel);
break;
}
/*
@@ -2513,8 +2670,8 @@
/* r0 = calleeMethod */
loadConstant(cUnit, r0, (int) calleeMethod);
- genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
- calleeMethod);
+ genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel,
+ calleeMethod);
break;
}
/* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */
@@ -2531,8 +2688,8 @@
/* r0 = calleeMethod */
loadConstant(cUnit, r0, (int) calleeMethod);
- genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
- calleeMethod);
+ genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel,
+ calleeMethod);
break;
}
/* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */
@@ -2551,16 +2708,77 @@
/* r0 = calleeMethod */
loadConstant(cUnit, r0, (int) calleeMethod);
- genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
- calleeMethod);
+ genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel,
+ calleeMethod);
break;
}
/*
* calleeMethod = dvmFindInterfaceMethodInCache(this->clazz,
* BBBB, method, method->clazz->pDvmDex)
+ *
+ * Given "invoke-interface {v0}", the following is the generated code:
+ *
+ * 0x426a9abe : ldr r0, [r5, #0] --+
+ * 0x426a9ac0 : mov r7, r5 |
+ * 0x426a9ac2 : sub r7, #24 |
+ * 0x426a9ac4 : cmp r0, #0 | genProcessArgsNoRange
+ * 0x426a9ac6 : beq 0x426a9afe |
+ * 0x426a9ac8 : stmia r7, <r0> --+
+ * 0x426a9aca : ldr r4, [pc, #104] --> r4 <- dalvikPC of this invoke
+ * 0x426a9acc : add r1, pc, #52 --> r1 <- &retChainingCell
+ * 0x426a9ace : add r2, pc, #60 --> r2 <- &predictedChainingCell
+ * 0x426a9ad0 : blx_1 0x426a918c --+ TEMPLATE_INVOKE_METHOD_
+ * 0x426a9ad2 : blx_2 see above --+ PREDICTED_CHAIN
+ * 0x426a9ad4 : b 0x426a9b0c --> off to the predicted chain
+ * 0x426a9ad6 : b 0x426a9afe --> punt to the interpreter
+ * 0x426a9ad8 : mov r9, r1 --+
+ * 0x426a9ada : mov r10, r2 |
+ * 0x426a9adc : mov r12, r3 |
+ * 0x426a9ade : mov r0, r3 |
+ * 0x426a9ae0 : mov r1, #74 | dvmFindInterfaceMethodInCache
+ * 0x426a9ae2 : ldr r2, [pc, #76] |
+ * 0x426a9ae4 : ldr r3, [pc, #68] |
+ * 0x426a9ae6 : ldr r7, [pc, #64] |
+ * 0x426a9ae8 : blx r7 --+
+ * 0x426a9aea : mov r1, r9 --> r1 <- rechain count
+ * 0x426a9aec : cmp r1, #0 --> compare against 0
+ * 0x426a9aee : bgt 0x426a9af8 --> >0? don't rechain
+ * 0x426a9af0 : ldr r7, [r6, #96] --+
+ * 0x426a9af2 : mov r2, r10 | dvmJitToPatchPredictedChain
+ * 0x426a9af4 : mov r3, r12 |
+ * 0x426a9af6 : blx r7 --+
+ * 0x426a9af8 : add r1, pc, #8 --> r1 <- &retChainingCell
+ * 0x426a9afa : blx_1 0x426a9098 --+ TEMPLATE_INVOKE_METHOD_NO_OPT
+ * 0x426a9afc : blx_2 see above --+
+ * -------- reconstruct dalvik PC : 0x428b786c @ +0x001e
+ * 0x426a9afe (0042): ldr r0, [pc, #52]
+ * Exception_Handling:
+ * 0x426a9b00 (0044): ldr r1, [r6, #84]
+ * 0x426a9b02 (0046): blx r1
+ * 0x426a9b04 (0048): .align4
+ * -------- chaining cell (hot): 0x0021
+ * 0x426a9b04 (0048): ldr r0, [r6, #92]
+ * 0x426a9b06 (004a): blx r0
+ * 0x426a9b08 (004c): data 0x7872(30834)
+ * 0x426a9b0a (004e): data 0x428b(17035)
+ * 0x426a9b0c (0050): .align4
+ * -------- chaining cell (predicted)
+ * 0x426a9b0c (0050): data 0x0000(0) --> will be patched into bx
+ * 0x426a9b0e (0052): data 0x0000(0)
+ * 0x426a9b10 (0054): data 0x0000(0) --> class
+ * 0x426a9b12 (0056): data 0x0000(0)
+ * 0x426a9b14 (0058): data 0x0000(0) --> method
+ * 0x426a9b16 (005a): data 0x0000(0)
+ * 0x426a9b18 (005c): data 0x0000(0) --> reset count
+ * 0x426a9b1a (005e): data 0x0000(0)
+ * 0x426a9b28 (006c): .word (0xad0392a5)
+ * 0x426a9b2c (0070): .word (0x6e750)
+ * 0x426a9b30 (0074): .word (0x4109a618)
+ * 0x426a9b34 (0078): .word (0x428b786c)
*/
case OP_INVOKE_INTERFACE:
case OP_INVOKE_INTERFACE_RANGE: {
+ Armv5teLIR *predChainingCell = &labelList[bb->taken->id];
int methodIndex = dInsn->vB;
if (mir->dalvikInsn.opCode == OP_INVOKE_INTERFACE)
@@ -2568,9 +2786,60 @@
else
genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);
+ /* "this" is already left in r0 by genProcessArgs* */
+
+ /* r4PC = dalvikCallsite */
+ loadConstant(cUnit, r4PC,
+ (int) (cUnit->method->insns + mir->offset));
+
+ /* r1 = &retChainingCell */
+ Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
+ r1, 0);
+ addrRetChain->generic.target = (LIR *) retChainingCell;
+
+ /* r2 = &predictedChainingCell */
+ Armv5teLIR *predictedChainingCell =
+ newLIR2(cUnit, ARMV5TE_ADD_PC_REL, r2, 0);
+ predictedChainingCell->generic.target = (LIR *) predChainingCell;
+
+ genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN);
+
+ /* return through lr - jump to the chaining cell */
+ genUnconditionalBranch(cUnit, predChainingCell);
+
+ /*
+ * null-check on "this" may have been eliminated, but we still need
+ * a PC-reconstruction label for stack overflow bailout.
+ */
+ if (pcrLabel == NULL) {
+ int dPC = (int) (cUnit->method->insns + mir->offset);
+ pcrLabel = dvmCompilerNew(sizeof(Armv5teLIR), true);
+ pcrLabel->opCode = ARMV5TE_PSEUDO_PC_RECONSTRUCTION_CELL;
+ pcrLabel->operands[0] = dPC;
+ pcrLabel->operands[1] = mir->offset;
+ /* Insert the place holder to the growable list */
+ dvmInsertGrowableList(&cUnit->pcReconstructionList, pcrLabel);
+ }
+
+ /* return through lr+2 - punt to the interpreter */
+ genUnconditionalBranch(cUnit, pcrLabel);
+
+ /*
+ * return through lr+4 - fully resolve the callee method.
+ * r1 <- count
+ * r2 <- &predictedChainCell
+ * r3 <- this->class
+ * r4 <- dPC
+ * r7 <- this->class->vtable
+ */
+
+ /* Save count, &predictedChainCell, and class to high regs first */
+ newLIR2(cUnit, ARMV5TE_MOV_RR_L2H, r9 & THUMB_REG_MASK, r1);
+ newLIR2(cUnit, ARMV5TE_MOV_RR_L2H, r10 & THUMB_REG_MASK, r2);
+ newLIR2(cUnit, ARMV5TE_MOV_RR_L2H, r12 & THUMB_REG_MASK, r3);
+
/* r0 now contains this->clazz */
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
- offsetof(Object, clazz) >> 2);
+ newLIR2(cUnit, ARMV5TE_MOV_RR, r0, r3);
/* r1 = BBBB */
loadConstant(cUnit, r1, dInsn->vB);
@@ -2587,14 +2856,40 @@
/* r0 = calleeMethod (returned from dvmFindInterfaceMethodInCache */
- /* r1 = &retChainingCell */
- Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
- r1, 0);
- /* r4PC = dalvikCallsite */
- loadConstant(cUnit, r4PC,
- (int) (cUnit->method->insns + mir->offset));
+ newLIR2(cUnit, ARMV5TE_MOV_RR_H2L, r1, r9 & THUMB_REG_MASK);
+ /* Check if rechain limit is reached */
+ newLIR2(cUnit, ARMV5TE_CMP_RI8, r1, 0);
+
+ Armv5teLIR *bypassRechaining =
+ newLIR2(cUnit, ARMV5TE_B_COND, 0, ARM_COND_GT);
+
+ newLIR3(cUnit, ARMV5TE_LDR_RRI5, r7, rGLUE,
+ offsetof(InterpState,
+ jitToInterpEntries.dvmJitToPatchPredictedChain)
+ >> 2);
+
+ newLIR2(cUnit, ARMV5TE_MOV_RR_H2L, r2, r10 & THUMB_REG_MASK);
+ newLIR2(cUnit, ARMV5TE_MOV_RR_H2L, r3, r12 & THUMB_REG_MASK);
+
+ /*
+ * r0 = calleeMethod
+ * r2 = &predictedChainingCell
+ * r3 = class
+ *
+ * &returnChainingCell has been loaded into r1 but is not needed
+ * when patching the chaining cell and will be clobbered upon
+ * returning so it will be reconstructed again.
+ */
+ newLIR1(cUnit, ARMV5TE_BLX_R, r7);
+
+ /* r1 = &retChainingCell */
+ addrRetChain = newLIR3(cUnit, ARMV5TE_ADD_PC_REL,
+ r1, 0, 0);
addrRetChain->generic.target = (LIR *) retChainingCell;
+
+ bypassRechaining->generic.target = (LIR *) addrRetChain;
+
/*
* r0 = this, r1 = calleeMethod,
* r1 = &ChainingCell,
@@ -2602,7 +2897,7 @@
*/
genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
#if defined(INVOKE_STATS)
- gDvmJit.invokeNoOpt++;
+ gDvmJit.invokePredictedChain++;
#endif
/* Handle exceptions using the interpreter */
genTrap(cUnit, mir->offset, pcrLabel);
@@ -2628,6 +2923,7 @@
BasicBlock *bb, Armv5teLIR *labelList)
{
Armv5teLIR *retChainingCell = &labelList[bb->fallThrough->id];
+ Armv5teLIR *predChainingCell = &labelList[bb->taken->id];
Armv5teLIR *pcrLabel = NULL;
DecodedInstruction *dInsn = &mir->dalvikInsn;
@@ -2641,37 +2937,10 @@
else
genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel);
- /* r0 now contains this->clazz */
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
- offsetof(Object, clazz) >> 2);
- /* r1 = &retChainingCell */
- Armv5teLIR *addrRetChain = newLIR2(cUnit, ARMV5TE_ADD_PC_REL,
- r1, 0);
- /* r4PC = dalvikCallsite */
- loadConstant(cUnit, r4PC,
- (int) (cUnit->method->insns + mir->offset));
-
- /* r0 now contains this->clazz->vtable */
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0,
- offsetof(ClassObject, vtable) >> 2);
- addrRetChain->generic.target = (LIR *) retChainingCell;
-
- if (methodIndex < 32) {
- newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, r0, methodIndex);
- } else {
- loadConstant(cUnit, r7, methodIndex<<2);
- newLIR3(cUnit, ARMV5TE_LDR_RRR, r0, r0, r7);
- }
-
- /*
- * r0 = calleeMethod,
- * r1 = &ChainingCell,
- * r4PC = callsiteDPC,
- */
- genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NO_OPT);
-#if defined(INVOKE_STATS)
- gDvmJit.invokeNoOpt++;
-#endif
+ genInvokeVirtualCommon(cUnit, mir, methodIndex,
+ retChainingCell,
+ predChainingCell,
+ pcrLabel);
break;
}
/* calleeMethod = method->clazz->super->vtable[BBBB] */
@@ -2688,16 +2957,15 @@
/* r0 = calleeMethod */
loadConstant(cUnit, r0, (int) calleeMethod);
- genInvokeCommon(cUnit, mir, bb, labelList, pcrLabel,
- calleeMethod);
+ genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel,
+ calleeMethod);
+ /* Handle exceptions using the interpreter */
+ genTrap(cUnit, mir->offset, pcrLabel);
break;
}
- /* calleeMethod = method->clazz->super->vtable[BBBB] */
default:
return true;
}
- /* Handle exceptions using the interpreter */
- genTrap(cUnit, mir->offset, pcrLabel);
return false;
}
@@ -2799,8 +3067,8 @@
}
/* Chaining cell for monomorphic method invocations. */
-static void handleInvokeChainingCell(CompilationUnit *cUnit,
- const Method *callee)
+static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
+ const Method *callee)
{
newLIR3(cUnit, ARMV5TE_LDR_RRI5, r0, rGLUE,
offsetof(InterpState, jitToInterpEntries.dvmJitToTraceSelect) >> 2);
@@ -2808,6 +3076,23 @@
addWordData(cUnit, (int) (callee->insns), true);
}
+/* Chaining cell for predicted virtual/interface method invocations. */
+static void handleInvokePredictedChainingCell(CompilationUnit *cUnit)
+{
+
+ /* Should not be executed in the initial state */
+ addWordData(cUnit, PREDICTED_CHAIN_BX_PAIR_INIT, true);
+ /* To be filled: class */
+ addWordData(cUnit, PREDICTED_CHAIN_CLAZZ_INIT, true);
+ /* To be filled: method */
+ addWordData(cUnit, PREDICTED_CHAIN_METHOD_INIT, true);
+ /*
+ * Rechain count. The initial value of 0 here will trigger chaining upon
+ * the first invocation of this callsite.
+ */
+ addWordData(cUnit, PREDICTED_CHAIN_COUNTER_INIT, true);
+}
+
/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
Armv5teLIR *targetLabel)
@@ -2834,8 +3119,7 @@
int i;
/*
- * Initialize the three chaining lists for generic, post-invoke, and invoke
- * chains.
+ * Initialize the chaining lists, one per chaining cell type.
*/
for (i = 0; i < CHAINING_CELL_LAST; i++) {
dvmInitGrowableList(&chainingListByType[i], 2);
@@ -2864,7 +3148,7 @@
cUnit->chainCellOffsetLIR =
(LIR *) newLIR1(cUnit, ARMV5TE_16BIT_DATA, CHAIN_CELL_OFFSET_TAG);
cUnit->headerSize = 6;
- newLIR2(cUnit, ARMV5TE_MOV_RR_HL, r0, rpc & THUMB_REG_MASK);
+ newLIR2(cUnit, ARMV5TE_MOV_RR_H2L, r0, rpc & THUMB_REG_MASK);
newLIR2(cUnit, ARMV5TE_SUB_RI8, r0, 10);
newLIR3(cUnit, ARMV5TE_LDR_RRI5, r1, r0, 0);
newLIR2(cUnit, ARMV5TE_ADD_RI8, r1, 1);
@@ -2903,13 +3187,23 @@
dvmInsertGrowableList(
&chainingListByType[CHAINING_CELL_NORMAL], (void *) i);
break;
- case CHAINING_CELL_INVOKE:
- labelList[i].opCode = ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE;
+ case CHAINING_CELL_INVOKE_SINGLETON:
+ labelList[i].opCode =
+ ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
labelList[i].operands[0] =
(int) blockList[i]->containingMethod;
/* handle the codegen later */
dvmInsertGrowableList(
- &chainingListByType[CHAINING_CELL_INVOKE], (void *) i);
+ &chainingListByType[CHAINING_CELL_INVOKE_SINGLETON],
+ (void *) i);
+ break;
+ case CHAINING_CELL_INVOKE_PREDICTED:
+ labelList[i].opCode =
+ ARMV5TE_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
+ /* handle the codegen later */
+ dvmInsertGrowableList(
+ &chainingListByType[CHAINING_CELL_INVOKE_PREDICTED],
+ (void *) i);
break;
case CHAINING_CELL_HOT:
labelList[i].opCode =
@@ -3105,10 +3399,13 @@
handleNormalChainingCell(cUnit,
blockList[blockId]->startOffset);
break;
- case CHAINING_CELL_INVOKE:
- handleInvokeChainingCell(cUnit,
+ case CHAINING_CELL_INVOKE_SINGLETON:
+ handleInvokeSingletonChainingCell(cUnit,
blockList[blockId]->containingMethod);
break;
+ case CHAINING_CELL_INVOKE_PREDICTED:
+ handleInvokePredictedChainingCell(cUnit);
+ break;
case CHAINING_CELL_HOT:
handleHotChainingCell(cUnit,
blockList[blockId]->startOffset);
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
index 6994f26..20bb3ab 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
@@ -4,6 +4,8 @@
* method through a dedicated chaining cell.
*/
@ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+ @ methodToCall is guaranteed to be non-native
+.LinvokeChain:
ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize
ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize
ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd
@@ -19,7 +21,6 @@
bxlt r12 @ return to raise stack overflow excep.
@ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz
- ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns
@@ -31,14 +32,6 @@
str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
cmp r8, #0 @ suspendCount != 0
bxne r12 @ bail to the interpreter
- tst r10, #ACC_NATIVE
- bne .LinvokeNative
- /*
- * If we want to punt to the interpreter for native call, swap the bne with
- * the following
- * bxne r12
- */
-
ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
new file mode 100644
index 0000000..5e59991
--- /dev/null
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
@@ -0,0 +1,51 @@
+ @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+ ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize
+ ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd
+ ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+ add r3, r1, #1 @ Thumb addr is odd
+ SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area
+ sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize)
+ SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area
+ ldr r8, [r8] @ r8<- suspendCount (int)
+ cmp r10, r9 @ bottom < interpStackEnd?
+ bxlt lr @ return to raise stack overflow excep.
+ @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+ str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+ str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+ ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns
+
+
+ @ set up newSaveArea
+ str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+ str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+ ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self
+ str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+ cmp r8, #0 @ suspendCount != 0
+ ldr r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
+ bxne lr @ bail to the interpreter
+
+ @ go ahead and transfer control to the native code
+ ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+ str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp
+ str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+ @ newFp->localRefTop=refNext
+ mov r9, r3 @ r9<- glue->self (preserve)
+ SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area
+
+ mov r2, r0 @ r2<- methodToCall
+ mov r0, r1 @ r0<- newFP
+ add r1, rGLUE, #offGlue_retval @ r1<- &retval
+
+ blx r8 @ off to the native code
+
+ @ native return; r9=self, r10=newSaveArea
+ @ equivalent to dvmPopJniLocals
+ ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+ ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+ ldr r1, [r9, #offThread_exception] @ check for exception
+ str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp
+ cmp r1, #0 @ null?
+ str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+ bne .LhandleException @ no, handle exception
+ bx r2
+
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
index 003459d..0ac7cf8 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
@@ -32,12 +32,6 @@
bxne lr @ bail to the interpreter
tst r10, #ACC_NATIVE
bne .LinvokeNative
- /*
- * If we want to punt to the interpreter for native call, swap the bne with
- * the following
- * bxne lr
- */
-
ldr r10, .LdvmJitToInterpNoChain
ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S
new file mode 100644
index 0000000..dcbadde
--- /dev/null
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S
@@ -0,0 +1,42 @@
+ /*
+ * For polymorphic callsite, check whether the cached class pointer matches
+ * the current one. If so setup the Dalvik frame and return to the
+ * Thumb code through the link register to transfer control to the callee
+ * method through a dedicated chaining cell.
+ *
+ * The predicted chaining cell is declared in Armv5teLIR.h with the
+ * following layout:
+ *
+ * typedef struct PredictedChainingCell {
+ * u4 branch;
+ * const ClassObject *clazz;
+ * const Method *method;
+ * u4 counter;
+ * } PredictedChainingCell;
+ *
+ * Upon returning to the callsite:
+ * - lr : to branch to the chaining cell
+ * - lr+2: to punt to the interpreter
+ * - lr+4: to fully resolve the callee and may rechain.
+ * r3 <- class
+ * r9 <- counter
+ */
+ @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
+ ldr r3, [r0, #offObject_clazz] @ r3 <- this->class
+ ldr r8, [r2, #4] @ r8 <- predictedChainCell->clazz
+ ldr r0, [r2, #8] @ r0 <- predictedChainCell->method
+ ldr r9, [r2, #12] @ r9 <- predictedChainCell->counter
+ cmp r3, r8 @ predicted class == actual class?
+ beq .LinvokeChain @ predicted chain is valid
+ ldr r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
+ sub r1, r9, #1 @ count--
+ str r1, [r2, #12] @ write back to PredictedChainingCell->counter
+ add lr, lr, #4 @ return to fully-resolve landing pad
+ /*
+ * r1 <- count
+ * r2 <- &predictedChainCell
+ * r3 <- this->class
+ * r4 <- dPC
+ * r7 <- this->class->vtable
+ */
+ bx lr
diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h
index f41900e..3201dfd 100644
--- a/vm/compiler/template/armv5te/TemplateOpList.h
+++ b/vm/compiler/template/armv5te/TemplateOpList.h
@@ -25,6 +25,8 @@
JIT_TEMPLATE(RETURN)
JIT_TEMPLATE(INVOKE_METHOD_NO_OPT)
JIT_TEMPLATE(INVOKE_METHOD_CHAIN)
+JIT_TEMPLATE(INVOKE_METHOD_PREDICTED_CHAIN)
+JIT_TEMPLATE(INVOKE_METHOD_NATIVE)
JIT_TEMPLATE(CMPG_DOUBLE)
JIT_TEMPLATE(CMPL_DOUBLE)
JIT_TEMPLATE(CMPG_FLOAT)
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index 0831100..ff0a953 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -247,12 +247,6 @@
bxne lr @ bail to the interpreter
tst r10, #ACC_NATIVE
bne .LinvokeNative
- /*
- * If we want to punt to the interpreter for native call, swap the bne with
- * the following
- * bxne lr
- */
-
ldr r10, .LdvmJitToInterpNoChain
ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
@@ -278,6 +272,8 @@
* method through a dedicated chaining cell.
*/
@ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+ @ methodToCall is guaranteed to be non-native
+.LinvokeChain:
ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize
ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize
ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd
@@ -293,7 +289,6 @@
bxlt r12 @ return to raise stack overflow excep.
@ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz
- ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns
@@ -305,14 +300,6 @@
str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
cmp r8, #0 @ suspendCount != 0
bxne r12 @ bail to the interpreter
- tst r10, #ACC_NATIVE
- bne .LinvokeNative
- /*
- * If we want to punt to the interpreter for native call, swap the bne with
- * the following
- * bxne r12
- */
-
ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self
@@ -329,6 +316,111 @@
/* ------------------------------ */
.balign 4
+ .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
+ /*
+ * For polymorphic callsite, check whether the cached class pointer matches
+ * the current one. If so setup the Dalvik frame and return to the
+ * Thumb code through the link register to transfer control to the callee
+ * method through a dedicated chaining cell.
+ *
+ * The predicted chaining cell is declared in Armv5teLIR.h with the
+ * following layout:
+ *
+ * typedef struct PredictedChainingCell {
+ * u4 branch;
+ * const ClassObject *clazz;
+ * const Method *method;
+ * u4 counter;
+ * } PredictedChainingCell;
+ *
+ * Upon returning to the callsite:
+ * - lr : to branch to the chaining cell
+ * - lr+2: to punt to the interpreter
+ * - lr+4: to fully resolve the callee and may rechain.
+ * r3 <- class
+ * r9 <- counter
+ */
+ @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
+ ldr r3, [r0, #offObject_clazz] @ r3 <- this->class
+ ldr r8, [r2, #4] @ r8 <- predictedChainCell->clazz
+ ldr r0, [r2, #8] @ r0 <- predictedChainCell->method
+ ldr r9, [r2, #12] @ r9 <- predictedChainCell->counter
+ cmp r3, r8 @ predicted class == actual class?
+ beq .LinvokeChain @ predicted chain is valid
+ ldr r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
+ sub r1, r9, #1 @ count--
+ str r1, [r2, #12] @ write back to PredictedChainingCell->counter
+ add lr, lr, #4 @ return to fully-resolve landing pad
+ /*
+ * r1 <- count
+ * r2 <- &predictedChainCell
+ * r3 <- this->class
+ * r4 <- dPC
+ * r7 <- this->class->vtable
+ */
+ bx lr
+
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
+ @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+ ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize
+ ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd
+ ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+ add r3, r1, #1 @ Thumb addr is odd
+ SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area
+ sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize)
+ SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area
+ ldr r8, [r8] @ r8<- suspendCount (int)
+ cmp r10, r9 @ bottom < interpStackEnd?
+ bxlt lr @ return to raise stack overflow excep.
+ @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+ str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+ str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+ ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns
+
+
+ @ set up newSaveArea
+ str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+ str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+ ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self
+ str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+ cmp r8, #0 @ suspendCount != 0
+ ldr r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
+ bxne lr @ bail to the interpreter
+
+ @ go ahead and transfer control to the native code
+ ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+ str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp
+ str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+ @ newFp->localRefTop=refNext
+ mov r9, r3 @ r9<- glue->self (preserve)
+ SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area
+
+ mov r2, r0 @ r2<- methodToCall
+ mov r0, r1 @ r0<- newFP
+ add r1, rGLUE, #offGlue_retval @ r1<- &retval
+
+ blx r8 @ off to the native code
+
+ @ native return; r9=self, r10=newSaveArea
+ @ equivalent to dvmPopJniLocals
+ ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+ ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+ ldr r1, [r9, #offThread_exception] @ check for exception
+ str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp
+ cmp r1, #0 @ null?
+ str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+ bne .LhandleException @ no, handle exception
+ bx r2
+
+
+/* ------------------------------ */
+ .balign 4
.global dvmCompiler_TEMPLATE_CMPG_DOUBLE
dvmCompiler_TEMPLATE_CMPG_DOUBLE:
/* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */
@@ -1130,7 +1222,7 @@
bne .LhandleException @ no, handle exception
bx r2
-/* FIXME - untested */
+/* NOTE - this path can be exercised if the JIT threshold is set to 5 */
.LhandleException:
ldr r0, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
ldr rIBASE, .LdvmAsmInstructionStart @ same as above
diff --git a/vm/interp/Interp.c b/vm/interp/Interp.c
index 20395cc..f45e21a 100644
--- a/vm/interp/Interp.c
+++ b/vm/interp/Interp.c
@@ -180,7 +180,7 @@
const StackSaveArea* saveArea;
void* fp;
void* prevFp = NULL;
-
+
for (fp = thread->curFrame; fp != NULL; fp = saveArea->prevFrame) {
const Method* method;
@@ -225,7 +225,7 @@
} else {
pCtrl->line = dvmLineNumFromPC(saveArea->method,
saveArea->xtra.currentPc - saveArea->method->insns);
- pCtrl->pAddressSet
+ pCtrl->pAddressSet
= dvmAddressSetForLine(saveArea->method, pCtrl->line);
}
pCtrl->frameDepth = dvmComputeVagueFrameDepth(thread, thread->curFrame);
@@ -374,7 +374,7 @@
* ===========================================================================
*/
-/*
+/*
* Construct an s4 from two consecutive half-words of switch data.
* This needs to check endianness because the DEX optimizer only swaps
* half-words in instruction stream.
@@ -479,7 +479,7 @@
size = *switchData++;
assert(size > 0);
-
+
/* The keys are guaranteed to be aligned on a 32-bit boundary;
* we can treat them as a native int array.
*/
@@ -867,8 +867,9 @@
extern void dvmJitToInterpPunt();
extern void dvmJitToInterpSingleStep();
extern void dvmJitToTraceSelect();
+ extern void dvmJitToPatchPredictedChain();
- /*
+ /*
* Reserve a static entity here to quickly setup runtime contents as
* gcc will issue block copy instructions.
*/
@@ -878,6 +879,7 @@
dvmJitToInterpPunt,
dvmJitToInterpSingleStep,
dvmJitToTraceSelect,
+ dvmJitToPatchPredictedChain,
};
#endif
diff --git a/vm/interp/InterpDefs.h b/vm/interp/InterpDefs.h
index 23f1fe8..dcff850 100644
--- a/vm/interp/InterpDefs.h
+++ b/vm/interp/InterpDefs.h
@@ -39,7 +39,7 @@
#if defined(WITH_JIT)
/*
- * There are five entry points from the compiled code to the interpreter:
+ * There are six entry points from the compiled code to the interpreter:
* 1) dvmJitToInterpNormal: find if there is a corresponding compilation for
* the new dalvik PC. If so, chain the originating compilation with the
* target then jump to it.
@@ -62,6 +62,8 @@
* opportunity, and if none is available will switch to the debug
* interpreter immediately for trace selection (as if threshold had
* just been reached).
+ * 6) dvmJitToPatchPredictedChain: patch the chaining cell for a virtual call
+ * site to a predicted callee.
*/
struct JitToInterpEntries {
void *dvmJitToInterpNormal;
@@ -69,6 +71,7 @@
void *dvmJitToInterpPunt;
void *dvmJitToInterpSingleStep;
void *dvmJitToTraceSelect;
+ void *dvmJitToPatchPredictedChain;
};
#define JIT_TRACE_THRESH_FILTER_SIZE 16
@@ -146,7 +149,7 @@
const u2* currRunHead; // Start of run we're building
int currRunLen; // Length of run in 16-bit words
int lastThreshFilter;
- u2* threshFilter[JIT_TRACE_THRESH_FILTER_SIZE];
+ const u2* threshFilter[JIT_TRACE_THRESH_FILTER_SIZE];
JitTraceRun trace[MAX_JIT_RUN_LEN];
#endif
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index 7d922bb..acd4704 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -191,8 +191,10 @@
#endif
LOGD("JIT: %d Translation chains", gDvmJit.translationChains);
#if defined(INVOKE_STATS)
- LOGD("JIT: Invoke: %d noOpt, %d chainable, %d return",
- gDvmJit.invokeNoOpt, gDvmJit.invokeChain, gDvmJit.returnOp);
+ LOGD("JIT: Invoke: %d chainable, %d pred. chain, %d native, "
+ "%d return",
+ gDvmJit.invokeChain, gDvmJit.invokePredictedChain,
+ gDvmJit.invokeNative, gDvmJit.returnOp);
#endif
if (gDvmJit.profile) {
int numTraces = 0;
@@ -367,6 +369,7 @@
switchInterp = !debugOrProfile;
break;
case kJitNormal:
+ switchInterp = !debugOrProfile;
break;
default:
dvmAbort();
diff --git a/vm/mterp/out/InterpAsm-armv7-a.S b/vm/mterp/out/InterpAsm-armv7-a.S
index 923084f..0555a30 100644
--- a/vm/mterp/out/InterpAsm-armv7-a.S
+++ b/vm/mterp/out/InterpAsm-armv7-a.S
@@ -9080,11 +9080,11 @@
common_updateProfile:
eor r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
- lsl r3,r3,#20 @ shift out excess 4095
- ldrb r1,[r0,r3,lsr #20] @ get counter
+ lsl r3,r3,#23 @ shift out excess 511
+ ldrb r1,[r0,r3,lsr #23] @ get counter
GET_INST_OPCODE(ip)
subs r1,r1,#1 @ decrement counter
- strb r1,[r0,r3,lsr #20] @ and store it
+ strb r1,[r0,r3,lsr #23] @ and store it
GOTO_OPCODE_IFNE(ip) @ if not threshold, fallthrough otherwise */
/*
@@ -9094,7 +9094,7 @@
* jump to it now).
*/
mov r1,#255
- strb r1,[r0,r3,lsr #20] @ reset counter
+ strb r1,[r0,r3,lsr #23] @ reset counter
EXPORT_PC()
mov r0,rPC
bl dvmJitGetCodeAddr @ r0<- dvmJitGetCodeAddr(rPC)