Improved method invocation performance: 1.5x faster for invoke-virtual and 2.8x faster for invoke-interface.
- Implemented predicted chaining for invoke-virtual and invoke-interface (sketched below).
- Eliminated a bit of overhead on the invoke-native path.
- Added the 078-polymorphic-virtual stress test.
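
The gist of predicted chaining, in rough C terms. This is only a sketch: the
PredictedChainingCell layout is copied from the comment in the
TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN template below, while invokeChained()
and resolveAndMaybeRechain() are hypothetical stand-ins for the chaining-cell
branch and the fully-resolve path; the real control flow is in the ARM
assembly that follows.

    /* Layout mirrors the declaration in Armv5teLIR.h. */
    typedef struct PredictedChainingCell {
        u4 branch;                /* branch instruction to the chained callee */
        const ClassObject *clazz; /* class cached from the last invocation */
        const Method *method;     /* callee resolved for that class */
        u4 counter;               /* decremented on each misprediction */
    } PredictedChainingCell;

    static void invokePredicted(Object *thisObj, PredictedChainingCell *cell)
    {
        if (thisObj->clazz == cell->clazz) {
            /* Prediction hit: transfer control through the chaining cell
             * (the .LinvokeChain path). */
            invokeChained(cell->method);                  /* hypothetical */
        } else {
            /* Prediction miss: decay the counter and take the fully-resolve
             * landing pad (lr+4), which may rechain the cell to the new
             * class/method pair. */
            cell->counter--;
            resolveAndMaybeRechain(thisObj->clazz, cell); /* hypothetical */
        }
    }
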
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index 0831100..ff0a953 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -247,12 +247,6 @@
bxne lr @ bail to the interpreter
tst r10, #ACC_NATIVE
bne .LinvokeNative
- /*
- * If we want to punt to the interpreter for native call, swap the bne with
- * the following
- * bxne lr
- */
-
ldr r10, .LdvmJitToInterpNoChain
ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
@@ -278,6 +272,8 @@
* method through a dedicated chaining cell.
*/
@ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+ @ methodToCall is guaranteed to be non-native
+.LinvokeChain:
ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize
ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize
ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd
@@ -293,7 +289,6 @@
bxlt r12 @ return to raise stack overflow excep.
@ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz
- ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns
@@ -305,14 +300,6 @@
str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
cmp r8, #0 @ suspendCount != 0
bxne r12 @ bail to the interpreter
- tst r10, #ACC_NATIVE
- bne .LinvokeNative
- /*
- * If we want to punt to the interpreter for native call, swap the bne with
- * the following
- * bxne r12
- */
-
ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self
@@ -329,6 +316,111 @@
/* ------------------------------ */
.balign 4
+ .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
+ /*
+ * For a polymorphic callsite, check whether the cached class pointer matches
+ * the current one. If so, set up the Dalvik frame and return to the
+ * Thumb code through the link register to transfer control to the callee
+ * method through a dedicated chaining cell.
+ *
+ * The predicted chaining cell is declared in Armv5teLIR.h with the
+ * following layout:
+ *
+ * typedef struct PredictedChainingCell {
+ * u4 branch;
+ * const ClassObject *clazz;
+ * const Method *method;
+ * u4 counter;
+ * } PredictedChainingCell;
+ *
+ * Upon returning to the callsite:
+ * - lr : to branch to the chaining cell
+ * - lr+2: to punt to the interpreter
+ * - lr+4: to fully resolve the callee and possibly rechain.
+ * r3 <- class
+ * r9 <- counter
+ */
+ @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
+ ldr r3, [r0, #offObject_clazz] @ r3 <- this->class
+ ldr r8, [r2, #4] @ r8 <- predictedChainCell->clazz
+ ldr r0, [r2, #8] @ r0 <- predictedChainCell->method
+ ldr r9, [r2, #12] @ r9 <- predictedChainCell->counter
+ cmp r3, r8 @ predicted class == actual class?
+ beq .LinvokeChain @ predicted chain is valid
+ ldr r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
+ sub r1, r9, #1 @ count--
+ str r1, [r2, #12] @ write back to PredictedChainingCell->counter
+ add lr, lr, #4 @ return to fully-resolve landing pad
+ /*
+ * r1 <- count
+ * r2 <- &predictedChainCell
+ * r3 <- this->class
+ * r4 <- dPC
+ * r7 <- this->class->vtable
+ */
+ bx lr
+
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
+ @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+ ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize
+ ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd
+ ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+ add r3, r1, #1 @ Thumb addr is odd
+ SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area
+ sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize)
+ SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area
+ ldr r8, [r8] @ r8<- suspendCount (int)
+ cmp r10, r9 @ bottom < interpStackEnd?
+ bxlt lr @ return to raise stack overflow excep.
+ @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+ str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+ str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+ ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns
+
+
+ @ set up newSaveArea
+ str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+ str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+ ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self
+ str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+ cmp r8, #0 @ suspendCount != 0
+ ldr r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
+ bxne lr @ bail to the interpreter
+
+ @ go ahead and transfer control to the native code
+ ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
+ str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp
+ str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
+ @ newFp->localRefTop=refNext
+ mov r9, r3 @ r9<- glue->self (preserve)
+ SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area
+
+ mov r2, r0 @ r2<- methodToCall
+ mov r0, r1 @ r0<- newFP
+ add r1, rGLUE, #offGlue_retval @ r1<- &retval
+
+ blx r8 @ off to the native code
+
+ @ native return; r9=self, r10=newSaveArea
+ @ equivalent to dvmPopJniLocals
+ ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+ ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
+ ldr r1, [r9, #offThread_exception] @ check for exception
+ str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp
+ cmp r1, #0 @ null?
+ str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
+ bne .LhandleException @ no, handle exception
+ bx r2
+
+
+/* ------------------------------ */
+ .balign 4
.global dvmCompiler_TEMPLATE_CMPG_DOUBLE
dvmCompiler_TEMPLATE_CMPG_DOUBLE:
/* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */
@@ -1130,7 +1222,7 @@
bne .LhandleException @ no, handle exception
bx r2
-/* FIXME - untested */
+/* NOTE - this path can be exercised if the JIT threshold is set to 5 */
.LhandleException:
ldr r0, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
ldr rIBASE, .LdvmAsmInstructionStart @ same as above