Collect method traces with the fast interpreter and the JIT'ed code.
Insert inline code instead of switching to the debug interpreter in the hope
that the time stamps collected in traceview are closer to the real-world
behavior, with minimal profiling overhead.
Because the inline polling still introduces additional overhead (20% ~ 100%),
it is only enabled in the special VM build called "libdvm_traceview.so".
It won't work on the emulator because it is not implemented to collect the
detailed instruction traces.
Here are some performance numbers using the FibonacciSlow microbenchmark
(ie recursive workloads / the shorter the faster):
time: configuration
8,162,602: profiling off/libdvm.so/JIT off
2,801,829: profiling off/libdvm.so/JIT on
9,952,236: profiling off/libdvm_traceview.so/JIT off
4,465,701: profiling off/libdvm_traceview.so/JIT on
164,786,585: profiling on/libdvm.so/JIT off
164,664,634: profiling on/libdvm.so/JIT on
11,231,707: profiling on/libdvm_traceview.so/JIT off
8,427,846: profiling on/libdvm_traceview.so/JIT on
Comparing the 8,427,846 vs 164,664,634 numbers against the true baseline
performance number of 2,801,829, the new libdvm_traceview.so reduces the time
skew from 58x to 3x.
Change-Id: I48611a3a4ff9c4950059249e5503c26abd6b138e
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c
index 60f060c..8c26989 100644
--- a/vm/compiler/Compiler.c
+++ b/vm/compiler/Compiler.c
@@ -741,7 +741,11 @@
dvmLockMutex(&gDvmJit.tableLock);
jitActive = gDvmJit.pProfTable != NULL;
- jitActivate = !(gDvm.debuggerActive || (gDvm.activeProfilers > 0));
+ bool disableJit = gDvm.debuggerActive;
+#if !defined(WITH_INLINE_PROFILING)
+ disableJit = disableJit || (gDvm.activeProfilers > 0);
+#endif
+ jitActivate = !disableJit;
if (jitActivate && !jitActive) {
gDvmJit.pProfTable = gDvmJit.pProfTableCopy;
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
index a137d22..aaadc00 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
@@ -41,5 +41,12 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve clobbered live registers
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r2,lr} @ restore registers
+#endif
bx lr @ return to the callee-chaining cell
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
index 2557863..eeac2b0 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
@@ -41,9 +41,24 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2=methodToCall, r6=rGLUE
+ stmfd sp!, {r2,r6} @ to be consumed after JNI return
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r0, r2
+ mov r1, r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
blx r8 @ off to the native code
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1} @ restore r2 and r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ native return; r9=self, r10=newSaveArea
@ equivalent to dvmPopJniLocals
ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
index 5be6978..044d0ee 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S
@@ -46,6 +46,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
@ Start executing the callee
#if defined(WITH_JIT_TUNING)
diff --git a/vm/compiler/template/armv5te/TEMPLATE_RETURN.S b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
index b7ab971..b2e71ee 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_RETURN.S
@@ -5,6 +5,13 @@
* address in the code cache following the invoke instruction. Otherwise
* return to the special dvmJitToInterpNoChain entry point.
*/
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve live registers
+ mov r0, r6
+ @ r0=rGlue
+ LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
+ ldmfd sp!, {r0-r2,lr} @ restore live registers
+#endif
SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old)
ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
diff --git a/vm/compiler/template/armv5te/footer.S b/vm/compiler/template/armv5te/footer.S
index 73fc3d7..a391dbe 100644
--- a/vm/compiler/template/armv5te/footer.S
+++ b/vm/compiler/template/armv5te/footer.S
@@ -22,9 +22,22 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2: methodToCall, r6: rGLUE
+ stmfd sp!, {r2,r6}
+ stmfd sp!, {r0-r3}
+ mov r0, r2
+ mov r1, r6
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3}
+#endif
LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1}
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ Refresh Jit's on/off status
ldr r3, [rGLUE, #offGlue_ppJitProfTable]
@@ -96,6 +109,14 @@
.LdvmSelfVerificationMemOpDecode:
.word dvmSelfVerificationMemOpDecode
#endif
+#if defined(WITH_INLINE_PROFILING)
+.LdvmFastMethodTraceEnter:
+ .word dvmFastMethodTraceEnter
+.LdvmFastNativeMethodTraceExit:
+ .word dvmFastNativeMethodTraceExit
+.LdvmFastJavaMethodTraceExit:
+ .word dvmFastJavaMethodTraceExit
+#endif
.L__aeabi_cdcmple:
.word __aeabi_cdcmple
.L__aeabi_cfcmple:
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index 60664fa..655bc54 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -177,6 +177,13 @@
* address in the code cache following the invoke instruction. Otherwise
* return to the special dvmJitToInterpNoChain entry point.
*/
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve live registers
+ mov r0, r6
+ @ r0=rGlue
+ LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
+ ldmfd sp!, {r0-r2,lr} @ restore live registers
+#endif
SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old)
ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
@@ -274,6 +281,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
@ Start executing the callee
#if defined(WITH_JIT_TUNING)
@@ -329,6 +343,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve clobbered live registers
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r2,lr} @ restore registers
+#endif
bx lr @ return to the callee-chaining cell
@@ -436,9 +457,24 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2=methodToCall, r6=rGLUE
+ stmfd sp!, {r2,r6} @ to be consumed after JNI return
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r0, r2
+ mov r1, r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
blx r8 @ off to the native code
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1} @ restore r2 and r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ native return; r9=self, r10=newSaveArea
@ equivalent to dvmPopJniLocals
ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
@@ -1458,9 +1494,22 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2: methodToCall, r6: rGLUE
+ stmfd sp!, {r2,r6}
+ stmfd sp!, {r0-r3}
+ mov r0, r2
+ mov r1, r6
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3}
+#endif
LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1}
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ Refresh Jit's on/off status
ldr r3, [rGLUE, #offGlue_ppJitProfTable]
@@ -1532,6 +1581,14 @@
.LdvmSelfVerificationMemOpDecode:
.word dvmSelfVerificationMemOpDecode
#endif
+#if defined(WITH_INLINE_PROFILING)
+.LdvmFastMethodTraceEnter:
+ .word dvmFastMethodTraceEnter
+.LdvmFastNativeMethodTraceExit:
+ .word dvmFastNativeMethodTraceExit
+.LdvmFastJavaMethodTraceExit:
+ .word dvmFastJavaMethodTraceExit
+#endif
.L__aeabi_cdcmple:
.word __aeabi_cdcmple
.L__aeabi_cfcmple:
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index ccdbcca..ff552bb 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -177,6 +177,13 @@
* address in the code cache following the invoke instruction. Otherwise
* return to the special dvmJitToInterpNoChain entry point.
*/
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve live registers
+ mov r0, r6
+ @ r0=rGlue
+ LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
+ ldmfd sp!, {r0-r2,lr} @ restore live registers
+#endif
SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old)
ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
@@ -274,6 +281,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
@ Start executing the callee
#if defined(WITH_JIT_TUNING)
@@ -329,6 +343,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve clobbered live registers
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r2,lr} @ restore registers
+#endif
bx lr @ return to the callee-chaining cell
@@ -436,9 +457,24 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2=methodToCall, r6=rGLUE
+ stmfd sp!, {r2,r6} @ to be consumed after JNI return
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r0, r2
+ mov r1, r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
blx r8 @ off to the native code
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1} @ restore r2 and r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ native return; r9=self, r10=newSaveArea
@ equivalent to dvmPopJniLocals
ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
@@ -1181,9 +1217,22 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2: methodToCall, r6: rGLUE
+ stmfd sp!, {r2,r6}
+ stmfd sp!, {r0-r3}
+ mov r0, r2
+ mov r1, r6
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3}
+#endif
LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1}
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ Refresh Jit's on/off status
ldr r3, [rGLUE, #offGlue_ppJitProfTable]
@@ -1255,6 +1304,14 @@
.LdvmSelfVerificationMemOpDecode:
.word dvmSelfVerificationMemOpDecode
#endif
+#if defined(WITH_INLINE_PROFILING)
+.LdvmFastMethodTraceEnter:
+ .word dvmFastMethodTraceEnter
+.LdvmFastNativeMethodTraceExit:
+ .word dvmFastNativeMethodTraceExit
+.LdvmFastJavaMethodTraceExit:
+ .word dvmFastJavaMethodTraceExit
+#endif
.L__aeabi_cdcmple:
.word __aeabi_cdcmple
.L__aeabi_cfcmple:
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
index e520056..34931f8 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
@@ -177,6 +177,13 @@
* address in the code cache following the invoke instruction. Otherwise
* return to the special dvmJitToInterpNoChain entry point.
*/
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve live registers
+ mov r0, r6
+ @ r0=rGlue
+ LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
+ ldmfd sp!, {r0-r2,lr} @ restore live registers
+#endif
SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old)
ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
@@ -274,6 +281,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
@ Start executing the callee
#if defined(WITH_JIT_TUNING)
@@ -329,6 +343,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve clobbered live registers
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r2,lr} @ restore registers
+#endif
bx lr @ return to the callee-chaining cell
@@ -436,9 +457,24 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2=methodToCall, r6=rGLUE
+ stmfd sp!, {r2,r6} @ to be consumed after JNI return
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r0, r2
+ mov r1, r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
blx r8 @ off to the native code
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1} @ restore r2 and r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ native return; r9=self, r10=newSaveArea
@ equivalent to dvmPopJniLocals
ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
@@ -1458,9 +1494,22 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2: methodToCall, r6: rGLUE
+ stmfd sp!, {r2,r6}
+ stmfd sp!, {r0-r3}
+ mov r0, r2
+ mov r1, r6
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3}
+#endif
LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1}
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ Refresh Jit's on/off status
ldr r3, [rGLUE, #offGlue_ppJitProfTable]
@@ -1532,6 +1581,14 @@
.LdvmSelfVerificationMemOpDecode:
.word dvmSelfVerificationMemOpDecode
#endif
+#if defined(WITH_INLINE_PROFILING)
+.LdvmFastMethodTraceEnter:
+ .word dvmFastMethodTraceEnter
+.LdvmFastNativeMethodTraceExit:
+ .word dvmFastNativeMethodTraceExit
+.LdvmFastJavaMethodTraceExit:
+ .word dvmFastJavaMethodTraceExit
+#endif
.L__aeabi_cdcmple:
.word __aeabi_cdcmple
.L__aeabi_cfcmple:
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index 87a0691..b10beef 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -177,6 +177,13 @@
* address in the code cache following the invoke instruction. Otherwise
* return to the special dvmJitToInterpNoChain entry point.
*/
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve live registers
+ mov r0, r6
+ @ r0=rGlue
+ LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
+ ldmfd sp!, {r0-r2,lr} @ restore live registers
+#endif
SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old)
ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
@@ -274,6 +281,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
@ Start executing the callee
#if defined(WITH_JIT_TUNING)
@@ -329,6 +343,13 @@
str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
mov rFP, r1 @ fp = newFp
str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp
+#if defined(WITH_INLINE_PROFILING)
+ stmfd sp!, {r0-r2,lr} @ preserve clobbered live registers
+ mov r1, r6
+ @ r0=methodToCall, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r2,lr} @ restore registers
+#endif
bx lr @ return to the callee-chaining cell
@@ -436,9 +457,24 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2=methodToCall, r6=rGLUE
+ stmfd sp!, {r2,r6} @ to be consumed after JNI return
+ stmfd sp!, {r0-r3} @ preserve r0-r3
+ mov r0, r2
+ mov r1, r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3} @ restore r0-r3
+#endif
blx r8 @ off to the native code
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1} @ restore r2 and r6
+ @ r0=JNIMethod, r1=rGlue
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ native return; r9=self, r10=newSaveArea
@ equivalent to dvmPopJniLocals
ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
@@ -1458,9 +1494,22 @@
mov r2, r0 @ r2<- methodToCall
mov r0, r1 @ r0<- newFP
add r1, rGLUE, #offGlue_retval @ r1<- &retval
+#if defined(WITH_INLINE_PROFILING)
+ @ r2: methodToCall, r6: rGLUE
+ stmfd sp!, {r2,r6}
+ stmfd sp!, {r0-r3}
+ mov r0, r2
+ mov r1, r6
+ LDR_PC_LR ".LdvmFastMethodTraceEnter"
+ ldmfd sp!, {r0-r3}
+#endif
LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+#if defined(WITH_INLINE_PROFILING)
+ ldmfd sp!, {r0-r1}
+ LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
+#endif
@ Refresh Jit's on/off status
ldr r3, [rGLUE, #offGlue_ppJitProfTable]
@@ -1532,6 +1581,14 @@
.LdvmSelfVerificationMemOpDecode:
.word dvmSelfVerificationMemOpDecode
#endif
+#if defined(WITH_INLINE_PROFILING)
+.LdvmFastMethodTraceEnter:
+ .word dvmFastMethodTraceEnter
+.LdvmFastNativeMethodTraceExit:
+ .word dvmFastNativeMethodTraceExit
+.LdvmFastJavaMethodTraceExit:
+ .word dvmFastJavaMethodTraceExit
+#endif
.L__aeabi_cdcmple:
.word __aeabi_cdcmple
.L__aeabi_cfcmple: