Fine-tune the instructions on the method invocation path.

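1) Initialize the callee's registersSize (r7) and outsSize (r2) through
   constant moves at the singleton call site instead of loading them
   from the Method struct inside the invoke templates.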
2) Eliminate an unnecessary load of the callee's Dalvik PC
   (methodToCall->insns) in the chained and native invoke templates.

Improved method invocation performance by ~3%.

Change-Id: Iead1276eed0ba527e82eb876f08d169ab9b496b2
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 2d92d88..d90050b 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -1062,16 +1062,21 @@
     ArmLIR *retChainingCell = &labelList[bb->fallThrough->id];
 
     /* r1 = &retChainingCell */
-    dvmCompilerLockTemp(cUnit, r1);
     ArmLIR *addrRetChain = opRegRegImm(cUnit, kOpAdd, r1, rpc, 0);
+
     /* r4PC = dalvikCallsite */
     loadConstant(cUnit, r4PC,
                  (int) (cUnit->method->insns + mir->offset));
     addrRetChain->generic.target = (LIR *) retChainingCell;
+
+    /* r7 = calleeMethod->registersSize */
+    loadConstant(cUnit, r7, calleeMethod->registersSize);
     /*
      * r0 = calleeMethod (loaded upon calling genInvokeSingletonCommon)
      * r1 = &ChainingCell
+     * r2 = calleeMethod->outsSize (to be loaded later for Java callees)
      * r4PC = callsiteDPC
+     * r7 = calleeMethod->registersSize
      */
     if (dvmIsNativeMethod(calleeMethod)) {
         genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_NATIVE);
@@ -1079,6 +1084,8 @@
         gDvmJit.invokeNative++;
 #endif
     } else {
+        /* For Java callees, set up r2 to be calleeMethod->outsSize */
+        loadConstant(cUnit, r2, calleeMethod->outsSize);
         genDispatchToHandler(cUnit, TEMPLATE_INVOKE_METHOD_CHAIN);
 #if defined(WITH_JIT_TUNING)
         gDvmJit.invokeMonomorphic++;
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
index aaadc00..475f3cc 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S
@@ -3,11 +3,10 @@
      * Thumb code through the link register to transfer control to the callee
      * method through a dedicated chaining cell.
      */
-    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
+    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
     @ methodToCall is guaranteed to be non-native
 .LinvokeChain:
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
-    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -23,8 +22,6 @@
     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
index eeac2b0..f4f4025 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S
@@ -1,5 +1,5 @@
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    @ r7 = methodToCall->registersSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -12,8 +12,6 @@
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
diff --git a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S
index c3085b9..8469d70 100644
--- a/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S
+++ b/vm/compiler/template/armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S
@@ -33,6 +33,8 @@
     add     r10, r10, #1
     streq   r10, [r7, #0]
 #endif
+    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     beq     .LinvokeChain   @ predicted chain is valid
     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
     cmp     r8, #0          @ initialized class or not
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index 655bc54..05491d2 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -305,11 +305,10 @@
      * Thumb code through the link register to transfer control to the callee
      * method through a dedicated chaining cell.
      */
-    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
+    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
     @ methodToCall is guaranteed to be non-native
 .LinvokeChain:
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
-    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -325,8 +324,6 @@
     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
@@ -393,6 +390,8 @@
     add     r10, r10, #1
     streq   r10, [r7, #0]
 #endif
+    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     beq     .LinvokeChain   @ predicted chain is valid
     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
     cmp     r8, #0          @ initialized class or not
@@ -415,7 +414,7 @@
 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    @ r7 = methodToCall->registersSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -428,8 +427,6 @@
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index ff552bb..c96c127 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -305,11 +305,10 @@
      * Thumb code through the link register to transfer control to the callee
      * method through a dedicated chaining cell.
      */
-    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
+    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
     @ methodToCall is guaranteed to be non-native
 .LinvokeChain:
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
-    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -325,8 +324,6 @@
     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
@@ -393,6 +390,8 @@
     add     r10, r10, #1
     streq   r10, [r7, #0]
 #endif
+    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     beq     .LinvokeChain   @ predicted chain is valid
     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
     cmp     r8, #0          @ initialized class or not
@@ -415,7 +414,7 @@
 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    @ r7 = methodToCall->registersSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -428,8 +427,6 @@
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
index 34931f8..3d60a08 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
@@ -305,11 +305,10 @@
      * Thumb code through the link register to transfer control to the callee
      * method through a dedicated chaining cell.
      */
-    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
+    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
     @ methodToCall is guaranteed to be non-native
 .LinvokeChain:
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
-    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -325,8 +324,6 @@
     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
@@ -393,6 +390,8 @@
     add     r10, r10, #1
     streq   r10, [r7, #0]
 #endif
+    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     beq     .LinvokeChain   @ predicted chain is valid
     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
     cmp     r8, #0          @ initialized class or not
@@ -415,7 +414,7 @@
 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    @ r7 = methodToCall->registersSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -428,8 +427,6 @@
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index b10beef..bcdd237 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -305,11 +305,10 @@
      * Thumb code through the link register to transfer control to the callee
      * method through a dedicated chaining cell.
      */
-    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
+    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
     @ methodToCall is guaranteed to be non-native
 .LinvokeChain:
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
-    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -325,8 +324,6 @@
     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
@@ -393,6 +390,8 @@
     add     r10, r10, #1
     streq   r10, [r7, #0]
 #endif
+    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
     beq     .LinvokeChain   @ predicted chain is valid
     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
     cmp     r8, #0          @ initialized class or not
@@ -415,7 +414,7 @@
 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
-    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    @ r7 = methodToCall->registersSize
     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
     add     r3, r1, #1  @ Thumb addr is odd
@@ -428,8 +427,6 @@
     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
-    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
-
 
     @ set up newSaveArea
     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-ia32.S b/vm/compiler/template/out/CompilerTemplateAsm-ia32.S
index 6ccb067..7726e97 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-ia32.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-ia32.S
@@ -6,7 +6,7 @@
 
 /* File: ia32/header.S */
 /*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (C) 2010 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.