Changes to remove need for compiled invoke stubs for quick.
ARM, x86, and MIPS implementation complete, though MIPS is untested.
The ArgArray is changed to be a uint32_t array instead of a JValue array.
Also, a separate result for float/double was needed for x86/MIPS. The invoke
stubs are currently still there, but only used for portable.
Change-Id: I0647f8d5d420cea61370e662e85bdc0c13b5e378
diff --git a/src/oat/runtime/arm/runtime_support_arm.S b/src/oat/runtime/arm/runtime_support_arm.S
index 6067dd5..bd3f45d 100644
--- a/src/oat/runtime/arm/runtime_support_arm.S
+++ b/src/oat/runtime/arm/runtime_support_arm.S
@@ -103,13 +103,16 @@
push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves
.save {r1-r3, r5-r8, r10-r11, lr}
.cfi_adjust_cfa_offset 40
- .cfi_rel_offset r5, 0
- .cfi_rel_offset r6, 4
- .cfi_rel_offset r7, 8
- .cfi_rel_offset r8, 12
- .cfi_rel_offset r10, 16
- .cfi_rel_offset r11, 20
- .cfi_rel_offset lr, 24
+ .cfi_rel_offset r1, 0
+ .cfi_rel_offset r2, 4
+ .cfi_rel_offset r3, 8
+ .cfi_rel_offset r5, 12
+ .cfi_rel_offset r6, 16
+ .cfi_rel_offset r7, 20
+ .cfi_rel_offset r8, 24
+ .cfi_rel_offset r10, 28
+ .cfi_rel_offset r11, 32
+ .cfi_rel_offset lr, 36
sub sp, #8 @ 2 words of space, bottom word will hold Method*
.pad #8
.cfi_adjust_cfa_offset 8
@@ -244,6 +247,53 @@
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
/*
+ * Invocation stub.
+ * On entry:
+ * r0 = method pointer
+ * r1 = argument array or NULL for no argument methods
+ * r2 = size of argument array in bytes
+ * r3 = (managed) thread pointer
+ * [sp] = JValue* result for non-floating point returns
+ * [sp + 4] = JValue* result for floating point returns
+ */
+ENTRY art_quick_invoke_stub
+ push {r0, r4, r5, r9, r11, lr} @ spill method* (r0) and the registers this stub overwrites
+ .save {r0, r4, r5, r9, r11, lr}
+ .pad #24
+ .cfi_adjust_cfa_offset 24
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r4, 4
+ .cfi_rel_offset r5, 8
+ .cfi_rel_offset r9, 12
+ .cfi_rel_offset r11, 16
+ .cfi_rel_offset lr, 20
+ mov r11, sp @ r11 = sp after the spill; [r11] is the saved method*
+ .cfi_def_cfa_register r11
+ mov r9, r3 @ move managed thread pointer into r9
+ mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval
+ add r5, r2, #16 @ arg bytes + 4 (method* slot) + 12 so the mask rounds up (assumes r2 is a multiple of 4)
+ and r5, #0xFFFFFFF0 @ align frame size to 16 bytes (mask 0xFFFFFFF8 only aligned to 8)
+ sub sp, r5 @ reserve stack space for method* slot and argument array
+ add r0, sp, #4 @ memcpy dest = first word above the method* slot
+ bl memcpy @ memcpy (dest, src = r1, bytes = r2); r1/r2 are still the entry values
+ ldr r0, [r11] @ restore method* from the spill area
+ ldr r1, [sp, #4] @ first copied argument word -> r1
+ ldr r2, [sp, #8] @ second copied argument word -> r2
+ ldr r3, [sp, #12] @ third copied argument word -> r3
+ mov ip, #0 @ set ip to 0
+ str ip, [sp] @ store NULL for method* at bottom of frame
+ ldr ip, [r0, #METHOD_CODE_OFFSET] @ get pointer to the code
+ blx ip @ call the method
+ add sp, r5 @ drop the argument frame; relies on the callee preserving r5
+ ldr ip, [sp, #24] @ result pointer: entry [sp], now above the 24-byte spill area
+ strd r0, [ip] @ store r0/r1 into result pointer
+ ldr ip, [sp, #28] @ floating point result pointer: entry [sp + 4]
+ strd r0, [ip] @ store the same r0/r1 pair into floating point result pointer
+ pop {r0, r4, r5, r9, r11, lr} @ restore spill regs
+ .cfi_adjust_cfa_offset -24
+ bx lr
+END art_quick_invoke_stub
+ /*
* On entry, r0 and r1 must be preserved, r2 is dex PC
*/
.extern artUpdateDebuggerFromCode
diff --git a/src/oat/runtime/mips/runtime_support_mips.S b/src/oat/runtime/mips/runtime_support_mips.S
index 56535b2..cc41d14 100644
--- a/src/oat/runtime/mips/runtime_support_mips.S
+++ b/src/oat/runtime/mips/runtime_support_mips.S
@@ -427,6 +427,63 @@
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
/*
+ * Invocation stub.
+ * On entry:
+ * a0 = method pointer
+ * a1 = argument array or NULL for no argument methods
+ * a2 = size of argument array in bytes
+ * a3 = (managed) thread pointer
+ * [sp + 16] = JValue* result for non-floating point returns
+ * [sp + 20] = JValue* result for floating point returns
+ */
+ENTRY art_quick_invoke_stub
+ GENERATE_GLOBAL_POINTER
+ sw $a0, 0($sp) # save method* in the word at the entry sp; reloaded below via 16($fp)
+ addiu $sp, $sp, -16 # spill s0, s1, fp, ra
+ .cfi_adjust_cfa_offset 16
+ sw $ra, 12($sp)
+ .cfi_rel_offset 31, 12
+ sw $fp, 8($sp)
+ .cfi_rel_offset 30, 8
+ sw $s1, 4($sp)
+ .cfi_rel_offset 17, 4
+ sw $s0, 0($sp)
+ .cfi_rel_offset 16, 0
+ move $fp, $sp # fp = sp after the spill; entry sp is fp + 16
+ .cfi_def_cfa_register 30
+ move $s1, $a3 # move managed thread pointer into s1
+ addiu $s0, $zero, SUSPEND_CHECK_INTERVAL # reset s0 to suspend check interval
+ addiu $t0, $a2, 16 # arg bytes + 4 (method* slot) + 12 so truncation rounds up (assumes a2 is a multiple of 4)
+ srl $t0, $t0, 4 # shift the frame size right 4
+ sll $t0, $t0, 4 # shift the frame size left 4 to align to 16 bytes (shift of 3 only aligned to 8)
+ subu $sp, $sp, $t0 # reserve stack space for method* slot and argument array
+ addiu $a0, $sp, 4 # memcpy dest = first word above the method* slot
+ jal memcpy # (dest, src = a1, bytes = a2)
+ addiu $sp, $sp, -16 # delay slot: make space for argument slots for memcpy
+ addiu $sp, $sp, 16 # restore stack after memcpy
+ lw $a0, 16($fp) # restore method* saved at the entry sp
+ lw $a1, 4($sp) # first copied argument word -> a1
+ lw $a2, 8($sp) # second copied argument word -> a2
+ lw $a3, 12($sp) # third copied argument word -> a3
+ lw $t9, METHOD_CODE_OFFSET($a0) # get pointer to the code
+ jalr $t9 # call the method
+ sw $zero, 0($sp) # delay slot: store NULL for method* at bottom of frame
+ move $sp, $fp # drop the argument frame; relies on the callee preserving fp
+ lw $s0, 0($sp)
+ lw $s1, 4($sp)
+ lw $fp, 8($sp)
+ lw $ra, 12($sp)
+ addiu $sp, $sp, 16
+ .cfi_adjust_cfa_offset -16
+ lw $t0, 16($sp) # get result pointer (entry [sp + 16])
+ sw $v0, 0($t0) # store the result
+ sw $v1, 4($t0) # store the other half of the result
+ lw $t0, 20($sp) # get floating point result pointer (entry [sp + 20])
+ jr $ra
+ s.d $f0, 0($t0) # delay slot: store floating point result
+END art_quick_invoke_stub
+
+ /*
* Entry point of native methods when JNI bug compatibility is enabled.
*/
.extern artWorkAroundAppJniBugs
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 0ff69d9..32d657d 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -301,6 +301,50 @@
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
+ /*
+ * Invocation stub.
+ * On entry:
+ * [sp] = return address
+ * [sp + 4] = method pointer
+ * [sp + 8] = argument array or NULL for no argument methods
+ * [sp + 12] = size of argument array in bytes
+ * [sp + 16] = (managed) thread pointer
+ * [sp + 20] = JValue* result for non-floating point returns
+ * [sp + 24] = JValue* result for floating point returns
+ */
+DEFINE_FUNCTION art_quick_invoke_stub
+ PUSH ebp // save ebp
+ PUSH ebx // save ebx
+ mov %esp, %ebp // ebp = esp after the two pushes; entry [sp + N] is now N + 8 off ebp
+ .cfi_def_cfa_register ebp
+ mov 20(%ebp), %ebx // get arg array size (entry [sp + 12])
+ addl LITERAL(28), %ebx // reserve space for return addr, method*, ebx, and ebp in frame (+12 so the mask rounds up)
+ andl LITERAL(0xFFFFFFF0), %ebx // align frame size to 16 bytes (mask 0xFFFFFFF8 only aligned to 8)
+ subl LITERAL(12), %ebx // remove space for return address, ebx, and ebp (already on the stack)
+ subl %ebx, %esp // reserve stack space for method* slot and argument array
+ lea 4(%esp), %eax // memcpy dest = first word above the method* slot
+ pushl 20(%ebp) // push size of region to memcpy
+ pushl 16(%ebp) // push arg array as source of memcpy
+ pushl %eax // push destination of memcpy
+ call SYMBOL(memcpy) // (void*, const void*, size_t)
+ addl LITERAL(12), %esp // pop arguments to memcpy
+ movl LITERAL(0), (%esp) // store NULL for method*
+ mov 12(%ebp), %eax // move method pointer into eax (entry [sp + 4])
+ mov 4(%esp), %ecx // first copied argument word -> ecx
+ mov 8(%esp), %edx // second copied argument word -> edx
+ mov 12(%esp), %ebx // third copied argument word -> ebx
+ call *METHOD_CODE_OFFSET(%eax) // call the method; '*' marks the indirect call through memory
+ mov %ebp, %esp // drop the argument frame; relies on the callee preserving ebp
+ POP ebx // pop ebx
+ POP ebp // pop ebp
+ mov 20(%esp), %ecx // get result pointer (esp is back at the return address)
+ mov %eax, (%ecx) // store the result
+ mov %edx, 4(%ecx) // store the other half of the result
+ mov 24(%esp), %ecx // get floating point result pointer
+ movsd %xmm0, (%ecx) // store the floating point result
+ ret
+END_FUNCTION art_quick_invoke_stub
+
MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name, 0)
SETUP_REF_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC