/*
* This file was generated automatically by gen-template.py for 'mips'.
*
* --> DO NOT EDIT <--
*/
/* File: mips/header.S */
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if defined(WITH_JIT)
/*
* This is a #include, not a %include, because we want the C pre-processor
* to expand the macros into assembler assignment statements.
*/
#include "../../../mterp/common/asm-constants.h"
#include "../../../mterp/common/mips-defines.h"
#include "../../../mterp/common/jit-config.h"
#include <asm/regdef.h>
#include <asm/fpregdef.h>
#ifdef __mips_hard_float
#define HARD_FLOAT
#else
#define SOFT_FLOAT
#endif
/* MIPS definitions and declarations
reg nick purpose
s0 rPC interpreted program counter, used for fetching instructions
s1 rFP interpreted frame pointer, used for accessing locals and args
s2 rSELF pointer to thread
s3 rIBASE interpreted instruction base pointer, used for computed goto
s4 rINST first 16-bit code unit of current instruction
*/
/* register offsets */
#define r_ZERO 0
#define r_AT 1
#define r_V0 2
#define r_V1 3
#define r_A0 4
#define r_A1 5
#define r_A2 6
#define r_A3 7
#define r_T0 8
#define r_T1 9
#define r_T2 10
#define r_T3 11
#define r_T4 12
#define r_T5 13
#define r_T6 14
#define r_T7 15
#define r_S0 16
#define r_S1 17
#define r_S2 18
#define r_S3 19
#define r_S4 20
#define r_S5 21
#define r_S6 22
#define r_S7 23
#define r_T8 24
#define r_T9 25
#define r_K0 26
#define r_K1 27
#define r_GP 28
#define r_SP 29
#define r_FP 30
#define r_RA 31
#define r_F0 32
#define r_F1 33
#define r_F2 34
#define r_F3 35
#define r_F4 36
#define r_F5 37
#define r_F6 38
#define r_F7 39
#define r_F8 40
#define r_F9 41
#define r_F10 42
#define r_F11 43
#define r_F12 44
#define r_F13 45
#define r_F14 46
#define r_F15 47
#define r_F16 48
#define r_F17 49
#define r_F18 50
#define r_F19 51
#define r_F20 52
#define r_F21 53
#define r_F22 54
#define r_F23 55
#define r_F24 56
#define r_F25 57
#define r_F26 58
#define r_F27 59
#define r_F28 60
#define r_F29 61
#define r_F30 62
#define r_F31 63
/* single-purpose registers, given names for clarity */
#define rPC s0
#define rFP s1
#define rSELF s2
#define rIBASE s3
#define rINST s4
#define rOBJ s5
#define rBIX s6
#define rTEMP s7
/* A 64-bit ("long") argument occupies a register pair (a0/a1 or a2/a3). Which
   register of the pair holds the most-significant word depends on endianness,
   so code written for little-endian ordering must swap the pair when built
   big-endian. The rARG/rRESULT aliases below name the low and high words so
   that template code stays endian-neutral. */
#ifdef HAVE_LITTLE_ENDIAN
#define rARG0 a0
#define rARG1 a1
#define rARG2 a2
#define rARG3 a3
#define rRESULT0 v0
#define rRESULT1 v1
#else
#define rARG0 a1
#define rARG1 a0
#define rARG2 a3
#define rARG3 a2
#define rRESULT0 v1
#define rRESULT1 v0
#endif
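/*
 * Illustrative sketch (an assumption about the o32 convention, not generated
 * code): a helper like this shows which half of a 64-bit value the rARG
 * aliases refer to.
 *
 *   #include <stdint.h>
 *   static void split64(int64_t x, uint32_t *lo, int32_t *hi)
 *   {
 *       *lo = (uint32_t)x;          // rARG0: a0 on little-endian, a1 on big-endian
 *       *hi = (int32_t)(x >> 32);   // rARG1: a1 on little-endian, a0 on big-endian
 *   }
 */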
/* save/restore the PC and/or FP from the thread struct */
#define LOAD_PC_FROM_SELF() lw rPC, offThread_pc(rSELF)
#define SAVE_PC_TO_SELF() sw rPC, offThread_pc(rSELF)
#define LOAD_FP_FROM_SELF() lw rFP, offThread_curFrame(rSELF)
#define SAVE_FP_TO_SELF() sw rFP, offThread_curFrame(rSELF)
#define EXPORT_PC() \
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
#define SAVEAREA_FROM_FP(rd, _fpreg) \
subu rd, _fpreg, sizeofStackSaveArea
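/*
 * Illustrative sketch (C view, hypothetical macro name): the stack save area
 * sits immediately below the Dalvik frame pointer, which is all that
 * SAVEAREA_FROM_FP and the EXPORT_PC offset express:
 *
 *   #define SAVEAREA_FROM_FP_C(_fp) \
 *       ((char *)(_fp) - sizeofStackSaveArea)   // save area lives just below fp
 */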
#define FETCH_INST() lhu rINST, (rPC)
#define FETCH_ADVANCE_INST(_count) lhu rINST, (_count*2)(rPC); \
addu rPC, rPC, (_count * 2)
#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
lhu rINST, (rPC)
#define FETCH(rd, _count) lhu rd, (_count * 2)(rPC)
#define FETCH_S(rd, _count) lh rd, (_count * 2)(rPC)
#ifdef HAVE_LITTLE_ENDIAN
#define FETCH_B(rd, _count) lbu rd, (_count * 2)(rPC)
#define FETCH_C(rd, _count) lbu rd, (_count * 2 + 1)(rPC)
#else
#define FETCH_B(rd, _count) lbu rd, (_count * 2 + 1)(rPC)
#define FETCH_C(rd, _count) lbu rd, (_count * 2)(rPC)
#endif
#define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
#define GOTO_OPCODE(rd) sll rd, rd, -1000; \
addu rd, rIBASE, rd; \
jr rd
#define LOAD(rd, rbase) lw rd, 0(rbase)
#define LOAD_F(rd, rbase) l.s rd, (rbase)
#define STORE(rd, rbase) sw rd, 0(rbase)
#define STORE_F(rd, rbase) s.s rd, (rbase)
#define GET_VREG(rd, rix) LOAD_eas2(rd,rFP,rix)
#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
.set noat; l.s rd, (AT); .set at
#define SET_VREG(rd, rix) STORE_eas2(rd, rFP, rix)
#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
sll dst, dst, -1000; \
addu dst, rIBASE, dst; \
sll t8, rix, 2; \
addu t8, t8, rFP; \
jr dst; \
sw rd, 0(t8); \
.set reorder
#define SET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
.set noat; s.s rd, (AT); .set at
#define GET_OPA(rd) srl rd, rINST, 8
#ifndef MIPS32R2
#define GET_OPA4(rd) GET_OPA(rd); and rd, 0xf
#else
#define GET_OPA4(rd) ext rd, rINST, 8, 4
#endif
#define GET_OPB(rd) srl rd, rINST, 12
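/*
 * Illustrative sketch (hypothetical helper names): the 16-bit code unit held
 * in rINST packs the opcode in the low byte and the vA/vB nibbles in the high
 * byte, so the extractors above are equivalent to:
 *
 *   #include <stdint.h>
 *   static unsigned inst_opcode(uint16_t inst) { return inst & 0xff; }       // GET_INST_OPCODE
 *   static unsigned inst_a4(uint16_t inst)     { return (inst >> 8) & 0xf; } // GET_OPA4
 *   static unsigned inst_b(uint16_t inst)      { return inst >> 12; }        // GET_OPB
 */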
#define LOAD_rSELF_OFF(rd,off) lw rd, offThread_##off##(rSELF)
#define LOAD_rSELF_method(rd) LOAD_rSELF_OFF(rd, method)
#define LOAD_rSELF_methodClassDex(rd) LOAD_rSELF_OFF(rd, methodClassDex)
#define LOAD_rSELF_interpStackEnd(rd) LOAD_rSELF_OFF(rd, interpStackEnd)
#define LOAD_rSELF_retval(rd) LOAD_rSELF_OFF(rd, retval)
#define LOAD_rSELF_pActiveProfilers(rd) LOAD_rSELF_OFF(rd, pActiveProfilers)
#define LOAD_rSELF_bailPtr(rd) LOAD_rSELF_OFF(rd, bailPtr)
#define GET_JIT_PROF_TABLE(rd) LOAD_rSELF_OFF(rd,pJitProfTable)
#define GET_JIT_THRESHOLD(rd) LOAD_rSELF_OFF(rd,jitThreshold)
/*
* Form an Effective Address rd = rbase + roff<<n;
* Uses reg AT
*/
#define EASN(rd,rbase,roff,rshift) .set noat; \
sll AT, roff, rshift; \
addu rd, rbase, AT; \
.set at
#define EAS1(rd,rbase,roff) EASN(rd,rbase,roff,1)
#define EAS2(rd,rbase,roff) EASN(rd,rbase,roff,2)
#define EAS3(rd,rbase,roff) EASN(rd,rbase,roff,3)
#define EAS4(rd,rbase,roff) EASN(rd,rbase,roff,4)
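/*
 * Illustrative usage (hypothetical helper): EAS2 is the common case for
 * indexing 32-bit Dalvik vregs, so GET_VREG/SET_VREG above amount to this
 * address computation followed by a lw/sw through AT:
 *
 *   #include <stdint.h>
 *   static uint32_t *vreg_addr(uint32_t *fp, unsigned vreg)
 *   {
 *       return fp + vreg;           // rd = rFP + (vreg << 2) bytes
 *   }
 */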
/*
* Form an Effective Shift Right rd = rbase + roff>>n;
* Uses reg AT
*/
#define ESRN(rd,rbase,roff,rshift) .set noat; \
srl AT, roff, rshift; \
addu rd, rbase, AT; \
.set at
#define LOAD_eas2(rd,rbase,roff) EAS2(AT, rbase, roff); \
.set noat; lw rd, 0(AT); .set at
#define STORE_eas2(rd,rbase,roff) EAS2(AT, rbase, roff); \
.set noat; sw rd, 0(AT); .set at
#define LOAD_RB_OFF(rd,rbase,off) lw rd, off(rbase)
#define LOADu2_RB_OFF(rd,rbase,off) lhu rd, off(rbase)
#define STORE_RB_OFF(rd,rbase,off) sw rd, off(rbase)
#ifdef HAVE_LITTLE_ENDIAN
#define STORE64_off(rlo,rhi,rbase,off) sw rlo, off(rbase); \
sw rhi, (off+4)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off) lw rlo, off(rbase); \
lw rhi, (off+4)(rbase)
#define STORE64_off_F(rlo,rhi,rbase,off) s.s rlo, off(rbase); \
s.s rhi, (off+4)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off) l.s rlo, off(rbase); \
l.s rhi, (off+4)(rbase)
#else
#define STORE64_off(rlo,rhi,rbase,off) sw rlo, (off+4)(rbase); \
sw rhi, (off)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off) lw rlo, (off+4)(rbase); \
lw rhi, (off)(rbase)
#define STORE64_off_F(rlo,rhi,rbase,off) s.s rlo, (off+4)(rbase); \
s.s rhi, (off)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off) l.s rlo, (off+4)(rbase); \
l.s rhi, (off)(rbase)
#endif
#define STORE64(rlo,rhi,rbase) STORE64_off(rlo,rhi,rbase,0)
#define LOAD64(rlo,rhi,rbase) LOAD64_off(rlo,rhi,rbase,0)
#define STORE64_F(rlo,rhi,rbase) STORE64_off_F(rlo,rhi,rbase,0)
#define LOAD64_F(rlo,rhi,rbase) LOAD64_off_F(rlo,rhi,rbase,0)
#define STORE64_lo(rd,rbase) sw rd, 0(rbase)
#define STORE64_hi(rd,rbase) sw rd, 4(rbase)
#define LOAD_offThread_exception(rd,rbase) LOAD_RB_OFF(rd,rbase,offThread_exception)
#define LOAD_base_offArrayObject_length(rd,rbase) LOAD_RB_OFF(rd,rbase,offArrayObject_length)
#define LOAD_base_offClassObject_accessFlags(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags)
#define LOAD_base_offClassObject_descriptor(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_descriptor)
#define LOAD_base_offClassObject_super(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_super)
#define LOAD_base_offClassObject_vtable(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_vtable)
#define LOAD_base_offClassObject_vtableCount(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount)
#define LOAD_base_offDvmDex_pResClasses(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses)
#define LOAD_base_offDvmDex_pResFields(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields)
#define LOAD_base_offDvmDex_pResMethods(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods)
#define LOAD_base_offDvmDex_pResStrings(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings)
#define LOAD_base_offInstField_byteOffset(rd,rbase) LOAD_RB_OFF(rd,rbase,offInstField_byteOffset)
#define LOAD_base_offStaticField_value(rd,rbase) LOAD_RB_OFF(rd,rbase,offStaticField_value)
#define LOAD_base_offMethod_clazz(rd,rbase) LOAD_RB_OFF(rd,rbase,offMethod_clazz)
#define LOAD_base_offMethod_name(rd,rbase) LOAD_RB_OFF(rd,rbase,offMethod_name)
#define LOAD_base_offObject_clazz(rd,rbase) LOAD_RB_OFF(rd,rbase,offObject_clazz)
#define LOADu2_offMethod_methodIndex(rd,rbase) LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex)
#define STORE_offThread_exception(rd,rbase) STORE_RB_OFF(rd,rbase,offThread_exception)
#define STACK_STORE(rd,off) sw rd, off(sp)
#define STACK_LOAD(rd,off) lw rd, off(sp)
#define CREATE_STACK(n) subu sp, sp, n
#define DELETE_STACK(n) addu sp, sp, n
#define SAVE_RA(offset) STACK_STORE(ra, offset)
#define LOAD_RA(offset) STACK_LOAD(ra, offset)
#define LOAD_ADDR(dest,addr) la dest, addr
#define LOAD_IMM(dest, imm) li dest, imm
#define MOVE_REG(dest,src) move dest, src
#define RETURN jr ra
#define STACK_SIZE 128
#define STACK_OFFSET_ARG04 16
#define STACK_OFFSET_GP 84
#define STACK_OFFSET_rFP 112
/* This directive makes sure every subsequent jal restores gp from a known stack offset */
.cprestore STACK_OFFSET_GP
#define JAL(func) move rTEMP, ra; \
jal func; \
move ra, rTEMP
#define JALR(reg) move rTEMP, ra; \
jalr ra, reg; \
move ra, rTEMP
#define BAL(n) bal n
#define STACK_STORE_RA() CREATE_STACK(STACK_SIZE); \
STACK_STORE(gp, STACK_OFFSET_GP); \
STACK_STORE(ra, 124)
#define STACK_STORE_S0() STACK_STORE_RA(); \
STACK_STORE(s0, 116)
#define STACK_STORE_S0S1() STACK_STORE_S0(); \
STACK_STORE(s1, STACK_OFFSET_rFP)
#define STACK_LOAD_RA() STACK_LOAD(ra, 124); \
STACK_LOAD(gp, STACK_OFFSET_GP); \
DELETE_STACK(STACK_SIZE)
#define STACK_LOAD_S0() STACK_LOAD(s0, 116); \
STACK_LOAD_RA()
#define STACK_LOAD_S0S1() STACK_LOAD(s1, STACK_OFFSET_rFP); \
STACK_LOAD_S0()
#define STACK_STORE_FULL() CREATE_STACK(STACK_SIZE); \
STACK_STORE(ra, 124); \
STACK_STORE(fp, 120); \
STACK_STORE(s0, 116); \
STACK_STORE(s1, STACK_OFFSET_rFP); \
STACK_STORE(s2, 108); \
STACK_STORE(s3, 104); \
STACK_STORE(s4, 100); \
STACK_STORE(s5, 96); \
STACK_STORE(s6, 92); \
STACK_STORE(s7, 88);
#define STACK_LOAD_FULL() STACK_LOAD(gp, STACK_OFFSET_GP); \
STACK_LOAD(s7, 88); \
STACK_LOAD(s6, 92); \
STACK_LOAD(s5, 96); \
STACK_LOAD(s4, 100); \
STACK_LOAD(s3, 104); \
STACK_LOAD(s2, 108); \
STACK_LOAD(s1, STACK_OFFSET_rFP); \
STACK_LOAD(s0, 116); \
STACK_LOAD(fp, 120); \
STACK_LOAD(ra, 124); \
DELETE_STACK(STACK_SIZE)
/*
* first 8 words are reserved for function calls
* Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR
*/
#define STACK_OFFSET_SCR 32
#define SCRATCH_STORE(r,off) \
STACK_STORE(r, STACK_OFFSET_SCR+off);
#define SCRATCH_LOAD(r,off) \
STACK_LOAD(r, STACK_OFFSET_SCR+off);
/* File: mips/platform.S */
/*
* ===========================================================================
* CPU-version-specific defines and utility
* ===========================================================================
*/
.global dvmCompilerTemplateStart
.type dvmCompilerTemplateStart, %function
.section .data.rel.ro
dvmCompilerTemplateStart:
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: mips/TEMPLATE_CMP_LONG.S */
/*
* Compare two 64-bit values
* x = y return 0
* x < y return -1
* x > y return 1
*
* I think I can improve on the ARM code by the following observation
* slt t0, x.hi, y.hi; # (x.hi < y.hi) ? 1:0
 * sgt t1, x.hi, y.hi; # (x.hi > y.hi) ? 1:0
 * subu v0, t1, t0 # v0= -1:1:0 for [ < > = ]
*
 * This code assumes the register pair ordering will depend on endianness (a1:a0 or a0:a1).
* a1:a0 => vBB
* a3:a2 => vCC
*/
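/*
 * Illustrative C equivalent (a sketch, assuming the 32-bit halves described
 * above):
 *
 *   #include <stdint.h>
 *   static int cmp_long(int32_t xhi, uint32_t xlo, int32_t yhi, uint32_t ylo)
 *   {
 *       int v = (xhi > yhi) - (xhi < yhi);    // signed compare of high words
 *       if (v == 0)
 *           v = (xlo > ylo) - (xlo < ylo);    // equal: unsigned compare of low words
 *       return v;                             // -1, 0, or 1
 *   }
 */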
/* cmp-long vAA, vBB, vCC */
slt t0, rARG1, rARG3 # compare hi
sgt t1, rARG1, rARG3
subu v0, t1, t0 # v0<- (-1,1,0)
bnez v0, .LTEMPLATE_CMP_LONG_finish
# at this point x.hi==y.hi
sltu t0, rARG0, rARG2 # compare lo
sgtu t1, rARG0, rARG2
subu v0, t1, t0 # v0<- (-1,1,0) for [< > =]
.LTEMPLATE_CMP_LONG_finish:
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_RETURN
dvmCompiler_TEMPLATE_RETURN:
/* File: mips/TEMPLATE_RETURN.S */
/*
 * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
 * If the value stored in returnAddr is non-zero, the caller was compiled by
 * the JIT, so return to the address in the code cache that follows the invoke
 * instruction. Otherwise return to the special dvmJitToInterpNoChain entry
 * point.
*/
#if defined(TEMPLATE_INLINE_PROFILING)
# preserve a0-a2 and ra
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(ra, 12)
# a0=rSELF
move a0, rSELF
la t9, dvmFastMethodTraceExit
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a2 and ra
SCRATCH_LOAD(ra, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
#endif
SAVEAREA_FROM_FP(a0, rFP) # a0<- saveArea (old)
lw t0, offStackSaveArea_prevFrame(a0) # t0<- saveArea->prevFrame
lbu t1, offThread_breakFlags(rSELF) # t1<- breakFlags
lw rPC, offStackSaveArea_savedPc(a0) # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
lw t2, offStackSaveArea_returnAddr(a0) # t2<- chaining cell ret
#else
move t2, zero # disable chaining
#endif
lw a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
# a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
beq a2, zero, 1f # bail to interpreter
#else
bne a2, zero, 2f
JALR(ra) # punt to interpreter and compare state
# DOUG: assume this does not return ???
2:
#endif
la t4, .LdvmJitToInterpNoChainNoProfile # defined in footer.S
lw a1, (t4)
move rFP, t0 # publish new FP
beq a2, zero, 4f
lw t0, offMethod_clazz(a2) # t0<- method->clazz
4:
sw a2, offThread_method(rSELF) # self->method = newSave->method
lw a0, offClassObject_pDvmDex(t0) # a0<- method->clazz->pDvmDex
sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp
add rPC, rPC, 3*2 # publish new rPC
sw a0, offThread_methodClassDex(rSELF)
movn t2, zero, t1 # check the breakFlags and
# clear the chaining cell address
sw t2, offThread_inJitCodeCache(rSELF) # in code cache or not
beq t2, zero, 3f # chaining cell exists?
JALR(t2) # jump to the chaining cell
# DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
li a0, kCallsiteInterpreted
#endif
j a1 # callsite is interpreted
1:
sw zero, offThread_inJitCodeCache(rSELF) # reset inJitCodeCache
SAVE_PC_TO_SELF() # SAVE_PC_FP_TO_SELF()
SAVE_FP_TO_SELF()
la t4, .LdvmMterpStdBail # defined in footer.S
lw a2, (t4)
move a0, rSELF # Expecting rSELF in a0
JALR(a2) # exit the interpreter
# DOUG: assume this does not return ???
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
/*
 * For polymorphic callsites, set up the Dalvik frame and load the Dalvik PC
 * into rPC, then jump to dvmJitToInterpNoChain to dispatch the
 * runtime-resolved callee.
*/
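/*
 * Illustrative sketch of the frame-fit check performed below (hypothetical
 * helper; all quantities in 32-bit words):
 *
 *   #include <stdint.h>
 *   // returns 1 if the new frame fits, 0 if we must bail so the interpreter
 *   // can raise StackOverflowError
 *   static int frame_fits(uint32_t *oldSaveArea, unsigned regsSize, unsigned outsSize,
 *                         unsigned saveAreaWords, const uint32_t *interpStackEnd)
 *   {
 *       uint32_t *newFp  = oldSaveArea - regsSize;             // a1 below
 *       uint32_t *bottom = newFp - saveAreaWords - outsSize;   // t0 below
 *       return bottom >= interpStackEnd;                       // bgeu t0, t9, 1f
 *   }
 */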
# a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize
lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize
lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags
move a3, a1 # a3<- returnCell
SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area
sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg)
sub a1, a1, t6 # a1<- newFp(old savearea-regsSize)
SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area
sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg)
sub t0, t0, t6 # t0<- bottom (newsave-outsSize)
bgeu t0, t9, 1f # bottom < interpStackEnd?
RETURN # return to raise stack overflow excep.
1:
# a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz
lw t0, offMethod_accessFlags(a0) # t0<- methodToCall->accessFlags
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns
# set up newSaveArea
sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
beqz t8, 2f # breakFlags != 0
RETURN # bail to the interpreter
2:
and t6, t0, ACC_NATIVE
beqz t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
j .LinvokeNative
#else
RETURN # bail to the interpreter
#endif
3:
# continue executing the next instruction through the interpreter
la t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
lw rTEMP, (t0)
lw a3, offClassObject_pDvmDex(t9) # a3<- method->clazz->pDvmDex
# Update "thread" values for the new method
sw a0, offThread_method(rSELF) # self->method = methodToCall
sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ...
move rFP, a1 # fp = newFp
sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
# preserve a0-a3
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(a3, 12)
# a0=methodToCall, a1=rSELF
move a1, rSELF
la t9, dvmFastMethodTraceEnter
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a3
SCRATCH_LOAD(a3, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
#endif
# Start executing the callee
#if defined(WITH_JIT_TUNING)
li a0, kInlineCacheMiss
#endif
jr rTEMP # dvmJitToInterpTraceSelectNoChain
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
/*
 * For a monomorphic callsite, set up the Dalvik frame and return to the
 * compiled code through the link register to transfer control to the callee
 * method through a dedicated chaining cell.
*/
# a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
# methodToCall is guaranteed to be non-native
.LinvokeChain:
lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize
lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize
lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags
move a3, a1 # a3<- returnCell
SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area
sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg)
sub a1, a1, t6 # a1<- newFp(old savearea-regsSize)
SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area
add t2, ra, 8 # setup the punt-to-interp address
# 8 bytes skips branch and delay slot
sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg)
sub t0, t0, t6 # t0<- bottom (newsave-outsSize)
bgeu t0, t9, 1f # bottom < interpStackEnd?
jr t2 # return to raise stack overflow excep.
1:
# a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns
# set up newSaveArea
sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
beqz t8, 2f # breakFlags != 0
jr t2 # bail to the interpreter
2:
lw a3, offClassObject_pDvmDex(t9) # a3<- methodToCall->clazz->pDvmDex
# Update "thread" values for the new method
sw a0, offThread_method(rSELF) # self->method = methodToCall
sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ...
move rFP, a1 # fp = newFp
sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
# preserve a0-a2 and ra
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(ra, 12)
move a1, rSELF
# a0=methodToCall, a1=rSELF
la t9, dvmFastMethodTraceEnter
jalr t9
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a2 and ra
SCRATCH_LOAD(ra, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
#endif
RETURN # return to the callee-chaining cell
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
/*
 * For a polymorphic callsite, check whether the cached class pointer matches
 * the current one. If so, set up the Dalvik frame and return to the
 * compiled code through the link register to transfer control to the callee
 * method through a dedicated chaining cell.
*
* The predicted chaining cell is declared in ArmLIR.h with the
* following layout:
*
* typedef struct PredictedChainingCell {
* u4 branch;
* u4 delay_slot;
* const ClassObject *clazz;
* const Method *method;
* u4 counter;
* } PredictedChainingCell;
*
* Upon returning to the callsite:
* - lr : to branch to the chaining cell
* - lr+8 : to punt to the interpreter
* - lr+16: to fully resolve the callee and may rechain.
* a3 <- class
*/
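/*
 * Illustrative sketch of the decision made below (hypothetical helper; Cell
 * mirrors the PredictedChainingCell layout above):
 *
 *   struct Cell { unsigned branch, delay_slot; const void *clazz, *method; unsigned counter; };
 *   // returns 1 to take the chained branch (ra), 0 to return to ra+16 and
 *   // fully resolve (and possibly rechain) the callee
 *   static int predict(const struct Cell *cell, const void *thisClazz, int *rechainCount)
 *   {
 *       if (cell->clazz == thisClazz)
 *           return 1;
 *       *rechainCount = (cell->clazz == 0) ? 0 : *rechainCount - 1;
 *       return 0;
 *   }
 */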
# a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
lw a3, offObject_clazz(a0) # a3 <- this->class
lw rIBASE, 8(a2) # rIBASE <- predictedChainCell->clazz
lw a0, 12(a2) # a0 <- predictedChainCell->method
lw t1, offThread_icRechainCount(rSELF) # t1 <- shared rechainCount
#if defined(WITH_JIT_TUNING)
la rINST, .LdvmICHitCount
#add t2, t2, 1
bne a3, rIBASE, 1f
nop
lw t2, 0(rINST)
add t2, t2, 1
sw t2, 0(rINST)
1:
#add t2, t2, 1
#endif
beq a3, rIBASE, .LinvokeChain # branch if predicted chain is valid
lw rINST, offClassObject_vtable(a3) # rINST <- this->class->vtable
beqz rIBASE, 2f # initialized class or not
sub a1, t1, 1 # count--
sw a1, offThread_icRechainCount(rSELF) # write back to InterpState
b 3f
2:
move a1, zero
3:
add ra, ra, 16 # return to fully-resolve landing pad
/*
* a1 <- count
* a2 <- &predictedChainCell
* a3 <- this->class
* rPC <- dPC
* rINST <- this->class->vtable
*/
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
# a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize
lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags
move a3, a1 # a3<- returnCell
SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area
sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg)
sub a1, a1, t6 # a1<- newFp(old savearea-regsSize)
SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area
bgeu t0, t9, 1f # bottom < interpStackEnd?
RETURN # return to raise stack overflow excep.
1:
# a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns
# set up newSaveArea
sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
lw rTEMP, offMethod_nativeFunc(a0) # rTEMP<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
beqz t8, 2f # breakFlags != 0
RETURN # bail to the interpreter
2:
#else
RETURN # bail to the interpreter unconditionally
#endif
# go ahead and transfer control to the native code
lw t6, offThread_jniLocal_topCookie(rSELF) # t6<- thread->localRef->...
sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp
sw zero, offThread_inJitCodeCache(rSELF) # not in the jit code cache
sw t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
# newFp->localRefCookie=top
SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area
move a2, a0 # a2<- methodToCall
move a0, a1 # a0<- newFp
add a1, rSELF, offThread_retval # a1<- &retval
move a3, rSELF # a3<- self
#if defined(TEMPLATE_INLINE_PROFILING)
# a2: methodToCall
# preserve a0-a3
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(a3, 12)
move a0, a2
move a1, rSELF
# a0=JNIMethod, a1=rSELF
la t9, dvmFastMethodTraceEnter
JALR(t9) # off to the native code
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a3
SCRATCH_LOAD(a3, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
move rOBJ, a2 # save a2
#endif
JALR(rTEMP) # off to the native code
lw gp, STACK_OFFSET_GP(sp)
#if defined(TEMPLATE_INLINE_PROFILING)
move a0, rOBJ
move a1, rSELF
# a0=JNIMethod, a1=rSELF
la t9, dvmFastNativeMethodTraceExit
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
#endif
# native return; rBIX=newSaveArea
# equivalent to dvmPopJniLocals
lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr
lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
lw a1, offThread_exception(rSELF) # check for exception
sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp
sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top
lw a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
# a0 = dalvikCallsitePC
bnez a1, .LhandleException # handle exception if any
sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly
beqz a2, 3f
jr a2 # go if return chaining cell still exists
3:
# continue executing the next instruction through the interpreter
la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
lw a1, (a1)
add rPC, a0, 3*2 # reconstruct new rPC (advance 3 dalvik instr)
#if defined(WITH_JIT_TUNING)
li a0, kCallsiteInterpreted
#endif
jr a1
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_MUL_LONG
dvmCompiler_TEMPLATE_MUL_LONG:
/* File: mips/TEMPLATE_MUL_LONG.S */
/*
* Signed 64-bit integer multiply.
*
* For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1
*
* Consider WXxYZ (a1a0 x a3a2) with a long multiply:
*
* a1 a0
* x a3 a2
* -------------
* a2a1 a2a0
* a3a0
* a3a1 (<= unused)
* ---------------
* v1 v0
*
*/
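/*
 * Illustrative C equivalent of the partial-product scheme above (a sketch;
 * operand/result names follow the diagram):
 *
 *   #include <stdint.h>
 *   static void mul64(uint32_t a0lo, uint32_t a1hi, uint32_t a2lo, uint32_t a3hi,
 *                     uint32_t *v0lo, uint32_t *v1hi)
 *   {
 *       uint64_t p = (uint64_t)a2lo * a0lo;                       // a2a0, full 64 bits
 *       *v0lo = (uint32_t)p;
 *       *v1hi = (uint32_t)(p >> 32) + a3hi * a0lo + a2lo * a1hi;  // carry + cross terms
 *   }
 */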
/* mul-long vAA, vBB, vCC */
mul rRESULT1,rARG3,rARG0 # v1= a3a0
multu rARG2,rARG0
mfhi t1
mflo rRESULT0 # v0= a2a0
mul t0,rARG2,rARG1 # t0= a2a1
addu rRESULT1,rRESULT1,t1 # v1= a3a0 + hi(a2a0)
addu rRESULT1,rRESULT1,t0 # v1= a3a0 + hi(a2a0) + a2a1;
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_SHL_LONG
dvmCompiler_TEMPLATE_SHL_LONG:
/* File: mips/TEMPLATE_SHL_LONG.S */
/*
* Long integer shift. This is different from the generic 32/64-bit
* binary operations because vAA/vBB are 64-bit but vCC (the shift
* distance) is 32-bit. Also, Dalvik requires us to ignore all but the low
* 6 bits.
*/
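/*
 * Illustrative C equivalent (a sketch; the template below computes both cases
 * and selects with movn instead of branching):
 *
 *   #include <stdint.h>
 *   static void shl64(uint32_t lo, uint32_t hi, unsigned shift,
 *                     uint32_t *rlo, uint32_t *rhi)
 *   {
 *       unsigned n = shift & 63;                 // Dalvik: only the low 6 bits count
 *       if (n & 0x20) {                          // shifting by 32..63
 *           *rhi = lo << (n & 31);
 *           *rlo = 0;
 *       } else {
 *           *rhi = (hi << n) | (uint32_t)(((uint64_t)lo << n) >> 32);
 *           *rlo = lo << n;
 *       }
 *   }
 */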
/* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
sll rRESULT0, rARG0, a2 # rlo<- alo << (shift&31)
not rRESULT1, a2 # rhi<- 31-shift (shift is 5b)
srl rARG0, 1
srl rARG0, rRESULT1 # alo<- alo >> (32-(shift&31))
sll rRESULT1, rARG1, a2 # rhi<- ahi << (shift&31)
or rRESULT1, rARG0 # rhi<- rhi | alo
andi a2, 0x20 # shift<- shift & 0x20
movn rRESULT1, rRESULT0, a2 # rhi<- rlo (if shift&0x20)
movn rRESULT0, zero, a2 # rlo<- 0 (if shift&0x20)
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_SHR_LONG
dvmCompiler_TEMPLATE_SHR_LONG:
/* File: mips/TEMPLATE_SHR_LONG.S */
/*
* Long integer shift. This is different from the generic 32/64-bit
* binary operations because vAA/vBB are 64-bit but vCC (the shift
* distance) is 32-bit. Also, Dalvik requires us to ignore all but the low
* 6 bits.
*/
/* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
sra rRESULT1, rARG1, a2 # rhi<- ahi >> (shift&31)
srl rRESULT0, rARG0, a2 # rlo<- alo >> (shift&31)
sra a3, rARG1, 31 # a3<- sign(ah)
not rARG0, a2 # alo<- 31-shift (shift is 5b)
sll rARG1, 1
sll rARG1, rARG0 # ahi<- ahi << (32-(shift&31))
or rRESULT0, rARG1 # rlo<- rlo | ahi
andi a2, 0x20 # shift & 0x20
movn rRESULT0, rRESULT1, a2 # rlo<- rhi (if shift&0x20)
movn rRESULT1, a3, a2 # rhi<- sign(ahi) (if shift&0x20)
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_USHR_LONG
dvmCompiler_TEMPLATE_USHR_LONG:
/* File: mips/TEMPLATE_USHR_LONG.S */
/*
* Long integer shift. This is different from the generic 32/64-bit
* binary operations because vAA/vBB are 64-bit but vCC (the shift
* distance) is 32-bit. Also, Dalvik requires us to ignore all but the low
* 6 bits.
*/
/* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
srl rRESULT1, rARG1, a2 # rhi<- ahi >> (shift&31)
srl rRESULT0, rARG0, a2 # rlo<- alo >> (shift&31)
not rARG0, a2 # alo<- 31-n (shift is 5b)
sll rARG1, 1
sll rARG1, rARG0 # ahi<- ahi << (32-(shift&31))
or rRESULT0, rARG1 # rlo<- rlo | ahi
andi a2, 0x20 # shift & 0x20
movn rRESULT0, rRESULT1, a2 # rlo<- rhi (if shift&0x20)
movn rRESULT1, zero, a2 # rhi<- 0 (if shift&0x20)
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
/* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */
/* File: mips/fbinop.S */
/*
* Generic 32-bit binary float operation. a0 = a1 op a2.
*
* For: add-fp, sub-fp, mul-fp, div-fp
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vBB
LOAD(a1, a2) # a1<- vCC
.if 0
beqz a1, common_errDivideByZero # is second operand zero?
.endif
# optional op
JAL(__addsf3) # v0 = result
STORE(v0, rOBJ) # vAA <- v0
#else
LOAD_F(fa0, a1) # fa0<- vBB
LOAD_F(fa1, a2) # fa1<- vCC
.if 0
# is second operand zero?
li.s ft0, 0
c.eq.s fcc0, ft0, fa1 # condition bit and comparison with 0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
add.s fv0, fa0, fa1 # fv0 = result
STORE_F(fv0, rOBJ) # vAA <- fv0
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
/* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */
/* File: mips/fbinop.S */
/*
* Generic 32-bit binary float operation. a0 = a1 op a2.
*
* For: add-fp, sub-fp, mul-fp, div-fp
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vBB
LOAD(a1, a2) # a1<- vCC
.if 0
beqz a1, common_errDivideByZero # is second operand zero?
.endif
# optional op
JAL(__subsf3) # v0 = result
STORE(v0, rOBJ) # vAA <- v0
#else
LOAD_F(fa0, a1) # fa0<- vBB
LOAD_F(fa1, a2) # fa1<- vCC
.if 0
# is second operand zero?
li.s ft0, 0
c.eq.s fcc0, ft0, fa1 # condition bit and comparison with 0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
sub.s fv0, fa0, fa1 # fv0 = result
STORE_F(fv0, rOBJ) # vAA <- fv0
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
/* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */
/* File: mips/fbinop.S */
/*
* Generic 32-bit binary float operation. a0 = a1 op a2.
*
* For: add-fp, sub-fp, mul-fp, div-fp
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vBB
LOAD(a1, a2) # a1<- vCC
.if 0
beqz a1, common_errDivideByZero # is second operand zero?
.endif
# optional op
JAL(__mulsf3) # v0 = result
STORE(v0, rOBJ) # vAA <- v0
#else
LOAD_F(fa0, a1) # fa0<- vBB
LOAD_F(fa1, a2) # fa1<- vCC
.if 0
# is second operand zero?
li.s ft0, 0
c.eq.s fcc0, ft0, fa1 # condition bit and comparison with 0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
mul.s fv0, fa0, fa1 # fv0 = result
STORE_F(fv0, rOBJ) # vAA <- fv0
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
/* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */
/* File: mips/fbinop.S */
/*
* Generic 32-bit binary float operation. a0 = a1 op a2.
*
* For: add-fp, sub-fp, mul-fp, div-fp
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vBB
LOAD(a1, a2) # a1<- vCC
.if 0
beqz a1, common_errDivideByZero # is second operand zero?
.endif
# optional op
JAL(__divsf3) # v0 = result
STORE(v0, rOBJ) # vAA <- v0
#else
LOAD_F(fa0, a1) # fa0<- vBB
LOAD_F(fa1, a2) # fa1<- vCC
.if 0
# is second operand zero?
li.s ft0, 0
c.eq.s fcc0, ft0, fa1 # condition bit and comparison with 0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
div.s fv0, fa0, fa1 # fv0 = result
STORE_F(fv0, rOBJ) # vAA <- fv0
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
/* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
/*
* Generic 64-bit binary operation. Provide an "instr" line that
* specifies an instruction that performs "result = a0-a1 op a2-a3".
 * This could be a MIPS instruction or a function call.
* If "chkzero" is set to 1, we perform a divide-by-zero check on
* vCC (a1). Useful for integer division and modulus.
*
* for: add-long, sub-long, div-long, rem-long, and-long, or-long,
* xor-long, add-double, sub-double, mul-double, div-double,
* rem-double
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
move t0, a1 # save a1
move t1, a2 # save a2
LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1
.if 0
or t0, rARG2, rARG3 # second arg (a2-a3) is zero?
beqz t0, common_errDivideByZero
.endif
# optional op
JAL(__adddf3) # result<- op, a0-a3 changed
STORE64(rRESULT0, rRESULT1, rOBJ)
#else
LOAD64_F(fa0, fa0f, a1)
LOAD64_F(fa1, fa1f, a2)
.if 0
li.d ft0, 0
c.eq.d fcc0, fa1, ft0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
add.d fv0, fa0, fa1
STORE64_F(fv0, fv0f, rOBJ)
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
/* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
/*
* Generic 64-bit binary operation. Provide an "instr" line that
* specifies an instruction that performs "result = a0-a1 op a2-a3".
 * This could be a MIPS instruction or a function call.
* If "chkzero" is set to 1, we perform a divide-by-zero check on
* vCC (a1). Useful for integer division and modulus.
*
* for: add-long, sub-long, div-long, rem-long, and-long, or-long,
* xor-long, add-double, sub-double, mul-double, div-double,
* rem-double
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
move t0, a1 # save a1
move t1, a2 # save a2
LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1
.if 0
or t0, rARG2, rARG3 # second arg (a2-a3) is zero?
beqz t0, common_errDivideByZero
.endif
# optional op
JAL(__subdf3) # result<- op, a0-a3 changed
STORE64(rRESULT0, rRESULT1, rOBJ)
#else
LOAD64_F(fa0, fa0f, a1)
LOAD64_F(fa1, fa1f, a2)
.if 0
li.d ft0, 0
c.eq.d fcc0, fa1, ft0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
sub.d fv0, fa0, fa1
STORE64_F(fv0, fv0f, rOBJ)
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
/* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
/*
* Generic 64-bit binary operation. Provide an "instr" line that
* specifies an instruction that performs "result = a0-a1 op a2-a3".
 * This could be a MIPS instruction or a function call.
* If "chkzero" is set to 1, we perform a divide-by-zero check on
* vCC (a1). Useful for integer division and modulus.
*
* for: add-long, sub-long, div-long, rem-long, and-long, or-long,
* xor-long, add-double, sub-double, mul-double, div-double,
* rem-double
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
move t0, a1 # save a1
move t1, a2 # save a2
LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1
.if 0
or t0, rARG2, rARG3 # second arg (a2-a3) is zero?
beqz t0, common_errDivideByZero
.endif
# optional op
JAL(__muldf3) # result<- op, a0-a3 changed
STORE64(rRESULT0, rRESULT1, rOBJ)
#else
LOAD64_F(fa0, fa0f, a1)
LOAD64_F(fa1, fa1f, a2)
.if 0
li.d ft0, 0
c.eq.d fcc0, fa1, ft0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
mul.d fv0, fa0, fa1
STORE64_F(fv0, fv0f, rOBJ)
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
/* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
/*
* Generic 64-bit binary operation. Provide an "instr" line that
* specifies an instruction that performs "result = a0-a1 op a2-a3".
 * This could be a MIPS instruction or a function call.
* If "chkzero" is set to 1, we perform a divide-by-zero check on
* vCC (a1). Useful for integer division and modulus.
*
* for: add-long, sub-long, div-long, rem-long, and-long, or-long,
* xor-long, add-double, sub-double, mul-double, div-double,
* rem-double
*
* On entry:
* a0 = target dalvik register address
* a1 = op1 address
* a2 = op2 address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
move t0, a1 # save a1
move t1, a2 # save a2
LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1
.if 0
or t0, rARG2, rARG3 # second arg (a2-a3) is zero?
beqz t0, common_errDivideByZero
.endif
# optional op
JAL(__divdf3) # result<- op, a0-a3 changed
STORE64(rRESULT0, rRESULT1, rOBJ)
#else
LOAD64_F(fa0, fa0f, a1)
LOAD64_F(fa1, fa1f, a2)
.if 0
li.d ft0, 0
c.eq.d fcc0, fa1, ft0
bc1t fcc0, common_errDivideByZero
.endif
# optional op
div.d fv0, fa0, fa1
STORE64_F(fv0, fv0f, rOBJ)
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
/* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
/* File: mips/funopNarrower.S */
/*
* Generic 64bit-to-32bit unary operation. Provide an "instr" line
* that specifies an instruction that performs "result = op a0/a1", where
* "result" is a 32-bit quantity in a0.
*
* For: long-to-float, double-to-int, double-to-float
* If hard floating point support is available, use fa0 as the parameter, except for
* long-to-float opcode.
* (This would work for long-to-int, but that instruction is actually
* an exact match for OP_MOVE.)
*
* On entry:
* a0 = target dalvik register address
* a1 = src dalvik register address
*
*/
move rINST, a0 # save a0
#ifdef SOFT_FLOAT
move t0, a1 # save a1
LOAD64(rARG0, rARG1, t0) # a0/a1<- vB/vB+1
# optional op
JAL(__truncdfsf2) # v0<- op, a0-a3 changed
.LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg:
STORE(v0, rINST) # vA<- v0
#else
LOAD64_F(fa0, fa0f, a1)
# optional op
cvt.s.d fv0,fa0 # fv0 = result
.LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f:
STORE_F(fv0, rINST) # vA<- fv0
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
/* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */
/* File: mips/funopNarrower.S */
/*
* Generic 64bit-to-32bit unary operation. Provide an "instr" line
* that specifies an instruction that performs "result = op a0/a1", where
* "result" is a 32-bit quantity in a0.
*
* For: long-to-float, double-to-int, double-to-float
* If hard floating point support is available, use fa0 as the parameter, except for
* long-to-float opcode.
* (This would work for long-to-int, but that instruction is actually
* an exact match for OP_MOVE.)
*
* On entry:
* a0 = target dalvik register address
* a1 = src dalvik register address
*
*/
move rINST, a0 # save a0
#ifdef SOFT_FLOAT
move t0, a1 # save a1
LOAD64(rARG0, rARG1, t0) # a0/a1<- vB/vB+1
# optional op
b d2i_doconv # v0<- op, a0-a3 changed
.LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg:
STORE(v0, rINST) # vA<- v0
#else
LOAD64_F(fa0, fa0f, a1)
# optional op
b d2i_doconv # fv0 = result
.LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f:
STORE_F(fv0, rINST) # vA<- fv0
#endif
RETURN
/*
* Convert the double in a0/a1 to an int in a0.
*
* We have to clip values to int min/max per the specification. The
 * expected common case is a "reasonable" value that converts directly
 * to a modest integer. The EABI convert function doesn't do this for us.
 * rBIX / rOBJ are used to preserve the argument across the helper calls.
*/
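/*
 * Illustrative C equivalent of the clamping sequence below (a sketch; the
 * constants correspond to .LDOUBLE_TO_INT_max/min):
 *
 *   #include <stdint.h>
 *   static int32_t d2i(double d)
 *   {
 *       if (d >= 2147483647.0)   return INT32_MAX;   // clamp to maxint
 *       if (d <= -2147483648.0)  return INT32_MIN;   // clamp to minint
 *       if (d != d)              return 0;           // NaN converts to 0
 *       return (int32_t)d;                           // trunc.w.d / __fixdfsi
 *   }
 */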
d2i_doconv:
#ifdef SOFT_FLOAT
la t0, .LDOUBLE_TO_INT_max
LOAD64(rARG2, rARG3, t0)
move rBIX, rARG0 # save a0
move rOBJ, rARG1 # and a1
JAL(__gedf2) # is arg >= maxint?
move t0, v0
li v0, ~0x80000000 # return maxint (7fffffff)
bgez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # nonzero == yes
move rARG0, rBIX # recover arg
move rARG1, rOBJ
la t0, .LDOUBLE_TO_INT_min
LOAD64(rARG2, rARG3, t0)
JAL(__ledf2) # is arg <= minint?
move t0, v0
li v0, 0x80000000 # return minint (80000000)
blez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # nonzero == yes
move rARG0, rBIX # recover arg
move rARG1, rOBJ
move rARG2, rBIX # compare against self
move rARG3, rOBJ
JAL(__nedf2) # is arg == self?
move t0, v0 # zero == no
li v0, 0
bnez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # return zero for NaN
move rARG0, rBIX # recover arg
move rARG1, rOBJ
JAL(__fixdfsi) # convert double to int
b .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg
#else
la t0, .LDOUBLE_TO_INT_max
LOAD64_F(fa1, fa1f, t0)
c.ole.d fcc0, fa1, fa0
l.s fv0, .LDOUBLE_TO_INT_maxret
bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
la t0, .LDOUBLE_TO_INT_min
LOAD64_F(fa1, fa1f, t0)
c.ole.d fcc0, fa0, fa1
l.s fv0, .LDOUBLE_TO_INT_minret
bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
mov.d fa1, fa0
c.un.d fcc0, fa0, fa1
li.s fv0, 0
bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
trunc.w.d fv0, fa0
b .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
#endif
.LDOUBLE_TO_INT_max:
.dword 0x41dfffffffc00000
.LDOUBLE_TO_INT_min:
.dword 0xc1e0000000000000 # minint, as a double
.LDOUBLE_TO_INT_maxret:
.word 0x7fffffff
.LDOUBLE_TO_INT_minret:
.word 0x80000000
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
/* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
/* File: mips/funopWider.S */
/*
* Generic 32bit-to-64bit floating point unary operation. Provide an
* "instr" line that specifies an instruction that performs "d0 = op s0".
*
* For: int-to-double, float-to-double
*
* On entry:
* a0 = target dalvik register address
* a1 = src dalvik register address
*/
/* unop vA, vB */
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vB
# optional op
JAL(__extendsfdf2) # result<- op, a0-a3 changed
.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
STORE64(rRESULT0, rRESULT1, rOBJ) # vA/vA+1<- v0/v1
#else
LOAD_F(fa0, a1) # fa0<- vB
# optional op
cvt.d.s fv0, fa0
.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
STORE64_F(fv0, fv0f, rOBJ) # vA/vA+1<- fv0/fv0f
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
/* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */
/* File: mips/funop.S */
/*
* Generic 32-bit unary operation. Provide an "instr" line that
* specifies an instruction that performs "result = op a0".
* This could be a MIPS instruction or a function call.
*
* for: int-to-float, float-to-int
*
* On entry:
* a0 = target dalvik register address
* a1 = src dalvik register address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vBB
# optional op
b f2i_doconv # v0<- op, a0-a3 changed
.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg:
STORE(v0, rOBJ) # vAA<- v0
#else
LOAD_F(fa0, a1) # fa0<- vBB
# optional op
b f2i_doconv # fv0 = result
.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f:
STORE_F(fv0, rOBJ) # vAA <- fv0
#endif
RETURN
/*
 * Not an entry point; it is used only once.
*/
f2i_doconv:
#ifdef SOFT_FLOAT
li a1, 0x4f000000 # (float)maxint
move rBIX, a0
JAL(__gesf2) # is arg >= maxint?
move t0, v0
li v0, ~0x80000000 # return maxint (7fffffff)
bgez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
move a0, rBIX # recover arg
li a1, 0xcf000000 # (float)minint
JAL(__lesf2)
move t0, v0
li v0, 0x80000000 # return minint (80000000)
blez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
move a0, rBIX
move a1, rBIX
JAL(__nesf2)
move t0, v0
li v0, 0 # return zero for NaN
bnez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
move a0, rBIX
JAL(__fixsfsi)
b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
#else
l.s fa1, .LFLOAT_TO_INT_max
c.ole.s fcc0, fa1, fa0
l.s fv0, .LFLOAT_TO_INT_ret_max
bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
l.s fa1, .LFLOAT_TO_INT_min
c.ole.s fcc0, fa0, fa1
l.s fv0, .LFLOAT_TO_INT_ret_min
bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
mov.s fa1, fa0
c.un.s fcc0, fa0, fa1
li.s fv0, 0
bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
trunc.w.s fv0, fa0
b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
#endif
.LFLOAT_TO_INT_max:
.word 0x4f000000
.LFLOAT_TO_INT_min:
.word 0xcf000000
.LFLOAT_TO_INT_ret_max:
.word 0x7fffffff
.LFLOAT_TO_INT_ret_min:
.word 0x80000000
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
/* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */
/* File: mips/funopWider.S */
/*
* Generic 32bit-to-64bit floating point unary operation. Provide an
* "instr" line that specifies an instruction that performs "d0 = op s0".
*
* For: int-to-double, float-to-double
*
* On entry:
* a0 = target dalvik register address
* a1 = src dalvik register address
*/
/* unop vA, vB */
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vB
# optional op
JAL(__floatsidf) # result<- op, a0-a3 changed
.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
STORE64(rRESULT0, rRESULT1, rOBJ) # vA/vA+1<- v0/v1
#else
LOAD_F(fa0, a1) # fa0<- vB
# optional op
cvt.d.w fv0, fa0
.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
STORE64_F(fv0, fv0f, rOBJ) # vA/vA+1<- fv0/fv0f
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
/* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */
/* File: mips/funop.S */
/*
* Generic 32-bit unary operation. Provide an "instr" line that
* specifies an instruction that performs "result = op a0".
* This could be a MIPS instruction or a function call.
*
* for: int-to-float, float-to-int
*
* On entry:
* a0 = target dalvik register address
* a1 = src dalvik register address
*
* IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
*
*/
move rOBJ, a0 # save a0
#ifdef SOFT_FLOAT
LOAD(a0, a1) # a0<- vBB
# optional op
JAL(__floatsisf) # v0<- op, a0-a3 changed
.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg:
STORE(v0, rOBJ) # vAA<- v0
#else
LOAD_F(fa0, a1) # fa0<- vBB
# optional op
cvt.s.w fv0, fa0 # fv0 = result
.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f:
STORE_F(fv0, rOBJ) # vAA <- fv0
#endif
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
/* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */
/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
/*
* Compare two double precision floating-point values. Puts 0, 1, or -1 into the
* destination register based on the results of the comparison.
*
* Provide a "naninst" instruction that puts 1 or -1 into a1 depending
* on what value we'd like to return when one of the operands is NaN.
*
* The operation we're implementing is:
* if (x == y)
* return 0;
* else if (x < y)
* return -1;
* else if (x > y)
* return 1;
* else
* return {-1,1}; // one or both operands was NaN
*
* On entry:
* a0 = &op1 [vBB]
* a1 = &op2 [vCC]
*
* for: cmpl-double, cmpg-double
*/
/* op vAA, vBB, vCC */
/* "clasic" form */
#ifdef SOFT_FLOAT
move rOBJ, a0 # save a0
move rBIX, a1 # save a1
LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1
JAL(__eqdf2) # v0<- (vBB == vCC)
li rTEMP, 0 # vAA<- 0
beqz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1
JAL(__ltdf2) # a0<- (vBB < vCC)
li rTEMP, -1 # vAA<- -1
bltz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1
JAL(__gtdf2) # v0<- (vBB > vCC)
li rTEMP, 1 # vAA<- 1
bgtz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
#else
LOAD64_F(fs0, fs0f, a0) # fs0<- vBB
LOAD64_F(fs1, fs1f, a1) # fs1<- vCC
c.olt.d fcc0, fs0, fs1 # Is fs0 < fs1
li rTEMP, -1
bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
c.olt.d fcc0, fs1, fs0
li rTEMP, 1
bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
c.eq.d fcc0, fs0, fs1
li rTEMP, 0
bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
#endif
li rTEMP, 1
TEMPLATE_CMPG_DOUBLE_VFP_finish:
move v0, rTEMP # v0<- vAA
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
/*
* Compare two double precision floating-point values. Puts 0, 1, or -1 into the
* destination register based on the results of the comparison.
*
* Provide a "naninst" instruction that puts 1 or -1 into a1 depending
* on what value we'd like to return when one of the operands is NaN.
*
* The operation we're implementing is:
* if (x == y)
* return 0;
* else if (x < y)
* return -1;
* else if (x > y)
* return 1;
* else
* return {-1,1}; // one or both operands was NaN
*
* On entry:
* a0 = &op1 [vBB]
* a1 = &op2 [vCC]
*
* for: cmpl-double, cmpg-double
*/
/* op vAA, vBB, vCC */
/* "clasic" form */
#ifdef SOFT_FLOAT
move rOBJ, a0 # save a0
move rBIX, a1 # save a1
LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1
JAL(__eqdf2) # v0<- (vBB == vCC)
li rTEMP, 0 # vAA<- 0
beqz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1
JAL(__ltdf2) # a0<- (vBB < vCC)
li rTEMP, -1 # vAA<- -1
bltz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1
LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1
JAL(__gtdf2) # v0<- (vBB > vCC)
li rTEMP, 1 # vAA<- 1
bgtz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
#else
LOAD64_F(fs0, fs0f, a0) # fs0<- vBB
LOAD64_F(fs1, fs1f, a1) # fs1<- vCC
c.olt.d fcc0, fs0, fs1 # Is fs0 < fs1
li rTEMP, -1
bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
c.olt.d fcc0, fs1, fs0
li rTEMP, 1
bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
c.eq.d fcc0, fs0, fs1
li rTEMP, 0
bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
#endif
li rTEMP, -1
TEMPLATE_CMPL_DOUBLE_VFP_finish:
move v0, rTEMP # v0<- vAA
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
/* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */
/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
/*
* Compare two floating-point values. Puts 0, 1, or -1 into the
* destination register based on the results of the comparison.
*
* Provide a "naninst" instruction that puts 1 or -1 into a1 depending
* on what value we'd like to return when one of the operands is NaN.
*
* The operation we're implementing is:
* if (x == y)
* return 0;
* else if (x < y)
* return -1;
* else if (x > y)
* return 1;
* else
* return {-1,1}; // one or both operands was NaN
*
* On entry:
* a0 = &op1 [vBB]
* a1 = &op2 [vCC]
*
* for: cmpl-float, cmpg-float
*/
/* op vAA, vBB, vCC */
/* "clasic" form */
#ifdef SOFT_FLOAT
LOAD(rOBJ, a0) # rOBJ<- vBB
LOAD(rBIX, a1) # rBIX<- vCC
move a0, rOBJ # a0<- vBB
move a1, rBIX # a1<- vCC
JAL(__eqsf2) # v0<- (vBB == vCC)
li rTEMP, 0 # vAA<- 0
beqz v0, TEMPLATE_CMPG_FLOAT_VFP_finish
move a0, rOBJ # a0<- vBB
move a1, rBIX # a1<- vCC
JAL(__ltsf2) # a0<- (vBB < vCC)
li rTEMP, -1 # vAA<- -1
bltz v0, TEMPLATE_CMPG_FLOAT_VFP_finish
move a0, rOBJ # a0<- vBB
move a1, rBIX # a1<- vCC
JAL(__gtsf2) # v0<- (vBB > vCC)
li rTEMP, 1 # vAA<- 1
bgtz v0, TEMPLATE_CMPG_FLOAT_VFP_finish
#else
LOAD_F(fs0, a0) # fs0<- vBB
LOAD_F(fs1, a1) # fs1<- vCC
c.olt.s fcc0, fs0, fs1 # Is fs0 < fs1
li rTEMP, -1
bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
c.olt.s fcc0, fs1, fs0
li rTEMP, 1
bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
c.eq.s fcc0, fs0, fs1
li rTEMP, 0
bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
#endif
li rTEMP, 1
TEMPLATE_CMPG_FLOAT_VFP_finish:
move v0, rTEMP # v0<- vAA
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
/*
* Compare two floating-point values. Puts 0, 1, or -1 into the
* destination register based on the results of the comparison.
*
* Provide a "naninst" instruction that puts 1 or -1 into a1 depending
* on what value we'd like to return when one of the operands is NaN.
*
* The operation we're implementing is:
* if (x == y)
* return 0;
* else if (x < y)
* return -1;
* else if (x > y)
* return 1;
* else
* return {-1,1}; // one or both operands was NaN
*
* On entry:
* a0 = &op1 [vBB]
* a1 = &op2 [vCC]
*
* for: cmpl-float, cmpg-float
*/
/* op vAA, vBB, vCC */
/* "clasic" form */
#ifdef SOFT_FLOAT
LOAD(rOBJ, a0) # rOBJ<- vBB
LOAD(rBIX, a1) # rBIX<- vCC
move a0, rOBJ # a0<- vBB
move a1, rBIX # a1<- vCC
JAL(__eqsf2) # v0<- (vBB == vCC)
li rTEMP, 0 # vAA<- 0
beqz v0, TEMPLATE_CMPL_FLOAT_VFP_finish
move a0, rOBJ # a0<- vBB
move a1, rBIX # a1<- vCC
JAL(__ltsf2) # a0<- (vBB < vCC)
li rTEMP, -1 # vAA<- -1
bltz v0, TEMPLATE_CMPL_FLOAT_VFP_finish
move a0, rOBJ # a0<- vBB
move a1, rBIX # a1<- vCC
JAL(__gtsf2) # v0<- (vBB > vCC)
li rTEMP, 1 # vAA<- 1
bgtz v0, TEMPLATE_CMPL_FLOAT_VFP_finish
#else
LOAD_F(fs0, a0) # fs0<- vBB
LOAD_F(fs1, a1) # fs1<- vCC
c.olt.s fcc0, fs0, fs1 # Is fs0 < fs1
li rTEMP, -1
bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
c.olt.s fcc0, fs1, fs0
li rTEMP, 1
bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
c.eq.s fcc0, fs0, fs1
li rTEMP, 0
bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
#endif
li rTEMP, -1
TEMPLATE_CMPL_FLOAT_VFP_finish:
move v0, rTEMP # v0<- vAA
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
/* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */
/*
* 64-bit floating point sqrt operation.
* If the result is a NaN, bail out to library code to do
* the right thing.
*
* On entry:
* a2 src addr of op1
* On exit:
* v0,v1/fv0 = res
*/
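/*
 * Illustrative sketch of the hard-float strategy below (hypothetical helper):
 * use the FPU sqrt and fall back to libm only when it yields a NaN.
 *
 *   #include <math.h>
 *   static double checked_sqrt(double x)
 *   {
 *       double r = __builtin_sqrt(x);   // sqrt.d fv0, fa0
 *       if (r != r)                     // NaN: let the library do the right thing
 *           r = sqrt(x);
 *       return r;
 *   }
 */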
#ifdef SOFT_FLOAT
LOAD64(rARG0, rARG1, a2) # a0/a1<- vBB/vBB+1
#else
LOAD64_F(fa0, fa0f, a2) # fa0/fa0f<- vBB/vBB+1
sqrt.d fv0, fa0
c.eq.d fv0, fv0
bc1t 1f
#endif
JAL(sqrt)
1:
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
/* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */
/*
* Throw an exception from JIT'ed code.
* On entry:
* a0 Dalvik PC that raises the exception
*/
j .LhandleException
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_MEM_OP_DECODE
dvmCompiler_TEMPLATE_MEM_OP_DECODE:
/* File: mips/TEMPLATE_MEM_OP_DECODE.S */
#if defined(WITH_SELF_VERIFICATION)
/*
* This handler encapsulates heap memory ops for selfVerification mode.
*
* The call to the handler is inserted prior to a heap memory operation.
* This handler then calls a function to decode the memory op, and process
* it accordingly. Afterwards, the handler changes the return address to
* skip the memory op so it never gets executed.
*/
#ifdef HARD_FLOAT
/* push f0-f31 onto stack */
sw f0, fr0*-4(sp) # push f0
sw f1, fr1*-4(sp) # push f1
sw f2, fr2*-4(sp) # push f2
sw f3, fr3*-4(sp) # push f3
sw f4, fr4*-4(sp) # push f4
sw f5, fr5*-4(sp) # push f5
sw f6, fr6*-4(sp) # push f6
sw f7, fr7*-4(sp) # push f7
sw f8, fr8*-4(sp) # push f8
sw f9, fr9*-4(sp) # push f9
sw f10, fr10*-4(sp) # push f10
sw f11, fr11*-4(sp) # push f11
sw f12, fr12*-4(sp) # push f12
sw f13, fr13*-4(sp) # push f13
sw f14, fr14*-4(sp) # push f14
sw f15, fr15*-4(sp) # push f15
sw f16, fr16*-4(sp) # push f16
sw f17, fr17*-4(sp) # push f17
sw f18, fr18*-4(sp) # push f18
sw f19, fr19*-4(sp) # push f19
sw f20, fr20*-4(sp) # push f20
sw f21, fr21*-4(sp) # push f21
sw f22, fr22*-4(sp) # push f22
sw f23, fr23*-4(sp) # push f23
sw f24, fr24*-4(sp) # push f24
sw f25, fr25*-4(sp) # push f25
sw f26, fr26*-4(sp) # push f26
sw f27, fr27*-4(sp) # push f27
sw f28, fr28*-4(sp) # push f28
sw f29, fr29*-4(sp) # push f29
sw f30, fr30*-4(sp) # push f30
sw f31, fr31*-4(sp) # push f31
sub sp, (32-0)*4 # adjust stack pointer
#endif
/* push gp registers (except zero, gp, sp, and fp) */
.set noat
sw AT, r_AT*-4(sp) # push at
.set at
sw v0, r_V0*-4(sp) # push v0
sw v1, r_V1*-4(sp) # push v1
sw a0, r_A0*-4(sp) # push a0
sw a1, r_A1*-4(sp) # push a1
sw a2, r_A2*-4(sp) # push a2
sw a3, r_A3*-4(sp) # push a3
sw t0, r_T0*-4(sp) # push t0
sw t1, r_T1*-4(sp) # push t1
sw t2, r_T2*-4(sp) # push t2
sw t3, r_T3*-4(sp) # push t3
sw t4, r_T4*-4(sp) # push t4
sw t5, r_T5*-4(sp) # push t5
sw t6, r_T6*-4(sp) # push t6
sw t7, r_T7*-4(sp) # push t7
sw s0, r_S0*-4(sp) # push s0
sw s1, r_S1*-4(sp) # push s1
sw s2, r_S2*-4(sp) # push s2
sw s3, r_S3*-4(sp) # push s3
sw s4, r_S4*-4(sp) # push s4
sw s5, r_S5*-4(sp) # push s5
sw s6, r_S6*-4(sp) # push s6
sw s7, r_S7*-4(sp) # push s7
sw t8, r_T8*-4(sp) # push t8
sw t9, r_T9*-4(sp) # push t9
sw k0, r_K0*-4(sp) # push k0
sw k1, r_K1*-4(sp) # push k1
sw ra, r_RA*-4(sp) # push RA
# Note: even if we don't save all 32 registers, we still need to
# adjust SP by 32 registers due to the way we are storing
# the registers on the stack.
sub sp, (32-0)*4 # adjust stack pointer
la a2, .LdvmSelfVerificationMemOpDecode # defined in footer.S
lw a2, (a2)
move a0, ra # a0<- link register
move a1, sp # a1<- stack pointer
JALR(a2)
/* pop gp registers (except zero, gp, sp, and fp) */
# Note: even if we don't save all 32 registers, we still need to
# adjust SP by 32 registers due to the way we are storing
# the registers on the stack.
add sp, (32-0)*4 # adjust stack pointer
.set noat
lw AT, r_AT*-4(sp) # pop at
.set at
lw v0, r_V0*-4(sp) # pop v0
lw v1, r_V1*-4(sp) # pop v1
lw a0, r_A0*-4(sp) # pop a0
lw a1, r_A1*-4(sp) # pop a1
lw a2, r_A2*-4(sp) # pop a2
lw a3, r_A3*-4(sp) # pop a3
lw t0, r_T0*-4(sp) # pop t0
lw t1, r_T1*-4(sp) # pop t1
lw t2, r_T2*-4(sp) # pop t2
lw t3, r_T3*-4(sp) # pop t3
lw t4, r_T4*-4(sp) # pop t4
lw t5, r_T5*-4(sp) # pop t5
lw t6, r_T6*-4(sp) # pop t6
lw t7, r_T7*-4(sp) # pop t7
lw s0, r_S0*-4(sp) # pop s0
lw s1, r_S1*-4(sp) # pop s1
lw s2, r_S2*-4(sp) # pop s2
lw s3, r_S3*-4(sp) # pop s3
lw s4, r_S4*-4(sp) # pop s4
lw s5, r_S5*-4(sp) # pop s5
lw s6, r_S6*-4(sp) # pop s6
lw s7, r_S7*-4(sp) # pop s7
lw t8, r_T8*-4(sp) # pop t8
lw t9, r_T9*-4(sp) # pop t9
lw k0, r_K0*-4(sp) # pop k0
lw k1, r_K1*-4(sp) # pop k1
lw ra, r_RA*-4(sp) # pop RA
#ifdef HARD_FLOAT
/* pop f0-f31 from stack */
add sp, (32-0)*4 # adjust stack pointer
lw f0, fr0*-4(sp) # pop f0
lw f1, fr1*-4(sp) # pop f1
lw f2, fr2*-4(sp) # pop f2
lw f3, fr3*-4(sp) # pop f3
lw f4, fr4*-4(sp) # pop f4
lw f5, fr5*-4(sp) # pop f5
lw f6, fr6*-4(sp) # pop f6
lw f7, fr7*-4(sp) # pop f7
lw f8, fr8*-4(sp) # pop f8
lw f9, fr9*-4(sp) # pop f9
lw f10, fr10*-4(sp) # pop f10
lw f11, fr11*-4(sp) # pop f11
lw f12, fr12*-4(sp) # pop f12
lw f13, fr13*-4(sp) # pop f13
lw f14, fr14*-4(sp) # pop f14
lw f15, fr15*-4(sp) # pop f15
lw f16, fr16*-4(sp) # pop f16
lw f17, fr17*-4(sp) # pop f17
lw f18, fr18*-4(sp) # pop f18
lw f19, fr19*-4(sp) # pop f19
lw f20, fr20*-4(sp) # pop f20
lw f21, fr21*-4(sp) # pop f21
lw f22, fr22*-4(sp) # pop f22
lw f23, fr23*-4(sp) # pop f23
lw f24, fr24*-4(sp) # pop f24
lw f25, fr25*-4(sp) # pop f25
lw f26, fr26*-4(sp) # pop f26
lw f27, fr27*-4(sp) # pop f27
lw f28, fr28*-4(sp) # pop f28
lw f29, fr29*-4(sp) # pop f29
lw f30, fr30*-4(sp) # pop f30
lw f31, fr31*-4(sp) # pop f31
#endif
RETURN
#endif
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_STRING_COMPARETO
dvmCompiler_TEMPLATE_STRING_COMPARETO:
/* File: mips/TEMPLATE_STRING_COMPARETO.S */
/*
* String's compareTo.
*
* Requires a0/a1 to have been previously checked for null. Will
* return a negative value if this string is < comp, 0 if they are the
* same, and a positive value if >.
*
* IMPORTANT NOTE:
*
* This code relies on hard-coded offsets for string objects, and must be
* kept in sync with definitions in UtfString.h. See asm-constants.h
*
* On entry:
* a0: this object pointer
* a1: comp object pointer
*
*/
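/*
 * Illustrative only: a C-level sketch of the algorithm below, using the
 * same quantities the assembly computes (countDiff, minCount).  uint16_t
 * stands in for the Java char elements; the real template additionally
 * unrolls the loop and hands long strings to __memcmp16.
 *
 *   #include <stdint.h>
 *   int string_compareTo_sketch(const uint16_t *thisData, int thisCount,
 *                               const uint16_t *compData, int compCount) {
 *       int countDiff = thisCount - compCount;
 *       int minCount  = (thisCount <= compCount) ? thisCount : compCount;
 *       for (int i = 0; i < minCount; i++) {
 *           int d = (int)thisData[i] - (int)compData[i];
 *           if (d != 0)
 *               return d;          // first differing char decides
 *       }
 *       return countDiff;          // equal prefix: length difference decides
 *   }
 */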
subu v0, a0, a1 # Same?
bnez v0, 1f
RETURN
1:
lw t0, STRING_FIELDOFF_OFFSET(a0)
lw t1, STRING_FIELDOFF_OFFSET(a1)
lw t2, STRING_FIELDOFF_COUNT(a0)
lw a2, STRING_FIELDOFF_COUNT(a1)
lw a0, STRING_FIELDOFF_VALUE(a0)
lw a1, STRING_FIELDOFF_VALUE(a1)
/*
* At this point, we have this/comp:
* offset: t0/t1
* count: t2/a2
* value: a0/a1
* We're going to compute
* a3 <- countDiff
* a2 <- minCount
*/
subu a3, t2, a2 # a3<- countDiff
sleu t7, t2, a2
movn a2, t2, t7 # a2<- minCount
/*
* Note: data pointers point to first element.
*/
addu a0, 16 # point to contents[0]
addu a1, 16 # point to contents[0]
/* Now, build pointers to the string data */
sll t7, t0, 1 # multiply offset by 2
addu a0, a0, t7
sll t7, t1, 1 # multiply offset by 2
addu a1, a1, t7
/*
* At this point we have:
* a0: *this string data
* a1: *comp string data
* a2: iteration count for comparison
* a3: value to return if the first part of the string is equal
* v0: reserved for result
* t0-t5 available for loading string data
*/
subu a2, 2
bltz a2, do_remainder2
/*
* Unroll the first two checks so we can quickly catch early mismatch
* on long strings (but preserve incoming alignment)
*/
lhu t0, 0(a0)
lhu t1, 0(a1)
subu v0, t0, t1
beqz v0, 1f
RETURN
1:
lhu t2, 2(a0)
lhu t3, 2(a1)
subu v0, t2, t3
beqz v0, 2f
RETURN
2:
addu a0, 4 # offset to contents[2]
addu a1, 4 # offset to contents[2]
li t7, 28
bgt a2, t7, do_memcmp16
subu a2, 3
bltz a2, do_remainder
loopback_triple:
lhu t0, 0(a0)
lhu t1, 0(a1)
subu v0, t0, t1
beqz v0, 1f
RETURN
1:
lhu t2, 2(a0)
lhu t3, 2(a1)
subu v0, t2, t3
beqz v0, 2f
RETURN
2:
lhu t4, 4(a0)
lhu t5, 4(a1)
subu v0, t4, t5
beqz v0, 3f
RETURN
3:
addu a0, 6 # offset to contents[i+3]
addu a1, 6 # offset to contents[i+3]
subu a2, 3
bgez a2, loopback_triple
do_remainder:
addu a2, 3
beqz a2, returnDiff
loopback_single:
lhu t0, 0(a0)
lhu t1, 0(a1)
subu v0, t0, t1
bnez v0, 1f
addu a0, 2 # offset to contents[i+1]
addu a1, 2 # offset to contents[i+1]
subu a2, 1
bnez a2, loopback_single
returnDiff:
move v0, a3
1:
RETURN
do_remainder2:
addu a2, 2
bnez a2, loopback_single
move v0, a3
RETURN
/* Long string case */
do_memcmp16:
move rOBJ, a3 # save return value if strings are equal
JAL(__memcmp16)
seq t0, v0, zero
movn v0, rOBJ, t0 # overwrite return value if strings are equal
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_STRING_INDEXOF
dvmCompiler_TEMPLATE_STRING_INDEXOF:
/* File: mips/TEMPLATE_STRING_INDEXOF.S */
/*
* String's indexOf.
*
* Requires a0 to have been previously checked for null. Will
* return the index of the match of a1 in v0, or -1 if there is no match.
*
* IMPORTANT NOTE:
*
* This code relies on hard-coded offsets for string objects, and must be
* kept in sync with definitions in UtfString.h. See asm-constants.h
*
* On entry:
* a0: string object pointer
* a1: char to match
* a2: Starting offset in string data
*/
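/*
 * Illustrative only: a C-level sketch of the search below.  The starting
 * offset is clamped to [0..count], and the returned index is relative to
 * the start of the string, or -1 when there is no match.
 *
 *   #include <stdint.h>
 *   int string_indexOf_sketch(const uint16_t *data, int count,
 *                             uint16_t ch, int start) {
 *       if (start < 0)     start = 0;
 *       if (start > count) start = count;
 *       for (int i = start; i < count; i++) {
 *           if (data[i] == ch)
 *               return i;
 *       }
 *       return -1;
 *   }
 */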
lw t0, STRING_FIELDOFF_OFFSET(a0)
lw t1, STRING_FIELDOFF_COUNT(a0)
lw v0, STRING_FIELDOFF_VALUE(a0)
/*
* At this point, we have:
* v0: object pointer
* a1: char to match
* a2: starting offset
* t0: offset
* t1: string length
*/
/* Point to first element */
addu v0, 16 # point to contents[0]
/* Build pointer to start of string data */
sll t7, t0, 1 # multiply offset by 2
addu v0, v0, t7
/* Save a copy of starting data in v1 */
move v1, v0
/* Clamp start to [0..count] */
slt t7, a2, zero
movn a2, zero, t7
sgt t7, a2, t1
movn a2, t1, t7
/* Build pointer to start of data to compare */
sll t7, a2, 1 # multiply offset by 2
addu v0, v0, t7
/* Compute iteration count */
subu a3, t1, a2
/*
* At this point we have:
* v0: start of data to test
* a1: char to compare
* a3: iteration count
* v1: original start of string
* t0-t7 available for loading string data
*/
subu a3, 4
bltz a3, indexof_remainder
indexof_loop4:
lhu t0, 0(v0)
beq t0, a1, match_0
lhu t0, 2(v0)
beq t0, a1, match_1
lhu t0, 4(v0)
beq t0, a1, match_2
lhu t0, 6(v0)
beq t0, a1, match_3
addu v0, 8 # offset to contents[i+4]
subu a3, 4
bgez a3, indexof_loop4
indexof_remainder:
addu a3, 4
beqz a3, indexof_nomatch
indexof_loop1:
lhu t0, 0(v0)
beq t0, a1, match_0
addu v0, 2 # offset to contents[i+1]
subu a3, 1
bnez a3, indexof_loop1
indexof_nomatch:
li v0, -1
RETURN
match_0:
subu v0, v1
sra v0, v0, 1 # divide by 2
RETURN
match_1:
addu v0, 2
subu v0, v1
sra v0, v0, 1 # divide by 2
RETURN
match_2:
addu v0, 4
subu v0, v1
sra v0, v0, 1 # divide by 2
RETURN
match_3:
addu v0, 6
subu v0, v1
sra v0, v0, 1 # divide by 2
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INTERPRET
dvmCompiler_TEMPLATE_INTERPRET:
/* File: mips/TEMPLATE_INTERPRET.S */
/*
* This handler transfers control to the interpreter without performing
* any lookups. It may be called either as part of a normal chaining
* operation, or from the transition code in header.S. We distinguish
* the two cases by looking at the link register. If called from a
* translation chain, it will point to the chaining Dalvik PC.
* On entry:
* ra - if NULL:
* a1 - the Dalvik PC to begin interpretation.
* else
* [ra] contains Dalvik PC to begin interpretation
* rSELF - pointer to thread
* rFP - Dalvik frame pointer
*/
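/*
 * Illustrative only: the dispatch decision below, as a hypothetical C
 * helper.  The chosen Dalvik PC ends up in a0 before the jump to
 * dvmJitToInterpPunt, which does not return.
 *
 *   #include <stddef.h>
 *   #include <stdint.h>
 *   static uint32_t pick_dalvik_pc(const uint32_t *ra, uint32_t a1) {
 *       return (ra == NULL) ? a1     // called from the transition code
 *                           : *ra;   // chaining cell holds the Dalvik PC
 *   }
 */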
la t0, dvmJitToInterpPunt
move a0, a1
beq ra, zero, 1f
lw a0, 0(ra)
1:
jr t0
# doesn't return
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_MONITOR_ENTER
dvmCompiler_TEMPLATE_MONITOR_ENTER:
/* File: mips/TEMPLATE_MONITOR_ENTER.S */
/*
* Call out to the runtime to lock an object. Because this thread
* may have been suspended in THREAD_MONITOR state and the Jit's
* translation cache subsequently cleared, we cannot return directly.
* Instead, unconditionally transition to the interpreter to resume.
*
* On entry:
* a0 - self pointer
* a1 - the object (which has already been null-checked by the caller)
* rPC - the Dalvik PC of the following instruction.
*/
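/*
 * Illustrative only: a C sketch of what the sequence below amounts to.
 * Thread/Object are stand-in types; treat the dvmLockObject prototype as an
 * assumption from the VM sources.
 *
 *   struct Object;
 *   struct Thread { void *inJitCodeCache; };
 *   extern void dvmLockObject(struct Thread *self, struct Object *obj);
 *
 *   static void monitor_enter_sketch(struct Thread *self, struct Object *obj) {
 *       self->inJitCodeCache = 0;   // record that we're not returning here
 *       dvmLockObject(self, obj);   // may suspend; the cache may be cleared
 *       // ...then jump to dvmJitToInterpNoChain to resume in the interpreter
 *   }
 */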
la a2, .LdvmLockObject
lw t9, (a2)
sw zero, offThread_inJitCodeCache(a0) # record that we're not returning
JALR(t9) # dvmLockObject(self, obj)
lw gp, STACK_OFFSET_GP(sp)
la a2, .LdvmJitToInterpNoChain
lw a2, (a2)
# Bail to interpreter - no chain [note - rPC still contains dPC]
#if defined(WITH_JIT_TUNING)
li a0, kHeavyweightMonitor
#endif
jr a2
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
/* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */
/*
* To support deadlock prediction, this version of MONITOR_ENTER
* will always call the heavyweight dvmLockObject, check for an
* exception and then bail out to the interpreter.
*
* On entry:
* a0 - self pointer
* a1 - the object (which has already been null-checked by the caller)
* rPC - the Dalvik PC of the following instruction.
*
*/
la a2, .LdvmLockObject
lw t9, (a2)
sw zero, offThread_inJitCodeCache(a0) # record that we're not returning
JALR(t9) # dvmLockObject(self, obj)
lw gp, STACK_OFFSET_GP(sp)
# test for exception
lw a1, offThread_exception(rSELF)
beqz a1, 1f
sub a0, rPC, 2 # roll dPC back to this monitor instruction
j .LhandleException
1:
# Bail to interpreter - no chain [note - rPC still contains dPC]
#if defined(WITH_JIT_TUNING)
li a0, kHeavyweightMonitor
#endif
la a2, .LdvmJitToInterpNoChain
lw a2, (a2)
jr a2
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_RESTORE_STATE
dvmCompiler_TEMPLATE_RESTORE_STATE:
/* File: mips/TEMPLATE_RESTORE_STATE.S */
/*
* This handler restores state following a selfVerification memory access.
* On entry:
* a0 - offset from rSELF to the 1st element of the coreRegs save array.
* Note: the following registers are not restored
* zero, AT, gp, sp, fp, ra
*/
add a0, a0, rSELF # pointer to heapArgSpace.coreRegs[0]
#if 0
lw zero, r_ZERO*4(a0) # restore zero
#endif
.set noat
lw AT, r_AT*4(a0) # restore at
.set at
lw v0, r_V0*4(a0) # restore v0
lw v1, r_V1*4(a0) # restore v1
lw a1, r_A1*4(a0) # restore a1
lw a2, r_A2*4(a0) # restore a2
lw a3, r_A3*4(a0) # restore a3
lw t0, r_T0*4(a0) # restore t0
lw t1, r_T1*4(a0) # restore t1
lw t2, r_T2*4(a0) # restore t2
lw t3, r_T3*4(a0) # restore t3
lw t4, r_T4*4(a0) # restore t4
lw t5, r_T5*4(a0) # restore t5
lw t6, r_T6*4(a0) # restore t6
lw t7, r_T7*4(a0) # restore t7
lw s0, r_S0*4(a0) # restore s0
lw s1, r_S1*4(a0) # restore s1
lw s2, r_S2*4(a0) # restore s2
lw s3, r_S3*4(a0) # restore s3
lw s4, r_S4*4(a0) # restore s4
lw s5, r_S5*4(a0) # restore s5
lw s6, r_S6*4(a0) # restore s6
lw s7, r_S7*4(a0) # restore s7
lw t8, r_T8*4(a0) # restore t8
lw t9, r_T9*4(a0) # restore t9
lw k0, r_K0*4(a0) # restore k0
lw k1, r_K1*4(a0) # restore k1
#if 0
lw gp, r_GP*4(a0) # restore gp
lw sp, r_SP*4(a0) # restore sp
lw fp, r_FP*4(a0) # restore fp
lw ra, r_RA*4(a0) # restore ra
#endif
/* #ifdef HARD_FLOAT */
#if 0
lw f0, fr0*4(a0) # restore f0
lw f1, fr1*4(a0) # restore f1
lw f2, fr2*4(a0) # restore f2
lw f3, fr3*4(a0) # restore f3
lw f4, fr4*4(a0) # restore f4
lw f5, fr5*4(a0) # restore f5
lw f6, fr6*4(a0) # restore f6
lw f7, fr7*4(a0) # restore f7
lw f8, fr8*4(a0) # restore f8
lw f9, fr9*4(a0) # restore f9
lw f10, fr10*4(a0) # restore f10
lw f11, fr11*4(a0) # restore f11
lw f12, fr12*4(a0) # restore f12
lw f13, fr13*4(a0) # restore f13
lw f14, fr14*4(a0) # restore f14
lw f15, fr15*4(a0) # restore f15
lw f16, fr16*4(a0) # restore f16
lw f17, fr17*4(a0) # restore f17
lw f18, fr18*4(a0) # restore f18
lw f19, fr19*4(a0) # restore f19
lw f20, fr20*4(a0) # restore f20
lw f21, fr21*4(a0) # restore f21
lw f22, fr22*4(a0) # restore f22
lw f23, fr23*4(a0) # restore f23
lw f24, fr24*4(a0) # restore f24
lw f25, fr25*4(a0) # restore f25
lw f26, fr26*4(a0) # restore f26
lw f27, fr27*4(a0) # restore f27
lw f28, fr28*4(a0) # restore f28
lw f29, fr29*4(a0) # restore f29
lw f30, fr30*4(a0) # restore f30
lw f31, fr31*4(a0) # restore f31
#endif
lw a0, r_A0*4(a0) # restore a0 (the base pointer, restored last)
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_SAVE_STATE
dvmCompiler_TEMPLATE_SAVE_STATE:
/* File: mips/TEMPLATE_SAVE_STATE.S */
/*
* This handler performs a register save for selfVerification mode.
* On entry:
* Top of stack + 4: a1 value to save
* Top of stack + 0: a0 value to save
* a0 - offset from rSELF to the beginning of the heapArgSpace record
* a1 - the value of regMap
*
* The handler must save regMap, the core registers, and f0-f31 if an FPU is
* present, and then return with the core registers holding their original
* values (note that this means a0 and a1 must take the values on the stack,
* not the ones in those registers on entry).
* Finally, the two words previously pushed must be popped.
* Note: the following registers are not saved
* zero, AT, gp, sp, fp, ra
*/
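/*
 * Illustrative only: a hypothetical C view of the record written below.
 * The regMap word is stored first, followed by one slot per core register
 * indexed by the r_* constants above (the zero/gp/sp/fp/ra slots are left
 * untouched).
 *
 *   #include <stdint.h>
 *   struct SaveStateRecordSketch {     // hypothetical name
 *       uint32_t regMap;               // sw a1, 0(a0)
 *       uint32_t coreRegs[32];         // sw reg, r_XX*4(a0) after a0 += 4
 *   };
 */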
add a0, a0, rSELF # pointer to heapArgSpace
sw a1, 0(a0) # save regMap
add a0, a0, 4 # pointer to coreRegs
#if 0
sw zero, r_ZERO*4(a0) # save zero
#endif
.set noat
sw AT, r_AT*4(a0) # save at
.set at
sw v0, r_V0*4(a0) # save v0
sw v1, r_V1*4(a0) # save v1
lw a1, 0(sp) # recover a0 value
sw a1, r_A0*4(a0) # save a0
lw a1, 4(sp) # recover a1 value
sw a1, r_A1*4(a0) # save a1
sw a2, r_A2*4(a0) # save a2
sw a3, r_A3*4(a0) # save a3
sw t0, r_T0*4(a0) # save t0
sw t1, r_T1*4(a0) # save t1
sw t2, r_T2*4(a0) # save t2
sw t3, r_T3*4(a0) # save t3
sw t4, r_T4*4(a0) # save t4
sw t5, r_T5*4(a0) # save t5
sw t6, r_T6*4(a0) # save t6
sw t7, r_T7*4(a0) # save t7
sw s0, r_S0*4(a0) # save s0
sw s1, r_S1*4(a0) # save s1
sw s2, r_S2*4(a0) # save s2
sw s3, r_S3*4(a0) # save s3
sw s4, r_S4*4(a0) # save s4
sw s5, r_S5*4(a0) # save s5
sw s6, r_S6*4(a0) # save s6
sw s7, r_S7*4(a0) # save s7
sw t8, r_T8*4(a0) # save t8
sw t9, r_T9*4(a0) # save t9
sw k0, r_K0*4(a0) # save k0
sw k1, r_K1*4(a0) # save k1
#if 0
sw gp, r_GP*4(a0) # save gp
sw sp, r_SP*4(a0) # save sp (need to adjust??? )
sw fp, r_FP*4(a0) # save fp
sw ra, r_RA*4(a0) # save ra
#endif
/* #ifdef HARD_FLOAT */
#if 0
sw f0, fr0*4(a0) # save f0
sw f1, fr1*4(a0) # save f1
sw f2, fr2*4(a0) # save f2
sw f3, fr3*4(a0) # save f3
sw f4, fr4*4(a0) # save f4
sw f5, fr5*4(a0) # save f5
sw f6, fr6*4(a0) # save f6
sw f7, fr7*4(a0) # save f7
sw f8, fr8*4(a0) # save f8
sw f9, fr9*4(a0) # save f9
sw f10, fr10*4(a0) # save f10
sw f11, fr11*4(a0) # save f11
sw f12, fr12*4(a0) # save f12
sw f13, fr13*4(a0) # save f13
sw f14, fr14*4(a0) # save f14
sw f15, fr15*4(a0) # save f15
sw f16, fr16*4(a0) # save f16
sw f17, fr17*4(a0) # save f17
sw f18, fr18*4(a0) # save f18
sw f19, fr19*4(a0) # save f19
sw f20, fr20*4(a0) # save f20
sw f21, fr21*4(a0) # save f21
sw f22, fr22*4(a0) # save f22
sw f23, fr23*4(a0) # save f23
sw f24, fr24*4(a0) # save f24
sw f25, fr25*4(a0) # save f25
sw f26, fr26*4(a0) # save f26
sw f27, fr27*4(a0) # save f27
sw f28, fr28*4(a0) # save f28
sw f29, fr29*4(a0) # save f29
sw f30, fr30*4(a0) # save f30
sw f31, fr31*4(a0) # save f31
#endif
lw a0, 0(sp) # recover a0 value
lw a1, 4(sp) # recover a1 value
add sp, sp, 8 # pop the two saved words
RETURN
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
/* File: mips/TEMPLATE_PERIODIC_PROFILING.S */
/*
* Increment profile counter for this trace, and decrement
* sample counter. If sample counter goes below zero, turn
* off profiling.
*
* On entry
* (ra-16) is the address of the pointer to the counter. Note: the counter
* pointer actually lives 16 bytes before the return target for mips:
* - 4 bytes for the prof count addr.
* - 4 bytes for the chain cell offset (2 bytes, 32-bit aligned).
* - 4 bytes for the call to TEMPLATE_PERIODIC_PROFILING.
* - 4 bytes for the call delay slot.
*/
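/*
 * Illustrative only: the counter bookkeeping below, as C.  The names are
 * hypothetical; profileCount lives in the trace header, the countdown in
 * the Thread.  When the countdown goes negative, nothing is written back
 * and profiling is switched off instead.
 *
 *   #include <stdint.h>
 *   static void periodic_profiling_sketch(uint32_t *profileCount,
 *                                         int32_t *countdown) {
 *       uint32_t count = *profileCount + 1;   // count this trace execution
 *       int32_t  left  = *countdown - 1;      // burn one sample
 *       if (left < 0)
 *           return;                           // dvmJitTraceProfilingOff()
 *       *profileCount = count;
 *       *countdown    = left;
 *   }
 */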
lw a0, -16(ra)
lw a1, offThread_pProfileCountdown(rSELF)
lw a2, 0(a0) # get counter
lw a3, 0(a1) # get countdown timer
addu a2, 1
sub a3, 1 # FIXME - bug in ARM code???
bltz a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
sw a2, 0(a0)
sw a3, 0(a1)
RETURN
.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
move rTEMP, ra # preserve ra
la a0, dvmJitTraceProfilingOff
JALR(a0)
jr rTEMP
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_RETURN_PROF
dvmCompiler_TEMPLATE_RETURN_PROF:
/* File: mips/TEMPLATE_RETURN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_RETURN.S */
/*
* Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
* If the stored value in returnAddr
* is non-zero, the caller was compiled by the JIT, so return to the
* address in the code cache following the invoke instruction. Otherwise
* return to the special dvmJitToInterpNoChain entry point.
*/
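/*
 * Illustrative only: the core decision made below once the callee frame is
 * popped, as a hypothetical C helper.  returnAddr comes from the callee's
 * StackSaveArea and breakFlags from the Thread, as in the code.
 *
 *   static void *pick_return_target(void *returnAddr, int breakFlags,
 *                                   void *dvmJitToInterpNoChainAddr) {
 *       if (breakFlags != 0)
 *           returnAddr = 0;                  // force the no-chain path
 *       return returnAddr ? returnAddr       // caller is JIT-compiled
 *                         : dvmJitToInterpNoChainAddr;
 *   }
 */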
#if defined(TEMPLATE_INLINE_PROFILING)
# preserve a0-a2 and ra
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(ra, 12)
# a0=rSELF
move a0, rSELF
la t9, dvmFastMethodTraceExit
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a2 and ra
SCRATCH_LOAD(ra, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
#endif
SAVEAREA_FROM_FP(a0, rFP) # a0<- saveArea (old)
lw t0, offStackSaveArea_prevFrame(a0) # t0<- saveArea->prevFrame
lbu t1, offThread_breakFlags(rSELF) # t1<- breakFlags
lw rPC, offStackSaveArea_savedPc(a0) # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
lw t2, offStackSaveArea_returnAddr(a0) # t2<- chaining cell ret
#else
move t2, zero # disable chaining
#endif
lw a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
# a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
beq a2, zero, 1f # bail to interpreter
#else
bne a2, zero, 2f
JALR(ra) # punt to interpreter and compare state
# DOUG: assume this does not return ???
2:
#endif
la t4, .LdvmJitToInterpNoChainNoProfile # defined in footer.S
lw a1, (t4)
move rFP, t0 # publish new FP
beq a2, zero, 4f
lw t0, offMethod_clazz(a2) # t0<- method->clazz
4:
sw a2, offThread_method(rSELF) # self->method = newSave->method
lw a0, offClassObject_pDvmDex(t0) # a0<- method->clazz->pDvmDex
sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp
add rPC, rPC, 3*2 # publish new rPC
sw a0, offThread_methodClassDex(rSELF)
movn t2, zero, t1 # check the breakFlags and
# clear the chaining cell address
sw t2, offThread_inJitCodeCache(rSELF) # in code cache or not
beq t2, zero, 3f # chaining cell exists?
JALR(t2) # jump to the chaining cell
# DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
li a0, kCallsiteInterpreted
#endif
j a1 # callsite is interpreted
1:
sw zero, offThread_inJitCodeCache(rSELF) # reset inJitCodeCache
SAVE_PC_TO_SELF() # SAVE_PC_FP_TO_SELF()
SAVE_FP_TO_SELF()
la t4, .LdvmMterpStdBail # defined in footer.S
lw a2, (t4)
move a0, rSELF # Expecting rSELF in a0
JALR(a2) # exit the interpreter
# DOUG: assume this does not return ???
#undef TEMPLATE_INLINE_PROFILING
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
/*
* For polymorphic callsites - set up the Dalvik frame and load the Dalvik PC
* into rPC then jump to dvmJitToInterpNoChain to dispatch the
* runtime-resolved callee.
*/
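/*
 * Illustrative only: the frame arithmetic and stack-overflow check below,
 * in C.  A StackSaveArea sits immediately below each frame; regsSize and
 * outsSize are in 4-byte registers.  Names mirror the assembly comments.
 *
 *   #include <stdint.h>
 *   static int frame_fits_sketch(uint32_t *fp, uint32_t regsSize,
 *                                uint32_t outsSize, uint8_t *interpStackEnd,
 *                                uint32_t saveAreaWords) {
 *       uint32_t *newFp  = (fp - saveAreaWords) - regsSize;    // old savearea - regsSize
 *       uint32_t *bottom = (newFp - saveAreaWords) - outsSize; // newsave - outsSize
 *       return (uint8_t *)bottom >= interpStackEnd;            // 0 => overflow, bail
 *   }
 */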
# a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize
lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize
lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags
move a3, a1 # a3<- returnCell
SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area
sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg)
sub a1, a1, t6 # a1<- newFp(old savearea-regsSize)
SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area
sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg)
sub t0, t0, t6 # t0<- bottom (newsave-outsSize)
bgeu t0, t9, 1f # bottom < interpStackEnd?
RETURN # return to raise stack overflow excep.
1:
# a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz
lw t0, offMethod_accessFlags(a0) # t0<- methodToCall->accessFlags
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns
# set up newSaveArea
sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
beqz t8, 2f # breakFlags != 0
RETURN # bail to the interpreter
2:
and t6, t0, ACC_NATIVE
beqz t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
j .LinvokeNative
#else
RETURN # bail to the interpreter
#endif
3:
# continue executing the next instruction through the interpreter
la t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
lw rTEMP, (t0)
lw a3, offClassObject_pDvmDex(t9) # a3<- method->clazz->pDvmDex
# Update "thread" values for the new method
sw a0, offThread_method(rSELF) # self->method = methodToCall
sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ...
move rFP, a1 # fp = newFp
sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
# preserve a0-a3
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(a3, 12)
# a0=methodToCall, a1=rSELF
move a1, rSELF
la t9, dvmFastMethodTraceEnter
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a3
SCRATCH_LOAD(a3, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
#endif
# Start executing the callee
#if defined(WITH_JIT_TUNING)
li a0, kInlineCacheMiss
#endif
jr rTEMP # dvmJitToInterpTraceSelectNoChain
#undef TEMPLATE_INLINE_PROFILING
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
/*
* For a monomorphic callsite, set up the Dalvik frame and return through the
* link register (ra) to transfer control to the callee
* method through a dedicated chaining cell.
*/
# a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
# methodToCall is guaranteed to be non-native
.LinvokeChainProf:
lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize
lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize
lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags
move a3, a1 # a3<- returnCell
SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area
sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg)
sub a1, a1, t6 # a1<- newFp(old savearea-regsSize)
SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area
add t2, ra, 8 # setup the punt-to-interp address
# 8 bytes skips branch and delay slot
sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg)
sub t0, t0, t6 # t0<- bottom (newsave-outsSize)
bgeu t0, t9, 1f # bottom < interpStackEnd?
jr t2 # return to raise stack overflow excep.
1:
# a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns
# set up newSaveArea
sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
beqz t8, 2f # breakFlags != 0
jr t2 # bail to the interpreter
2:
lw a3, offClassObject_pDvmDex(t9) # a3<- methodToCall->clazz->pDvmDex
# Update "thread" values for the new method
sw a0, offThread_method(rSELF) # self->method = methodToCall
sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ...
move rFP, a1 # fp = newFp
sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
# preserve a0-a2 and ra
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(ra, 12)
move a1, rSELF
# a0=methodToCall, a1=rSELF
la t9, dvmFastMethodTraceEnter
jalr t9
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a2 and ra
SCRATCH_LOAD(ra, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
#endif
RETURN # return to the callee-chaining cell
#undef TEMPLATE_INLINE_PROFILING
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
/*
* For a polymorphic callsite, check whether the cached class pointer matches
* the current one. If so, set up the Dalvik frame and return through the
* link register (ra) to transfer control to the callee
* method through a dedicated chaining cell.
*
* The predicted chaining cell is declared in MipsLIR.h (ArmLIR.h on ARM) with the
* following layout:
*
* typedef struct PredictedChainingCell {
* u4 branch;
* u4 delay_slot;
* const ClassObject *clazz;
* const Method *method;
* u4 counter;
* } PredictedChainingCell;
*
* Upon returning to the callsite:
* - ra : to branch to the chaining cell
* - ra+8 : to punt to the interpreter
* - ra+16: to fully resolve the callee and possibly rechain.
* a3 <- class
*/
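/*
 * Illustrative only: the check performed below on the cell laid out above,
 * as a hypothetical C helper.  icRechainCount is the shared counter read
 * from the Thread.
 *
 *   static int predicted_chain_hit(const void *cellClazz, const void *thisClazz,
 *                                  int *icRechainCount) {
 *       if (cellClazz == thisClazz)
 *           return 1;                   // hit: branch to the chaining cell
 *       if (cellClazz != 0)             // initialized cell: spend one
 *           *icRechainCount -= 1;       // rechain count (written back)
 *       return 0;                       // miss: go to the ra+16 landing pad
 *   }
 */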
# a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
lw a3, offObject_clazz(a0) # a3 <- this->class
lw rIBASE, 8(a2) # rIBASE <- predictedChainCell->clazz
lw a0, 12(a2) # a0 <- predictedChainCell->method
lw t1, offThread_icRechainCount(rSELF) # t1 <- shared rechainCount
#if defined(WITH_JIT_TUNING)
la rINST, .LdvmICHitCount
#add t2, t2, 1
bne a3, rIBASE, 1f
nop
lw t2, 0(rINST)
add t2, t2, 1
sw t2, 0(rINST)
1:
#add t2, t2, 1
#endif
beq a3, rIBASE, .LinvokeChainProf # branch if predicted chain is valid
lw rINST, offClassObject_vtable(a3) # rINST <- this->class->vtable
beqz rIBASE, 2f # initialized class or not
sub a1, t1, 1 # count--
sw a1, offThread_icRechainCount(rSELF) # write back to InterpState
b 3f
2:
move a1, zero
3:
add ra, ra, 16 # return to fully-resolve landing pad
/*
* a1 <- count
* a2 <- &predictedChainCell
* a3 <- this->class
* rPC <- dPC
* rINST <- this->class->vtable
*/
RETURN
#undef TEMPLATE_INLINE_PROFILING
/* ------------------------------ */
.balign 4
.global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
# a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize
lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags
move a3, a1 # a3<- returnCell
SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area
sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg)
sub a1, a1, t6 # a1<- newFp(old savearea-regsSize)
SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area
bgeu t0, t9, 1f # bottom < interpStackEnd?
RETURN # return to raise stack overflow excep.
1:
# a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns
# set up newSaveArea
sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
lw rTEMP, offMethod_nativeFunc(a0) # rTEMP<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
beqz t8, 2f # breakFlags != 0
RETURN # bail to the interpreter
2:
#else
RETURN # bail to the interpreter unconditionally
#endif
# go ahead and transfer control to the native code
lw t6, offThread_jniLocal_topCookie(rSELF) # t6<- thread->localRef->...
sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp
sw zero, offThread_inJitCodeCache(rSELF) # not in the jit code cache
sw t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
# newFp->localRefCookie=top
SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area
move a2, a0 # a2<- methodToCall
move a0, a1 # a0<- newFp
add a1, rSELF, offThread_retval # a1<- &retval
move a3, rSELF # a3<- self
#if defined(TEMPLATE_INLINE_PROFILING)
# a2: methodToCall
# preserve a0-a3
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(a3, 12)
move a0, a2
move a1, rSELF
# a0=JNIMethod, a1=rSELF
la t9, dvmFastMethodTraceEnter
JALR(t9) # off to the native code
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a3
SCRATCH_LOAD(a3, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
move rOBJ, a2 # save a2
#endif
JALR(rTEMP) # off to the native code
lw gp, STACK_OFFSET_GP(sp)
#if defined(TEMPLATE_INLINE_PROFILING)
move a0, rOBJ
move a1, rSELF
# a0=JNIMethod, a1=rSELF
la t9, dvmFastNativeMethodTraceExit
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
#endif
# native return; rBIX=newSaveArea
# equivalent to dvmPopJniLocals
lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr
lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
lw a1, offThread_exception(rSELF) # check for exception
sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp
sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top
lw a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
# a0 = dalvikCallsitePC
bnez a1, .LhandleException # handle exception if any
sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly
beqz a2, 3f
jr a2 # go if the return chaining cell still exists
3:
# continue executing the next instruction through the interpreter
la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
lw a1, (a1)
add rPC, a0, 3*2 # reconstruct new rPC (advance 3 dalvik instr)
#if defined(WITH_JIT_TUNING)
li a0, kCallsiteInterpreted
#endif
jr a1
#undef TEMPLATE_INLINE_PROFILING
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: mips/footer.S */
/*
* ===========================================================================
* Common subroutines and data
* ===========================================================================
*/
.section .data.rel.ro
.align 4
.LinvokeNative:
# Prep for the native call
# a1 = newFP, a0 = methodToCall
lw t9, offThread_jniLocal_topCookie(rSELF) # t9<- thread->localRef->...
sw zero, offThread_inJitCodeCache(rSELF) # not in jit code cache
sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp
sw t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
# newFp->localRefCookie=top
lhu ra, offThread_subMode(rSELF)
SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area
move a2, a0 # a2<- methodToCall
move a0, a1 # a0<- newFp
add a1, rSELF, offThread_retval # a1<- &retval
move a3, rSELF # a3<- self
andi ra, kSubModeMethodTrace
beqz ra, 121f
# a2: methodToCall
# preserve a0-a3
SCRATCH_STORE(a0, 0)
SCRATCH_STORE(a1, 4)
SCRATCH_STORE(a2, 8)
SCRATCH_STORE(a3, 12)
move rTEMP, a2 # preserve a2
move a0, rTEMP
move a1, rSELF
la t9, dvmFastMethodTraceEnter
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
# restore a0-a3
SCRATCH_LOAD(a3, 12)
SCRATCH_LOAD(a2, 8)
SCRATCH_LOAD(a1, 4)
SCRATCH_LOAD(a0, 0)
lw t9, offMethod_nativeFunc(a2)
JALR(t9) # call methodToCall->nativeFunc
lw gp, STACK_OFFSET_GP(sp)
move a0, rTEMP
move a1, rSELF
la t9, dvmFastNativeMethodTraceExit
JALR(t9)
lw gp, STACK_OFFSET_GP(sp)
b 212f
121:
lw t9, offMethod_nativeFunc(a2)
JALR(t9) # call methodToCall->nativeFunc
lw gp, STACK_OFFSET_GP(sp)
212:
# native return; rBIX=newSaveArea
# equivalent to dvmPopJniLocals
lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr
lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
lw a1, offThread_exception(rSELF) # check for exception
sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp
sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top
lw a0, offStackSaveArea_savedPc(rBIX) # reload rPC
# a0 = dalvikCallsitePC
bnez a1, .LhandleException # handle exception if any
sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly
beqz a2, 3f
jr a2 # go if the return chaining cell still exists
3:
# continue executing the next instruction through the interpreter
la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
lw a1, (a1)
add rPC, a0, 3*2 # reconstruct new rPC
#if defined(WITH_JIT_TUNING)
li a0, kCallsiteInterpreted
#endif
jr a1
/*
* On entry:
* a0 Faulting Dalvik PC
*/
.LhandleException:
#if defined(WITH_SELF_VERIFICATION)
la t0, .LdeadFood
lw t0, (t0) # should not see this under self-verification mode
jr t0
.LdeadFood:
.word 0xdeadf00d
#endif
sw zero, offThread_inJitCodeCache(rSELF) # in interpreter land
la a1, .LdvmMterpCommonExceptionThrown # PIC way of getting &func
lw a1, (a1)
la rIBASE, .LdvmAsmInstructionStart # PIC way of getting &func
lw rIBASE, (rIBASE)
move rPC, a0 # reload the faulting Dalvik address
jr a1 # branch to dvmMterpCommonExceptionThrown
.align 4
.LdvmAsmInstructionStart:
.word dvmAsmInstructionStart
.LdvmJitToInterpNoChainNoProfile:
.word dvmJitToInterpNoChainNoProfile
.LdvmJitToInterpTraceSelectNoChain:
.word dvmJitToInterpTraceSelectNoChain
.LdvmJitToInterpNoChain:
.word dvmJitToInterpNoChain
.LdvmMterpStdBail:
.word dvmMterpStdBail
.LdvmMterpCommonExceptionThrown:
.word dvmMterpCommonExceptionThrown
.LdvmLockObject:
.word dvmLockObject
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
.word gDvmICHitCount
#endif
#if defined(WITH_SELF_VERIFICATION)
.LdvmSelfVerificationMemOpDecode:
.word dvmSelfVerificationMemOpDecode
#endif
.global dmvCompilerTemplateEnd
dmvCompilerTemplateEnd:
#endif /* WITH_JIT */