| /* |
| * This file was generated automatically by gen-template.py for 'mips'. |
| * |
| * --> DO NOT EDIT <-- |
| */ |
| |
| /* File: mips/header.S */ |
| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #if defined(WITH_JIT) |
| |
| /* |
| * This is a #include, not a %include, because we want the C pre-processor |
| * to expand the macros into assembler assignment statements. |
| */ |
| #include "../../../mterp/common/asm-constants.h" |
| #include "../../../mterp/common/mips-defines.h" |
| #include "../../../mterp/common/jit-config.h" |
| #include <asm/regdef.h> |
| #include <asm/fpregdef.h> |
| |
| #ifdef __mips_hard_float |
| #define HARD_FLOAT |
| #else |
| #define SOFT_FLOAT |
| #endif |
| |
| /* MIPS definitions and declarations |
| |
| reg nick purpose |
| s0 rPC interpreted program counter, used for fetching instructions |
| s1 rFP interpreted frame pointer, used for accessing locals and args |
| s2 rSELF pointer to thread |
| s3 rIBASE interpreted instruction base pointer, used for computed goto |
| s4 rINST first 16-bit code unit of current instruction |
| */ |
| |
| /* register offsets */ |
| #define r_ZERO 0 |
| #define r_AT 1 |
| #define r_V0 2 |
| #define r_V1 3 |
| #define r_A0 4 |
| #define r_A1 5 |
| #define r_A2 6 |
| #define r_A3 7 |
| #define r_T0 8 |
| #define r_T1 9 |
| #define r_T2 10 |
| #define r_T3 11 |
| #define r_T4 12 |
| #define r_T5 13 |
| #define r_T6 14 |
| #define r_T7 15 |
| #define r_S0 16 |
| #define r_S1 17 |
| #define r_S2 18 |
| #define r_S3 19 |
| #define r_S4 20 |
| #define r_S5 21 |
| #define r_S6 22 |
| #define r_S7 23 |
| #define r_T8 24 |
| #define r_T9 25 |
| #define r_K0 26 |
| #define r_K1 27 |
| #define r_GP 28 |
| #define r_SP 29 |
| #define r_FP 30 |
| #define r_RA 31 |
| #define r_F0 32 |
| #define r_F1 33 |
| #define r_F2 34 |
| #define r_F3 35 |
| #define r_F4 36 |
| #define r_F5 37 |
| #define r_F6 38 |
| #define r_F7 39 |
| #define r_F8 40 |
| #define r_F9 41 |
| #define r_F10 42 |
| #define r_F11 43 |
| #define r_F12 44 |
| #define r_F13 45 |
| #define r_F14 46 |
| #define r_F15 47 |
| #define r_F16 48 |
| #define r_F17 49 |
| #define r_F18 50 |
| #define r_F19 51 |
| #define r_F20 52 |
| #define r_F21 53 |
| #define r_F22 54 |
| #define r_F23 55 |
| #define r_F24 56 |
| #define r_F25 57 |
| #define r_F26 58 |
| #define r_F27 59 |
| #define r_F28 60 |
| #define r_F29 61 |
| #define r_F30 62 |
| #define r_F31 63 |
| |
| /* single-purpose registers, given names for clarity */ |
| #define rPC s0 |
| #define rFP s1 |
| #define rSELF s2 |
| #define rIBASE s3 |
| #define rINST s4 |
| #define rOBJ s5 |
| #define rBIX s6 |
| #define rTEMP s7 |
| |
/* 64-bit arguments and results are passed in register pairs whose order depends on
   endianness: rARG0/rRESULT0 always name the low word and rARG1/rRESULT1 the high word.
   In little-endian mode the low word travels in a0 (v0) and the high word in a1 (v1);
   in big-endian mode the pair is swapped. */
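
/*
 * A minimal C sketch of the mapping (illustrative only, not part of the build;
 * the helper name pairToLong is made up for this comment):
 *
 *   // rARG0/rRESULT0 always carry the low word, rARG1/rRESULT1 the high word,
 *   // regardless of endianness, so template code can stay endian-neutral.
 *   static inline long long pairToLong(unsigned int lo, unsigned int hi) {
 *       return ((long long)hi << 32) | lo;
 *   }
 */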
| |
| #ifdef HAVE_LITTLE_ENDIAN |
| #define rARG0 a0 |
| #define rARG1 a1 |
| #define rARG2 a2 |
| #define rARG3 a3 |
| #define rRESULT0 v0 |
| #define rRESULT1 v1 |
| #else |
| #define rARG0 a1 |
| #define rARG1 a0 |
| #define rARG2 a3 |
| #define rARG3 a2 |
| #define rRESULT0 v1 |
| #define rRESULT1 v0 |
| #endif |
| |
| |
| /* save/restore the PC and/or FP from the thread struct */ |
| #define LOAD_PC_FROM_SELF() lw rPC, offThread_pc(rSELF) |
| #define SAVE_PC_TO_SELF() sw rPC, offThread_pc(rSELF) |
| #define LOAD_FP_FROM_SELF() lw rFP, offThread_curFrame(rSELF) |
| #define SAVE_FP_TO_SELF() sw rFP, offThread_curFrame(rSELF) |
| |
| #define EXPORT_PC() \ |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| |
| #define SAVEAREA_FROM_FP(rd, _fpreg) \ |
| subu rd, _fpreg, sizeofStackSaveArea |
| |
| #define FETCH_INST() lhu rINST, (rPC) |
| |
| #define FETCH_ADVANCE_INST(_count) lhu rINST, (_count*2)(rPC); \ |
| addu rPC, rPC, (_count * 2) |
| |
| #define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \ |
| lhu rINST, (rPC) |
| |
| #define FETCH(rd, _count) lhu rd, (_count * 2)(rPC) |
| #define FETCH_S(rd, _count) lh rd, (_count * 2)(rPC) |
| |
| #ifdef HAVE_LITTLE_ENDIAN |
| |
| #define FETCH_B(rd, _count) lbu rd, (_count * 2)(rPC) |
| #define FETCH_C(rd, _count) lbu rd, (_count * 2 + 1)(rPC) |
| |
| #else |
| |
| #define FETCH_B(rd, _count) lbu rd, (_count * 2 + 1)(rPC) |
| #define FETCH_C(rd, _count) lbu rd, (_count * 2)(rPC) |
| |
| #endif |
| |
| #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF |
| |
| #define GOTO_OPCODE(rd) sll rd, rd, -1000; \ |
| addu rd, rIBASE, rd; \ |
| jr rd |
| |
| |
| #define LOAD(rd, rbase) lw rd, 0(rbase) |
| #define LOAD_F(rd, rbase) l.s rd, (rbase) |
| #define STORE(rd, rbase) sw rd, 0(rbase) |
| #define STORE_F(rd, rbase) s.s rd, (rbase) |
| |
| #define GET_VREG(rd, rix) LOAD_eas2(rd,rFP,rix) |
| |
| #define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \ |
| .set noat; l.s rd, (AT); .set at |
| |
| #define SET_VREG(rd, rix) STORE_eas2(rd, rFP, rix) |
| |
| #define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \ |
| sll dst, dst, -1000; \ |
| addu dst, rIBASE, dst; \ |
| sll t8, rix, 2; \ |
| addu t8, t8, rFP; \ |
| jr dst; \ |
| sw rd, 0(t8); \ |
| .set reorder |
| |
| #define SET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \ |
| .set noat; s.s rd, (AT); .set at |
| |
| |
| #define GET_OPA(rd) srl rd, rINST, 8 |
| #ifndef MIPS32R2 |
| #define GET_OPA4(rd) GET_OPA(rd); and rd, 0xf |
| #else |
| #define GET_OPA4(rd) ext rd, rd, 8, 4 |
| #endif |
| #define GET_OPB(rd) srl rd, rINST, 12 |
| |
| #define LOAD_rSELF_OFF(rd,off) lw rd, offThread_##off##(rSELF) |
| |
| #define LOAD_rSELF_method(rd) LOAD_rSELF_OFF(rd, method) |
| #define LOAD_rSELF_methodClassDex(rd) LOAD_rSELF_OFF(rd, methodClassDex) |
| #define LOAD_rSELF_interpStackEnd(rd) LOAD_rSELF_OFF(rd, interpStackEnd) |
| #define LOAD_rSELF_retval(rd) LOAD_rSELF_OFF(rd, retval) |
| #define LOAD_rSELF_pActiveProfilers(rd) LOAD_rSELF_OFF(rd, pActiveProfilers) |
| #define LOAD_rSELF_bailPtr(rd) LOAD_rSELF_OFF(rd, bailPtr) |
| |
| #define GET_JIT_PROF_TABLE(rd) LOAD_rSELF_OFF(rd,pJitProfTable) |
| #define GET_JIT_THRESHOLD(rd) LOAD_rSELF_OFF(rd,jitThreshold) |
| |
| /* |
| * Form an Effective Address rd = rbase + roff<<n; |
| * Uses reg AT |
| */ |
| #define EASN(rd,rbase,roff,rshift) .set noat; \ |
| sll AT, roff, rshift; \ |
| addu rd, rbase, AT; \ |
| .set at |
| |
| #define EAS1(rd,rbase,roff) EASN(rd,rbase,roff,1) |
| #define EAS2(rd,rbase,roff) EASN(rd,rbase,roff,2) |
| #define EAS3(rd,rbase,roff) EASN(rd,rbase,roff,3) |
| #define EAS4(rd,rbase,roff) EASN(rd,rbase,roff,4) |
| |
| /* |
| * Form an Effective Shift Right rd = rbase + roff>>n; |
| * Uses reg AT |
| */ |
| #define ESRN(rd,rbase,roff,rshift) .set noat; \ |
| srl AT, roff, rshift; \ |
| addu rd, rbase, AT; \ |
| .set at |
| |
| #define LOAD_eas2(rd,rbase,roff) EAS2(AT, rbase, roff); \ |
| .set noat; lw rd, 0(AT); .set at |
| |
| #define STORE_eas2(rd,rbase,roff) EAS2(AT, rbase, roff); \ |
| .set noat; sw rd, 0(AT); .set at |
| |
| #define LOAD_RB_OFF(rd,rbase,off) lw rd, off(rbase) |
| #define LOADu2_RB_OFF(rd,rbase,off) lhu rd, off(rbase) |
| #define STORE_RB_OFF(rd,rbase,off) sw rd, off(rbase) |
| |
| #ifdef HAVE_LITTLE_ENDIAN |
| |
| #define STORE64_off(rlo,rhi,rbase,off) sw rlo, off(rbase); \ |
| sw rhi, (off+4)(rbase) |
| #define LOAD64_off(rlo,rhi,rbase,off) lw rlo, off(rbase); \ |
| lw rhi, (off+4)(rbase) |
| |
| #define STORE64_off_F(rlo,rhi,rbase,off) s.s rlo, off(rbase); \ |
| s.s rhi, (off+4)(rbase) |
| #define LOAD64_off_F(rlo,rhi,rbase,off) l.s rlo, off(rbase); \ |
| l.s rhi, (off+4)(rbase) |
| #else |
| |
| #define STORE64_off(rlo,rhi,rbase,off) sw rlo, (off+4)(rbase); \ |
| sw rhi, (off)(rbase) |
| #define LOAD64_off(rlo,rhi,rbase,off) lw rlo, (off+4)(rbase); \ |
| lw rhi, (off)(rbase) |
| #define STORE64_off_F(rlo,rhi,rbase,off) s.s rlo, (off+4)(rbase); \ |
| s.s rhi, (off)(rbase) |
| #define LOAD64_off_F(rlo,rhi,rbase,off) l.s rlo, (off+4)(rbase); \ |
| l.s rhi, (off)(rbase) |
| #endif |
| |
| #define STORE64(rlo,rhi,rbase) STORE64_off(rlo,rhi,rbase,0) |
| #define LOAD64(rlo,rhi,rbase) LOAD64_off(rlo,rhi,rbase,0) |
| |
| #define STORE64_F(rlo,rhi,rbase) STORE64_off_F(rlo,rhi,rbase,0) |
| #define LOAD64_F(rlo,rhi,rbase) LOAD64_off_F(rlo,rhi,rbase,0) |
| |
| #define STORE64_lo(rd,rbase) sw rd, 0(rbase) |
| #define STORE64_hi(rd,rbase) sw rd, 4(rbase) |
| |
| |
| #define LOAD_offThread_exception(rd,rbase) LOAD_RB_OFF(rd,rbase,offThread_exception) |
| #define LOAD_base_offArrayObject_length(rd,rbase) LOAD_RB_OFF(rd,rbase,offArrayObject_length) |
| #define LOAD_base_offClassObject_accessFlags(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags) |
| #define LOAD_base_offClassObject_descriptor(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_descriptor) |
| #define LOAD_base_offClassObject_super(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_super) |
| |
| #define LOAD_base_offClassObject_vtable(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_vtable) |
| #define LOAD_base_offClassObject_vtableCount(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount) |
| #define LOAD_base_offDvmDex_pResClasses(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses) |
| #define LOAD_base_offDvmDex_pResFields(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields) |
| |
| #define LOAD_base_offDvmDex_pResMethods(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods) |
| #define LOAD_base_offDvmDex_pResStrings(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings) |
| #define LOAD_base_offInstField_byteOffset(rd,rbase) LOAD_RB_OFF(rd,rbase,offInstField_byteOffset) |
| #define LOAD_base_offStaticField_value(rd,rbase) LOAD_RB_OFF(rd,rbase,offStaticField_value) |
| #define LOAD_base_offMethod_clazz(rd,rbase) LOAD_RB_OFF(rd,rbase,offMethod_clazz) |
| |
| #define LOAD_base_offMethod_name(rd,rbase) LOAD_RB_OFF(rd,rbase,offMethod_name) |
| #define LOAD_base_offObject_clazz(rd,rbase) LOAD_RB_OFF(rd,rbase,offObject_clazz) |
| |
| #define LOADu2_offMethod_methodIndex(rd,rbase) LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex) |
| |
| |
| #define STORE_offThread_exception(rd,rbase) STORE_RB_OFF(rd,rbase,offThread_exception) |
| |
| |
| #define STACK_STORE(rd,off) sw rd, off(sp) |
| #define STACK_LOAD(rd,off) lw rd, off(sp) |
| #define CREATE_STACK(n) subu sp, sp, n |
| #define DELETE_STACK(n) addu sp, sp, n |
| |
| #define SAVE_RA(offset) STACK_STORE(ra, offset) |
| #define LOAD_RA(offset) STACK_LOAD(ra, offset) |
| |
| #define LOAD_ADDR(dest,addr) la dest, addr |
| #define LOAD_IMM(dest, imm) li dest, imm |
| #define MOVE_REG(dest,src) move dest, src |
| #define RETURN jr ra |
| #define STACK_SIZE 128 |
| |
| #define STACK_OFFSET_ARG04 16 |
| #define STACK_OFFSET_GP 84 |
| #define STACK_OFFSET_rFP 112 |
| |
/* This directive makes sure each subsequent jal restores gp from a known stack offset */
| .cprestore STACK_OFFSET_GP |
| |
| #define JAL(func) move rTEMP, ra; \ |
| jal func; \ |
| move ra, rTEMP |
| |
| #define JALR(reg) move rTEMP, ra; \ |
| jalr ra, reg; \ |
| move ra, rTEMP |
| |
| #define BAL(n) bal n |
| |
| #define STACK_STORE_RA() CREATE_STACK(STACK_SIZE); \ |
| STACK_STORE(gp, STACK_OFFSET_GP); \ |
| STACK_STORE(ra, 124) |
| |
| #define STACK_STORE_S0() STACK_STORE_RA(); \ |
| STACK_STORE(s0, 116) |
| |
| #define STACK_STORE_S0S1() STACK_STORE_S0(); \ |
| STACK_STORE(s1, STACK_OFFSET_rFP) |
| |
| #define STACK_LOAD_RA() STACK_LOAD(ra, 124); \ |
| STACK_LOAD(gp, STACK_OFFSET_GP); \ |
| DELETE_STACK(STACK_SIZE) |
| |
| #define STACK_LOAD_S0() STACK_LOAD(s0, 116); \ |
| STACK_LOAD_RA() |
| |
| #define STACK_LOAD_S0S1() STACK_LOAD(s1, STACK_OFFSET_rFP); \ |
| STACK_LOAD_S0() |
| |
| #define STACK_STORE_FULL() CREATE_STACK(STACK_SIZE); \ |
| STACK_STORE(ra, 124); \ |
| STACK_STORE(fp, 120); \ |
| STACK_STORE(s0, 116); \ |
| STACK_STORE(s1, STACK_OFFSET_rFP); \ |
| STACK_STORE(s2, 108); \ |
| STACK_STORE(s3, 104); \ |
| STACK_STORE(s4, 100); \ |
| STACK_STORE(s5, 96); \ |
| STACK_STORE(s6, 92); \ |
| STACK_STORE(s7, 88); |
| |
| #define STACK_LOAD_FULL() STACK_LOAD(gp, STACK_OFFSET_GP); \ |
| STACK_LOAD(s7, 88); \ |
| STACK_LOAD(s6, 92); \ |
| STACK_LOAD(s5, 96); \ |
| STACK_LOAD(s4, 100); \ |
| STACK_LOAD(s3, 104); \ |
| STACK_LOAD(s2, 108); \ |
| STACK_LOAD(s1, STACK_OFFSET_rFP); \ |
| STACK_LOAD(s0, 116); \ |
| STACK_LOAD(fp, 120); \ |
| STACK_LOAD(ra, 124); \ |
| DELETE_STACK(STACK_SIZE) |
| |
| /* |
 * The first 8 words of the frame are reserved for outgoing function-call arguments.
| * Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR |
| */ |
| #define STACK_OFFSET_SCR 32 |
| #define SCRATCH_STORE(r,off) \ |
| STACK_STORE(r, STACK_OFFSET_SCR+off); |
| #define SCRATCH_LOAD(r,off) \ |
| STACK_LOAD(r, STACK_OFFSET_SCR+off); |
| |
| /* File: mips/platform.S */ |
| /* |
| * =========================================================================== |
| * CPU-version-specific defines and utility |
| * =========================================================================== |
| */ |
| |
| |
| |
| .global dvmCompilerTemplateStart |
| .type dvmCompilerTemplateStart, %function |
| .section .data.rel.ro |
| |
| dvmCompilerTemplateStart: |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_CMP_LONG |
| dvmCompiler_TEMPLATE_CMP_LONG: |
| /* File: mips/TEMPLATE_CMP_LONG.S */ |
| /* |
| * Compare two 64-bit values |
| * x = y return 0 |
| * x < y return -1 |
| * x > y return 1 |
| * |
 * This improves on the ARM code with the following observation:
 *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
 *    sgt   t1,  x.hi, y.hi;        # (x.hi > y.hi) ? 1:0
 *    subu  v0,  t1, t0             # v0 = -1, 1, or 0 for [ <, >, = ]
 *
 * The register pair ordering depends on endianness (a1:a0 or a0:a1).
 *     a1:a0 => vBB
 *     a3:a2 => vCC
| */ |
| /* cmp-long vAA, vBB, vCC */ |
| slt t0, rARG1, rARG3 # compare hi |
| sgt t1, rARG1, rARG3 |
| subu v0, t1, t0 # v0<- (-1,1,0) |
| bnez v0, .LTEMPLATE_CMP_LONG_finish |
| # at this point x.hi==y.hi |
| sltu t0, rARG0, rARG2 # compare lo |
| sgtu t1, rARG0, rARG2 |
| subu v0, t1, t0 # v0<- (-1,1,0) for [< > =] |
| .LTEMPLATE_CMP_LONG_finish: |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_RETURN |
| dvmCompiler_TEMPLATE_RETURN: |
| /* File: mips/TEMPLATE_RETURN.S */ |
| /* |
| * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. |
 * If the stored value in returnAddr is non-zero, the caller was compiled by the
 * JIT, so return to the address in the code cache following the invoke
 * instruction. Otherwise return to the special dvmJitToInterpNoChain entry point.
| */ |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # preserve a0-a2 and ra |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(ra, 12) |
| |
| # a0=rSELF |
| move a0, rSELF |
| la t9, dvmFastMethodTraceExit |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a2 and ra |
| SCRATCH_LOAD(ra, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| #endif |
| SAVEAREA_FROM_FP(a0, rFP) # a0<- saveArea (old) |
| lw t0, offStackSaveArea_prevFrame(a0) # t0<- saveArea->prevFrame |
| lbu t1, offThread_breakFlags(rSELF) # t1<- breakFlags |
| lw rPC, offStackSaveArea_savedPc(a0) # rPC<- saveArea->savedPc |
| #if !defined(WITH_SELF_VERIFICATION) |
| lw t2, offStackSaveArea_returnAddr(a0) # t2<- chaining cell ret |
| #else |
| move t2, zero # disable chaining |
| #endif |
| lw a2, offStackSaveArea_method - sizeofStackSaveArea(t0) |
| # a2<- method we're returning to |
| #if !defined(WITH_SELF_VERIFICATION) |
| beq a2, zero, 1f # bail to interpreter |
| #else |
| bne a2, zero, 2f |
| JALR(ra) # punt to interpreter and compare state |
| # DOUG: assume this does not return ??? |
| 2: |
| #endif |
| la t4, .LdvmJitToInterpNoChainNoProfile # defined in footer.S |
| lw a1, (t4) |
| move rFP, t0 # publish new FP |
| beq a2, zero, 4f |
| lw t0, offMethod_clazz(a2) # t0<- method->clazz |
| 4: |
| |
| sw a2, offThread_method(rSELF) # self->method = newSave->method |
| lw a0, offClassObject_pDvmDex(t0) # a0<- method->clazz->pDvmDex |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp |
| add rPC, rPC, 3*2 # publish new rPC |
| sw a0, offThread_methodClassDex(rSELF) |
    movn      t2, zero, t1                  # check the breakFlags and
| # clear the chaining cell address |
| sw t2, offThread_inJitCodeCache(rSELF) # in code cache or not |
| beq t2, zero, 3f # chaining cell exists? |
| JALR(t2) # jump to the chaining cell |
| # DOUG: assume this does not return ??? |
| 3: |
| #if defined(WITH_JIT_TUNING) |
| li a0, kCallsiteInterpreted |
| #endif |
| j a1 # callsite is interpreted |
| 1: |
| sw zero, offThread_inJitCodeCache(rSELF) # reset inJitCodeCache |
| SAVE_PC_TO_SELF() # SAVE_PC_FP_TO_SELF() |
| SAVE_FP_TO_SELF() |
| la t4, .LdvmMterpStdBail # defined in footer.S |
| lw a2, (t4) |
| move a0, rSELF # Expecting rSELF in a0 |
| JALR(a2) # exit the interpreter |
| # DOUG: assume this does not return ??? |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ |
| /* |
 * For polymorphic callsites - set up the Dalvik frame, load the Dalvik PC
 * into rPC, then jump to dvmJitToInterpNoChain to dispatch the
 * runtime-resolved callee.
| */ |
| # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite |
| lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize |
| lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize |
| lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd |
| lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags |
| move a3, a1 # a3<- returnCell |
| SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area |
| sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) |
| sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) |
| SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area |
| sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) |
| sub t0, t0, t6 # t0<- bottom (newsave-outsSize) |
| bgeu t0, t9, 1f # bottom < interpStackEnd? |
| RETURN # return to raise stack overflow excep. |
| |
| 1: |
| # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite |
| lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz |
| lw t0, offMethod_accessFlags(a0) # t0<- methodToCall->accessFlags |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) |
| lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns |
| |
| # set up newSaveArea |
| sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) |
| sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) |
| sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) |
| beqz t8, 2f # breakFlags != 0 |
| RETURN # bail to the interpreter |
| |
| 2: |
| and t6, t0, ACC_NATIVE |
| beqz t6, 3f |
| #if !defined(WITH_SELF_VERIFICATION) |
| j .LinvokeNative |
| #else |
| RETURN # bail to the interpreter |
| #endif |
| |
| 3: |
| # continue executing the next instruction through the interpreter |
| la t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S |
| lw rTEMP, (t0) |
| lw a3, offClassObject_pDvmDex(t9) # a3<- method->clazz->pDvmDex |
| |
| # Update "thread" values for the new method |
| sw a0, offThread_method(rSELF) # self->method = methodToCall |
| sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... |
| move rFP, a1 # fp = newFp |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # preserve a0-a3 |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(a3, 12) |
| |
| # a0=methodToCall, a1=rSELF |
| move a1, rSELF |
| la t9, dvmFastMethodTraceEnter |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a3 |
| SCRATCH_LOAD(a3, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| #endif |
| |
| # Start executing the callee |
| #if defined(WITH_JIT_TUNING) |
| li a0, kInlineCacheMiss |
| #endif |
| jr rTEMP # dvmJitToInterpTraceSelectNoChain |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */ |
| /* |
 * For a monomorphic callsite, set up the Dalvik frame and return through the
 * link register to transfer control to the callee method through a dedicated
 * chaining cell.
| */ |
| # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite |
| # methodToCall is guaranteed to be non-native |
| .LinvokeChain: |
| lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize |
| lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize |
| lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd |
| lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags |
| move a3, a1 # a3<- returnCell |
| SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area |
| sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) |
| sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) |
| SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area |
| add t2, ra, 8 # setup the punt-to-interp address |
| # 8 bytes skips branch and delay slot |
| sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) |
| sub t0, t0, t6 # t0<- bottom (newsave-outsSize) |
| bgeu t0, t9, 1f # bottom < interpStackEnd? |
| jr t2 # return to raise stack overflow excep. |
| |
| 1: |
| # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite |
| lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) |
| lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns |
| |
| # set up newSaveArea |
| sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) |
| sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) |
| sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) |
| beqz t8, 2f # breakFlags != 0 |
| jr t2 # bail to the interpreter |
| |
| 2: |
| lw a3, offClassObject_pDvmDex(t9) # a3<- methodToCall->clazz->pDvmDex |
| |
| # Update "thread" values for the new method |
| sw a0, offThread_method(rSELF) # self->method = methodToCall |
| sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... |
| move rFP, a1 # fp = newFp |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # preserve a0-a2 and ra |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(ra, 12) |
| |
| move a1, rSELF |
| # a0=methodToCall, a1=rSELF |
| la t9, dvmFastMethodTraceEnter |
| jalr t9 |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a2 and ra |
| SCRATCH_LOAD(ra, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| #endif |
| RETURN # return to the callee-chaining cell |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */ |
| /* |
 * For a polymorphic callsite, check whether the cached class pointer matches
 * the current one. If so, set up the Dalvik frame and return through the
 * link register to transfer control to the callee method through a dedicated
 * chaining cell.
| * |
| * The predicted chaining cell is declared in ArmLIR.h with the |
| * following layout: |
| * |
| * typedef struct PredictedChainingCell { |
| * u4 branch; |
| * u4 delay_slot; |
| * const ClassObject *clazz; |
| * const Method *method; |
| * u4 counter; |
| * } PredictedChainingCell; |
| * |
| * Upon returning to the callsite: |
 * - ra    : to branch to the chaining cell
 * - ra+8  : to punt to the interpreter
 * - ra+16 : to fully resolve the callee, and possibly rechain.
| * a3 <- class |
| */ |
| # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite |
| lw a3, offObject_clazz(a0) # a3 <- this->class |
    lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz
| lw a0, 12(a2) # a0 <- predictedChainCell->method |
| lw t1, offThread_icRechainCount(rSELF) # t1 <- shared rechainCount |
| |
| #if defined(WITH_JIT_TUNING) |
| la rINST, .LdvmICHitCount |
| #add t2, t2, 1 |
| bne a3, rIBASE, 1f |
| nop |
| lw t2, 0(rINST) |
| add t2, t2, 1 |
| sw t2, 0(rINST) |
| 1: |
| #add t2, t2, 1 |
| #endif |
| beq a3, rIBASE, .LinvokeChain # branch if predicted chain is valid |
| lw rINST, offClassObject_vtable(a3) # rINST <- this->class->vtable |
| beqz rIBASE, 2f # initialized class or not |
| sub a1, t1, 1 # count-- |
| sw a1, offThread_icRechainCount(rSELF) # write back to InterpState |
| b 3f |
| 2: |
| move a1, zero |
| 3: |
| add ra, ra, 16 # return to fully-resolve landing pad |
| /* |
| * a1 <- count |
| * a2 <- &predictedChainCell |
| * a3 <- this->class |
| * rPC <- dPC |
| * rINST <- this->class->vtable |
| */ |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */ |
| # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite |
| lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize |
| lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd |
| lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags |
| move a3, a1 # a3<- returnCell |
| SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area |
| sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) |
| sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) |
| SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area |
| bgeu t0, t9, 1f # bottom < interpStackEnd? |
| RETURN # return to raise stack overflow excep. |
| |
| 1: |
| # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) |
| lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns |
| |
| # set up newSaveArea |
| sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) |
| sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) |
| sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) |
    lw      rTEMP, offMethod_nativeFunc(a0) # rTEMP<- method->nativeFunc
| #if !defined(WITH_SELF_VERIFICATION) |
| beqz t8, 2f # breakFlags != 0 |
| RETURN # bail to the interpreter |
| 2: |
| #else |
| RETURN # bail to the interpreter unconditionally |
| #endif |
| |
| # go ahead and transfer control to the native code |
| lw t6, offThread_jniLocal_topCookie(rSELF) # t6<- thread->localRef->... |
| sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp |
| sw zero, offThread_inJitCodeCache(rSELF) # not in the jit code cache |
| sw t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1) |
| # newFp->localRefCookie=top |
| SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area |
| move a2, a0 # a2<- methodToCall |
| move a0, a1 # a0<- newFp |
| add a1, rSELF, offThread_retval # a1<- &retval |
| move a3, rSELF # a3<- self |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # a2: methodToCall |
| # preserve a0-a3 |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(a3, 12) |
| |
| move a0, a2 |
| move a1, rSELF |
| # a0=JNIMethod, a1=rSELF |
| la t9, dvmFastMethodTraceEnter |
| JALR(t9) # off to the native code |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a3 |
| SCRATCH_LOAD(a3, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| |
| move rOBJ, a2 # save a2 |
| #endif |
| |
| JALR(rTEMP) # off to the native code |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| move a0, rOBJ |
| move a1, rSELF |
| # a0=JNIMethod, a1=rSELF |
| la t9, dvmFastNativeMethodTraceExit |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| #endif |
| |
| # native return; rBIX=newSaveArea |
| # equivalent to dvmPopJniLocals |
| lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr |
| lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top |
| lw a1, offThread_exception(rSELF) # check for exception |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp |
| sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top |
| lw a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| |
| # a0 = dalvikCallsitePC |
| bnez a1, .LhandleException # handle exception if any |
| |
| sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly |
| beqz a2, 3f |
    jr      a2                              # go if return chaining cell still exists
| |
| 3: |
| # continue executing the next instruction through the interpreter |
| la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S |
| lw a1, (a1) |
| add rPC, a0, 3*2 # reconstruct new rPC (advance 3 dalvik instr) |
| |
| #if defined(WITH_JIT_TUNING) |
| li a0, kCallsiteInterpreted |
| #endif |
| jr a1 |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_MUL_LONG |
| dvmCompiler_TEMPLATE_MUL_LONG: |
| /* File: mips/TEMPLATE_MUL_LONG.S */ |
| /* |
| * Signed 64-bit integer multiply. |
| * |
| * For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1 |
| * |
| * Consider WXxYZ (a1a0 x a3a2) with a long multiply: |
| * |
| * a1 a0 |
| * x a3 a2 |
| * ------------- |
| * a2a1 a2a0 |
| * a3a0 |
| * a3a1 (<= unused) |
| * --------------- |
| * v1 v0 |
| * |
| */ |
| /* mul-long vAA, vBB, vCC */ |
| mul rRESULT1,rARG3,rARG0 # v1= a3a0 |
| multu rARG2,rARG0 |
| mfhi t1 |
| mflo rRESULT0 # v0= a2a0 |
| mul t0,rARG2,rARG1 # t0= a2a1 |
| addu rRESULT1,rRESULT1,t1 # v1= a3a0 + hi(a2a0) |
| addu rRESULT1,rRESULT1,t0 # v1= a3a0 + hi(a2a0) + a2a1; |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_SHL_LONG |
| dvmCompiler_TEMPLATE_SHL_LONG: |
| /* File: mips/TEMPLATE_SHL_LONG.S */ |
| /* |
| * Long integer shift. This is different from the generic 32/64-bit |
| * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| * 6 bits. |
| */ |
| /* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */ |
| sll rRESULT0, rARG0, a2 # rlo<- alo << (shift&31) |
| not rRESULT1, a2 # rhi<- 31-shift (shift is 5b) |
| srl rARG0, 1 |
| srl rARG0, rRESULT1 # alo<- alo >> (32-(shift&31)) |
| sll rRESULT1, rARG1, a2 # rhi<- ahi << (shift&31) |
| or rRESULT1, rARG0 # rhi<- rhi | alo |
    andi    a2, 0x20                        # a2<- shift & 0x20
| movn rRESULT1, rRESULT0, a2 # rhi<- rlo (if shift&0x20) |
| movn rRESULT0, zero, a2 # rlo<- 0 (if shift&0x20) |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_SHR_LONG |
| dvmCompiler_TEMPLATE_SHR_LONG: |
| /* File: mips/TEMPLATE_SHR_LONG.S */ |
| /* |
| * Long integer shift. This is different from the generic 32/64-bit |
| * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| * 6 bits. |
| */ |
| /* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */ |
| sra rRESULT1, rARG1, a2 # rhi<- ahi >> (shift&31) |
| srl rRESULT0, rARG0, a2 # rlo<- alo >> (shift&31) |
| sra a3, rARG1, 31 # a3<- sign(ah) |
| not rARG0, a2 # alo<- 31-shift (shift is 5b) |
| sll rARG1, 1 |
| sll rARG1, rARG0 # ahi<- ahi << (32-(shift&31)) |
| or rRESULT0, rARG1 # rlo<- rlo | ahi |
| andi a2, 0x20 # shift & 0x20 |
| movn rRESULT0, rRESULT1, a2 # rlo<- rhi (if shift&0x20) |
| movn rRESULT1, a3, a2 # rhi<- sign(ahi) (if shift&0x20) |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_USHR_LONG |
| dvmCompiler_TEMPLATE_USHR_LONG: |
| /* File: mips/TEMPLATE_USHR_LONG.S */ |
| /* |
| * Long integer shift. This is different from the generic 32/64-bit |
| * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| * 6 bits. |
| */ |
| /* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */ |
| srl rRESULT1, rARG1, a2 # rhi<- ahi >> (shift&31) |
| srl rRESULT0, rARG0, a2 # rlo<- alo >> (shift&31) |
| not rARG0, a2 # alo<- 31-n (shift is 5b) |
| sll rARG1, 1 |
| sll rARG1, rARG0 # ahi<- ahi << (32-(shift&31)) |
| or rRESULT0, rARG1 # rlo<- rlo | ahi |
| andi a2, 0x20 # shift & 0x20 |
| movn rRESULT0, rRESULT1, a2 # rlo<- rhi (if shift&0x20) |
| movn rRESULT1, zero, a2 # rhi<- 0 (if shift&0x20) |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP |
| dvmCompiler_TEMPLATE_ADD_FLOAT_VFP: |
| /* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */ |
| /* File: mips/fbinop.S */ |
| /* |
| * Generic 32-bit binary float operation. a0 = a1 op a2. |
| * |
| * For: add-fp, sub-fp, mul-fp, div-fp |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| * |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vBB |
| LOAD(a1, a2) # a1<- vCC |
| .if 0 |
| beqz a1, common_errDivideByZero # is second operand zero? |
| .endif |
| # optional op |
| JAL(__addsf3) # v0 = result |
| STORE(v0, rOBJ) # vAA <- v0 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vBB |
| LOAD_F(fa1, a2) # fa1<- vCC |
| .if 0 |
| # is second operand zero? |
| li.s ft0, 0 |
    c.eq.s    fcc0, ft0, fa1                # condition bit and comparison with 0
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| add.s fv0, fa0, fa1 # fv0 = result |
| STORE_F(fv0, rOBJ) # vAA <- fv0 |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP |
| dvmCompiler_TEMPLATE_SUB_FLOAT_VFP: |
| /* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */ |
| /* File: mips/fbinop.S */ |
| /* |
| * Generic 32-bit binary float operation. a0 = a1 op a2. |
| * |
| * For: add-fp, sub-fp, mul-fp, div-fp |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| * |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vBB |
| LOAD(a1, a2) # a1<- vCC |
| .if 0 |
| beqz a1, common_errDivideByZero # is second operand zero? |
| .endif |
| # optional op |
| JAL(__subsf3) # v0 = result |
| STORE(v0, rOBJ) # vAA <- v0 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vBB |
| LOAD_F(fa1, a2) # fa1<- vCC |
| .if 0 |
| # is second operand zero? |
| li.s ft0, 0 |
    c.eq.s    fcc0, ft0, fa1                # condition bit and comparison with 0
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| sub.s fv0, fa0, fa1 # fv0 = result |
| STORE_F(fv0, rOBJ) # vAA <- fv0 |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP |
| dvmCompiler_TEMPLATE_MUL_FLOAT_VFP: |
| /* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */ |
| /* File: mips/fbinop.S */ |
| /* |
| * Generic 32-bit binary float operation. a0 = a1 op a2. |
| * |
| * For: add-fp, sub-fp, mul-fp, div-fp |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| * |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vBB |
| LOAD(a1, a2) # a1<- vCC |
| .if 0 |
| beqz a1, common_errDivideByZero # is second operand zero? |
| .endif |
| # optional op |
| JAL(__mulsf3) # v0 = result |
| STORE(v0, rOBJ) # vAA <- v0 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vBB |
| LOAD_F(fa1, a2) # fa1<- vCC |
| .if 0 |
| # is second operand zero? |
| li.s ft0, 0 |
    c.eq.s    fcc0, ft0, fa1                # condition bit and comparison with 0
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| mul.s fv0, fa0, fa1 # fv0 = result |
| STORE_F(fv0, rOBJ) # vAA <- fv0 |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP |
| dvmCompiler_TEMPLATE_DIV_FLOAT_VFP: |
| /* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */ |
| /* File: mips/fbinop.S */ |
| /* |
| * Generic 32-bit binary float operation. a0 = a1 op a2. |
| * |
| * For: add-fp, sub-fp, mul-fp, div-fp |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| * |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vBB |
| LOAD(a1, a2) # a1<- vCC |
| .if 0 |
| beqz a1, common_errDivideByZero # is second operand zero? |
| .endif |
| # optional op |
| JAL(__divsf3) # v0 = result |
| STORE(v0, rOBJ) # vAA <- v0 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vBB |
| LOAD_F(fa1, a2) # fa1<- vCC |
| .if 0 |
| # is second operand zero? |
| li.s ft0, 0 |
    c.eq.s    fcc0, ft0, fa1                # condition bit and comparison with 0
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| div.s fv0, fa0, fa1 # fv0 = result |
| STORE_F(fv0, rOBJ) # vAA <- fv0 |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */ |
| /* File: mips/fbinopWide.S */ |
| /* |
| * Generic 64-bit binary operation. Provide an "instr" line that |
| * specifies an instruction that performs "result = a0-a1 op a2-a3". |
 * This could be a MIPS instruction or a function call.
| * If "chkzero" is set to 1, we perform a divide-by-zero check on |
| * vCC (a1). Useful for integer division and modulus. |
| * |
| * for: add-long, sub-long, div-long, rem-long, and-long, or-long, |
| * xor-long, add-double, sub-double, mul-double, div-double, |
| * rem-double |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| move t0, a1 # save a1 |
| move t1, a2 # save a2 |
| LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 |
| .if 0 |
| or t0, rARG2, rARG3 # second arg (a2-a3) is zero? |
| beqz t0, common_errDivideByZero |
| .endif |
| # optional op |
| JAL(__adddf3) # result<- op, a0-a3 changed |
| STORE64(rRESULT0, rRESULT1, rOBJ) |
| #else |
| LOAD64_F(fa0, fa0f, a1) |
| LOAD64_F(fa1, fa1f, a2) |
| .if 0 |
| li.d ft0, 0 |
| c.eq.d fcc0, fa1, ft0 |
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| add.d fv0, fa0, fa1 |
| STORE64_F(fv0, fv0f, rOBJ) |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */ |
| /* File: mips/fbinopWide.S */ |
| /* |
| * Generic 64-bit binary operation. Provide an "instr" line that |
| * specifies an instruction that performs "result = a0-a1 op a2-a3". |
 * This could be a MIPS instruction or a function call.
| * If "chkzero" is set to 1, we perform a divide-by-zero check on |
| * vCC (a1). Useful for integer division and modulus. |
| * |
| * for: add-long, sub-long, div-long, rem-long, and-long, or-long, |
| * xor-long, add-double, sub-double, mul-double, div-double, |
| * rem-double |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| move t0, a1 # save a1 |
| move t1, a2 # save a2 |
| LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 |
| .if 0 |
| or t0, rARG2, rARG3 # second arg (a2-a3) is zero? |
| beqz t0, common_errDivideByZero |
| .endif |
| # optional op |
| JAL(__subdf3) # result<- op, a0-a3 changed |
| STORE64(rRESULT0, rRESULT1, rOBJ) |
| #else |
| LOAD64_F(fa0, fa0f, a1) |
| LOAD64_F(fa1, fa1f, a2) |
| .if 0 |
| li.d ft0, 0 |
| c.eq.d fcc0, fa1, ft0 |
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| sub.d fv0, fa0, fa1 |
| STORE64_F(fv0, fv0f, rOBJ) |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */ |
| /* File: mips/fbinopWide.S */ |
| /* |
| * Generic 64-bit binary operation. Provide an "instr" line that |
| * specifies an instruction that performs "result = a0-a1 op a2-a3". |
 * This could be a MIPS instruction or a function call.
| * If "chkzero" is set to 1, we perform a divide-by-zero check on |
| * vCC (a1). Useful for integer division and modulus. |
| * |
| * for: add-long, sub-long, div-long, rem-long, and-long, or-long, |
| * xor-long, add-double, sub-double, mul-double, div-double, |
| * rem-double |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| move t0, a1 # save a1 |
| move t1, a2 # save a2 |
| LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 |
| .if 0 |
| or t0, rARG2, rARG3 # second arg (a2-a3) is zero? |
| beqz t0, common_errDivideByZero |
| .endif |
| # optional op |
| JAL(__muldf3) # result<- op, a0-a3 changed |
| STORE64(rRESULT0, rRESULT1, rOBJ) |
| #else |
| LOAD64_F(fa0, fa0f, a1) |
| LOAD64_F(fa1, fa1f, a2) |
| .if 0 |
| li.d ft0, 0 |
| c.eq.d fcc0, fa1, ft0 |
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| mul.d fv0, fa0, fa1 |
| STORE64_F(fv0, fv0f, rOBJ) |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */ |
| /* File: mips/fbinopWide.S */ |
| /* |
| * Generic 64-bit binary operation. Provide an "instr" line that |
| * specifies an instruction that performs "result = a0-a1 op a2-a3". |
 * This could be a MIPS instruction or a function call.
| * If "chkzero" is set to 1, we perform a divide-by-zero check on |
| * vCC (a1). Useful for integer division and modulus. |
| * |
| * for: add-long, sub-long, div-long, rem-long, and-long, or-long, |
| * xor-long, add-double, sub-double, mul-double, div-double, |
| * rem-double |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = op1 address |
| * a2 = op2 address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| move t0, a1 # save a1 |
| move t1, a2 # save a2 |
| LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 |
| .if 0 |
| or t0, rARG2, rARG3 # second arg (a2-a3) is zero? |
| beqz t0, common_errDivideByZero |
| .endif |
| # optional op |
| JAL(__divdf3) # result<- op, a0-a3 changed |
| STORE64(rRESULT0, rRESULT1, rOBJ) |
| #else |
| LOAD64_F(fa0, fa0f, a1) |
| LOAD64_F(fa1, fa1f, a2) |
| .if 0 |
| li.d ft0, 0 |
| c.eq.d fcc0, fa1, ft0 |
| bc1t fcc0, common_errDivideByZero |
| .endif |
| # optional op |
| div.d fv0, fa0, fa1 |
| STORE64_F(fv0, fv0f, rOBJ) |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP |
| dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP: |
| /* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */ |
| /* File: mips/funopNarrower.S */ |
| /* |
| * Generic 64bit-to-32bit unary operation. Provide an "instr" line |
| * that specifies an instruction that performs "result = op a0/a1", where |
| * "result" is a 32-bit quantity in a0. |
| * |
| * For: long-to-float, double-to-int, double-to-float |
| * If hard floating point support is available, use fa0 as the parameter, except for |
| * long-to-float opcode. |
| * (This would work for long-to-int, but that instruction is actually |
| * an exact match for OP_MOVE.) |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = src dalvik register address |
| * |
| */ |
| move rINST, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| move t0, a1 # save a1 |
| LOAD64(rARG0, rARG1, t0) # a0/a1<- vB/vB+1 |
| # optional op |
| JAL(__truncdfsf2) # v0<- op, a0-a3 changed |
| .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg: |
| STORE(v0, rINST) # vA<- v0 |
| #else |
| LOAD64_F(fa0, fa0f, a1) |
| # optional op |
| cvt.s.d fv0,fa0 # fv0 = result |
| .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f: |
| STORE_F(fv0, rINST) # vA<- fv0 |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP |
| dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP: |
| /* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */ |
| /* File: mips/funopNarrower.S */ |
| /* |
| * Generic 64bit-to-32bit unary operation. Provide an "instr" line |
| * that specifies an instruction that performs "result = op a0/a1", where |
| * "result" is a 32-bit quantity in a0. |
| * |
| * For: long-to-float, double-to-int, double-to-float |
| * If hard floating point support is available, use fa0 as the parameter, except for |
| * long-to-float opcode. |
| * (This would work for long-to-int, but that instruction is actually |
| * an exact match for OP_MOVE.) |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = src dalvik register address |
| * |
| */ |
| move rINST, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| move t0, a1 # save a1 |
| LOAD64(rARG0, rARG1, t0) # a0/a1<- vB/vB+1 |
| # optional op |
| b d2i_doconv # v0<- op, a0-a3 changed |
| .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg: |
| STORE(v0, rINST) # vA<- v0 |
| #else |
| LOAD64_F(fa0, fa0f, a1) |
| # optional op |
| b d2i_doconv # fv0 = result |
| .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f: |
| STORE_F(fv0, rINST) # vA<- fv0 |
| #endif |
| RETURN |
| |
| |
| /* |
| * Convert the double in a0/a1 to an int in a0. |
| * |
 * We have to clip values to int min/max per the specification. The
 * expected common case is a "reasonable" value that converts directly
 * to a modest integer. The EABI convert function doesn't do this for us.
 * rBIX / rOBJ are used as scratch registers to hold the arguments across
 * calls (they are not bound to any global variable).
| */ |
| |
| d2i_doconv: |
| #ifdef SOFT_FLOAT |
| la t0, .LDOUBLE_TO_INT_max |
| LOAD64(rARG2, rARG3, t0) |
| move rBIX, rARG0 # save a0 |
| move rOBJ, rARG1 # and a1 |
| JAL(__gedf2) # is arg >= maxint? |
| |
| move t0, v0 |
| li v0, ~0x80000000 # return maxint (7fffffff) |
| bgez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # nonzero == yes |
| |
| move rARG0, rBIX # recover arg |
| move rARG1, rOBJ |
| la t0, .LDOUBLE_TO_INT_min |
| LOAD64(rARG2, rARG3, t0) |
| JAL(__ledf2) # is arg <= minint? |
| |
| move t0, v0 |
| li v0, 0x80000000 # return minint (80000000) |
| blez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # nonzero == yes |
| |
| move rARG0, rBIX # recover arg |
| move rARG1, rOBJ |
| move rARG2, rBIX # compare against self |
| move rARG3, rOBJ |
| JAL(__nedf2) # is arg == self? |
| |
| move t0, v0 # zero == no |
| li v0, 0 |
| bnez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # return zero for NaN |
| |
| move rARG0, rBIX # recover arg |
| move rARG1, rOBJ |
| JAL(__fixdfsi) # convert double to int |
| b .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg |
| #else |
| la t0, .LDOUBLE_TO_INT_max |
| LOAD64_F(fa1, fa1f, t0) |
| c.ole.d fcc0, fa1, fa0 |
| l.s fv0, .LDOUBLE_TO_INT_maxret |
| bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f |
| |
| la t0, .LDOUBLE_TO_INT_min |
| LOAD64_F(fa1, fa1f, t0) |
| c.ole.d fcc0, fa0, fa1 |
| l.s fv0, .LDOUBLE_TO_INT_minret |
| bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f |
| |
| mov.d fa1, fa0 |
| c.un.d fcc0, fa0, fa1 |
| li.s fv0, 0 |
| bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f |
| |
| trunc.w.d fv0, fa0 |
| b .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f |
| #endif |
| |
| |
| .LDOUBLE_TO_INT_max: |
| .dword 0x41dfffffffc00000 |
| .LDOUBLE_TO_INT_min: |
    .dword 0xc1e0000000000000               # minint, as a double
| .LDOUBLE_TO_INT_maxret: |
| .word 0x7fffffff |
| .LDOUBLE_TO_INT_minret: |
| .word 0x80000000 |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */ |
| /* File: mips/funopWider.S */ |
| /* |
| * Generic 32bit-to-64bit floating point unary operation. Provide an |
| * "instr" line that specifies an instruction that performs "d0 = op s0". |
| * |
| * For: int-to-double, float-to-double |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = src dalvik register address |
| */ |
| /* unop vA, vB */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vB |
| # optional op |
| JAL(__extendsfdf2) # result<- op, a0-a3 changed |
| |
| .LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg: |
| STORE64(rRESULT0, rRESULT1, rOBJ) # vA/vA+1<- v0/v1 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vB |
| # optional op |
| cvt.d.s fv0, fa0 |
| |
| .LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg: |
| STORE64_F(fv0, fv0f, rOBJ) # vA/vA+1<- fv0/fv0f |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP |
| dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP: |
| /* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */ |
| /* File: mips/funop.S */ |
| /* |
| * Generic 32-bit unary operation. Provide an "instr" line that |
| * specifies an instruction that performs "result = op a0". |
| * This could be a MIPS instruction or a function call. |
| * |
| * for: int-to-float, float-to-int |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = src dalvik register address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| * |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vBB |
| # optional op |
| b f2i_doconv # v0<- op, a0-a3 changed |
| .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg: |
| STORE(v0, rOBJ) # vAA<- v0 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vBB |
| # optional op |
| b f2i_doconv # fv0 = result |
| .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f: |
| STORE_F(fv0, rOBJ) # vAA <- fv0 |
| #endif |
| RETURN |
| |
| |
| /* |
 * Not an entry point; it is only reached from the template above.
| */ |
| f2i_doconv: |
| #ifdef SOFT_FLOAT |
| li a1, 0x4f000000 # (float)maxint |
| move rBIX, a0 |
| JAL(__gesf2) # is arg >= maxint? |
| move t0, v0 |
| li v0, ~0x80000000 # return maxint (7fffffff) |
| bgez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg |
| |
| move a0, rBIX # recover arg |
| li a1, 0xcf000000 # (float)minint |
| JAL(__lesf2) |
| |
| move t0, v0 |
| li v0, 0x80000000 # return minint (80000000) |
| blez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg |
| move a0, rBIX |
| move a1, rBIX |
| JAL(__nesf2) |
| |
| move t0, v0 |
| li v0, 0 # return zero for NaN |
| bnez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg |
| |
| move a0, rBIX |
| JAL(__fixsfsi) |
| b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg |
| #else |
| l.s fa1, .LFLOAT_TO_INT_max |
| c.ole.s fcc0, fa1, fa0 |
| l.s fv0, .LFLOAT_TO_INT_ret_max |
| bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f |
| |
| l.s fa1, .LFLOAT_TO_INT_min |
| c.ole.s fcc0, fa0, fa1 |
| l.s fv0, .LFLOAT_TO_INT_ret_min |
| bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f |
| |
| mov.s fa1, fa0 |
| c.un.s fcc0, fa0, fa1 |
| li.s fv0, 0 |
| bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f |
| |
| trunc.w.s fv0, fa0 |
| b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f |
| #endif |
| |
| .LFLOAT_TO_INT_max: |
| .word 0x4f000000 |
| .LFLOAT_TO_INT_min: |
| .word 0xcf000000 |
| .LFLOAT_TO_INT_ret_max: |
| .word 0x7fffffff |
| .LFLOAT_TO_INT_ret_min: |
| .word 0x80000000 |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */ |
| /* File: mips/funopWider.S */ |
| /* |
| * Generic 32bit-to-64bit floating point unary operation. Provide an |
| * "instr" line that specifies an instruction that performs "d0 = op s0". |
| * |
| * For: int-to-double, float-to-double |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = src dalvik register address |
| */ |
| /* unop vA, vB */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vB |
| # optional op |
| JAL(__floatsidf) # result<- op, a0-a3 changed |
| |
| .LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg: |
| STORE64(rRESULT0, rRESULT1, rOBJ) # vA/vA+1<- v0/v1 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vB |
| # optional op |
| cvt.d.w fv0, fa0 |
| |
| .LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg: |
| STORE64_F(fv0, fv0f, rOBJ) # vA/vA+1<- fv0/fv0f |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP |
| dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP: |
| /* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */ |
| /* File: mips/funop.S */ |
| /* |
| * Generic 32-bit unary operation. Provide an "instr" line that |
| * specifies an instruction that performs "result = op a0". |
| * This could be a MIPS instruction or a function call. |
| * |
| * for: int-to-float, float-to-int |
| * |
| * On entry: |
| * a0 = target dalvik register address |
| * a1 = src dalvik register address |
| * |
| * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. |
| * |
| */ |
| move rOBJ, a0 # save a0 |
| #ifdef SOFT_FLOAT |
| LOAD(a0, a1) # a0<- vBB |
| # optional op |
| JAL(__floatsisf) # v0<- op, a0-a3 changed |
| .LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg: |
| STORE(v0, rOBJ) # vAA<- v0 |
| #else |
| LOAD_F(fa0, a1) # fa0<- vBB |
| # optional op |
| cvt.s.w fv0, fa0 # fv0 = result |
| .LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f: |
| STORE_F(fv0, rOBJ) # vAA <- fv0 |
| #endif |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */ |
| /* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */ |
| /* |
| * Compare two double precision floating-point values. Puts 0, 1, or -1 into the |
| * destination register based on the results of the comparison. |
| * |
| * Provide a "naninst" instruction that puts 1 or -1 into a1 depending |
| * on what value we'd like to return when one of the operands is NaN. |
| * |
| * The operation we're implementing is: |
| * if (x == y) |
| * return 0; |
| * else if (x < y) |
| * return -1; |
| * else if (x > y) |
| * return 1; |
| * else |
| * return {-1,1}; // one or both operands was NaN |
| * |
| * On entry: |
| * a0 = &op1 [vBB] |
| * a1 = &op2 [vCC] |
| * |
| * for: cmpl-double, cmpg-double |
| */ |
| /* op vAA, vBB, vCC */ |
| |
| /* "clasic" form */ |
| #ifdef SOFT_FLOAT |
| move rOBJ, a0 # save a0 |
| move rBIX, a1 # save a1 |
| LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 |
| JAL(__eqdf2) # v0<- (vBB == vCC) |
| li rTEMP, 0 # vAA<- 0 |
| beqz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish |
| LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 |
    JAL(__ltdf2)                            # v0<- (vBB < vCC)
| li rTEMP, -1 # vAA<- -1 |
| bltz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish |
| LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 |
| JAL(__gtdf2) # v0<- (vBB > vCC) |
| li rTEMP, 1 # vAA<- 1 |
| bgtz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish |
| #else |
| LOAD64_F(fs0, fs0f, a0) # fs0<- vBB |
| LOAD64_F(fs1, fs1f, a1) # fs1<- vCC |
| c.olt.d fcc0, fs0, fs1 # Is fs0 < fs1 |
| li rTEMP, -1 |
| bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish |
| c.olt.d fcc0, fs1, fs0 |
| li rTEMP, 1 |
| bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish |
| c.eq.d fcc0, fs0, fs1 |
| li rTEMP, 0 |
| bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish |
| #endif |
| |
| li rTEMP, 1 |
| |
| TEMPLATE_CMPG_DOUBLE_VFP_finish: |
| move v0, rTEMP # v0<- vAA |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */ |
| /* |
| * Compare two double precision floating-point values. Puts 0, 1, or -1 into the |
| * destination register based on the results of the comparison. |
| * |
| * Provide a "naninst" instruction that puts 1 or -1 into a1 depending |
| * on what value we'd like to return when one of the operands is NaN. |
| * |
| * The operation we're implementing is: |
| * if (x == y) |
| * return 0; |
| * else if (x < y) |
| * return -1; |
| * else if (x > y) |
| * return 1; |
| * else |
| * return {-1,1}; // one or both operands was NaN |
| * |
| * On entry: |
| * a0 = &op1 [vBB] |
| * a1 = &op2 [vCC] |
| * |
| * for: cmpl-double, cmpg-double |
| */ |
| /* op vAA, vBB, vCC */ |
| |
| /* "clasic" form */ |
| #ifdef SOFT_FLOAT |
| move rOBJ, a0 # save a0 |
| move rBIX, a1 # save a1 |
| LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 |
| JAL(__eqdf2) # v0<- (vBB == vCC) |
| li rTEMP, 0 # vAA<- 0 |
| beqz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish |
| LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 |
| JAL(__ltdf2) # a0<- (vBB < vCC) |
| li rTEMP, -1 # vAA<- -1 |
| bltz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish |
| LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 |
| LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 |
| JAL(__gtdf2) # v0<- (vBB > vCC) |
| li rTEMP, 1 # vAA<- 1 |
| bgtz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish |
| #else |
| LOAD64_F(fs0, fs0f, a0) # fs0<- vBB |
| LOAD64_F(fs1, fs1f, a1) # fs1<- vCC |
| c.olt.d fcc0, fs0, fs1 # Is fs0 < fs1 |
| li rTEMP, -1 |
| bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish |
| c.olt.d fcc0, fs1, fs0 |
| li rTEMP, 1 |
| bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish |
| c.eq.d fcc0, fs0, fs1 |
| li rTEMP, 0 |
| bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish |
| #endif |
| |
| li rTEMP, -1 |
| |
| TEMPLATE_CMPL_DOUBLE_VFP_finish: |
| move v0, rTEMP # v0<- vAA |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP |
| dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP: |
| /* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */ |
| /* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */ |
| /* |
| * Compare two floating-point values. Puts 0, 1, or -1 into the |
| * destination register based on the results of the comparison. |
| * |
| * Provide a "naninst" instruction that puts 1 or -1 into a1 depending |
| * on what value we'd like to return when one of the operands is NaN. |
| * |
| * The operation we're implementing is: |
| * if (x == y) |
| * return 0; |
| * else if (x < y) |
| * return -1; |
| * else if (x > y) |
| * return 1; |
| * else |
| * return {-1,1}; // one or both operands was NaN |
| * |
| * On entry: |
| * a0 = &op1 [vBB] |
| * a1 = &op2 [vCC] |
| * |
| * for: cmpl-float, cmpg-float |
| */ |
| /* op vAA, vBB, vCC */ |
| |
| /* "clasic" form */ |
| #ifdef SOFT_FLOAT |
| LOAD(rOBJ, a0) # rOBJ<- vBB |
| LOAD(rBIX, a1) # rBIX<- vCC |
| move a0, rOBJ # a0<- vBB |
| move a1, rBIX # a1<- vCC |
| JAL(__eqsf2) # v0<- (vBB == vCC) |
| li rTEMP, 0 # vAA<- 0 |
| beqz v0, TEMPLATE_CMPG_FLOAT_VFP_finish |
| move a0, rOBJ # a0<- vBB |
| move a1, rBIX # a1<- vCC |
| JAL(__ltsf2) # a0<- (vBB < vCC) |
| li rTEMP, -1 # vAA<- -1 |
| bltz v0, TEMPLATE_CMPG_FLOAT_VFP_finish |
| move a0, rOBJ # a0<- vBB |
| move a1, rBIX # a1<- vCC |
| JAL(__gtsf2) # v0<- (vBB > vCC) |
| li rTEMP, 1 # vAA<- 1 |
| bgtz v0, TEMPLATE_CMPG_FLOAT_VFP_finish |
| #else |
| LOAD_F(fs0, a0) # fs0<- vBB |
| LOAD_F(fs1, a1) # fs1<- vCC |
    c.olt.s   fcc0, fs0, fs1                # Is fs0 < fs1
| li rTEMP, -1 |
| bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish |
| c.olt.s fcc0, fs1, fs0 |
| li rTEMP, 1 |
| bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish |
| c.eq.s fcc0, fs0, fs1 |
| li rTEMP, 0 |
| bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish |
| #endif |
| |
| li rTEMP, 1 |
| |
| TEMPLATE_CMPG_FLOAT_VFP_finish: |
| move v0, rTEMP # v0<- vAA |
| RETURN |
| |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP |
| dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP: |
| /* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */ |
| /* |
| * Compare two floating-point values. Puts 0, 1, or -1 into the |
| * destination register based on the results of the comparison. |
| * |
| * Provide a "naninst" instruction that puts 1 or -1 into a1 depending |
| * on what value we'd like to return when one of the operands is NaN. |
| * |
| * The operation we're implementing is: |
| * if (x == y) |
| * return 0; |
| * else if (x < y) |
| * return -1; |
| * else if (x > y) |
| * return 1; |
| * else |
| * return {-1,1}; // one or both operands was NaN |
| * |
| * On entry: |
| * a0 = &op1 [vBB] |
| * a1 = &op2 [vCC] |
| * |
| * for: cmpl-float, cmpg-float |
| */ |
| /* op vAA, vBB, vCC */ |
| |
| /* "clasic" form */ |
| #ifdef SOFT_FLOAT |
| LOAD(rOBJ, a0) # rOBJ<- vBB |
| LOAD(rBIX, a1) # rBIX<- vCC |
| move a0, rOBJ # a0<- vBB |
| move a1, rBIX # a1<- vCC |
| JAL(__eqsf2) # v0<- (vBB == vCC) |
| li rTEMP, 0 # vAA<- 0 |
| beqz v0, TEMPLATE_CMPL_FLOAT_VFP_finish |
| move a0, rOBJ # a0<- vBB |
| move a1, rBIX # a1<- vCC |
| JAL(__ltsf2) # a0<- (vBB < vCC) |
| li rTEMP, -1 # vAA<- -1 |
| bltz v0, TEMPLATE_CMPL_FLOAT_VFP_finish |
| move a0, rOBJ # a0<- vBB |
| move a1, rBIX # a1<- vCC |
| JAL(__gtsf2) # v0<- (vBB > vCC) |
| li rTEMP, 1 # vAA<- 1 |
| bgtz v0, TEMPLATE_CMPL_FLOAT_VFP_finish |
| #else |
| LOAD_F(fs0, a0) # fs0<- vBB |
| LOAD_F(fs1, a1) # fs1<- vCC |
    c.olt.s   fcc0, fs0, fs1                # Is fs0 < fs1
| li rTEMP, -1 |
| bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish |
| c.olt.s fcc0, fs1, fs0 |
| li rTEMP, 1 |
| bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish |
| c.eq.s fcc0, fs0, fs1 |
| li rTEMP, 0 |
| bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish |
| #endif |
| |
| li rTEMP, -1 |
| |
| TEMPLATE_CMPL_FLOAT_VFP_finish: |
| move v0, rTEMP # v0<- vAA |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP |
| dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP: |
| /* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */ |
| |
| /* |
| * 64-bit floating point sqrt operation. |
| * If the result is a NaN, bail out to library code to do |
| * the right thing. |
| * |
| * On entry: |
| * a2 src addr of op1 |
| * On exit: |
| * v0,v1/fv0 = res |
| */ |
| #ifdef SOFT_FLOAT |
| LOAD64(rARG0, rARG1, a2) # a0/a1<- vBB/vBB+1 |
| #else |
| LOAD64_F(fa0, fa0f, a2) # fa0/fa0f<- vBB/vBB+1 |
| sqrt.d fv0, fa0 |
| c.eq.d fv0, fv0 |
| bc1t 1f |
| #endif |
| JAL(sqrt) |
| 1: |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON |
| dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON: |
| /* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */ |
| /* |
| * Throw an exception from JIT'ed code. |
| * On entry: |
| * a0 Dalvik PC that raises the exception |
| */ |
| j .LhandleException |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_MEM_OP_DECODE |
| dvmCompiler_TEMPLATE_MEM_OP_DECODE: |
| /* File: mips/TEMPLATE_MEM_OP_DECODE.S */ |
| #if defined(WITH_SELF_VERIFICATION) |
| /* |
| * This handler encapsulates heap memory ops for selfVerification mode. |
| * |
| * The call to the handler is inserted prior to a heap memory operation. |
| * This handler then calls a function to decode the memory op, and process |
| * it accordingly. Afterwards, the handler changes the return address to |
| * skip the memory op so it never gets executed. |
| */ |
| #ifdef HARD_FLOAT |
| /* push f0-f31 onto stack */ |
| sw f0, fr0*-4(sp) # push f0 |
| sw f1, fr1*-4(sp) # push f1 |
| sw f2, fr2*-4(sp) # push f2 |
| sw f3, fr3*-4(sp) # push f3 |
| sw f4, fr4*-4(sp) # push f4 |
| sw f5, fr5*-4(sp) # push f5 |
| sw f6, fr6*-4(sp) # push f6 |
| sw f7, fr7*-4(sp) # push f7 |
| sw f8, fr8*-4(sp) # push f8 |
| sw f9, fr9*-4(sp) # push f9 |
| sw f10, fr10*-4(sp) # push f10 |
| sw f11, fr11*-4(sp) # push f11 |
| sw f12, fr12*-4(sp) # push f12 |
| sw f13, fr13*-4(sp) # push f13 |
| sw f14, fr14*-4(sp) # push f14 |
| sw f15, fr15*-4(sp) # push f15 |
| sw f16, fr16*-4(sp) # push f16 |
| sw f17, fr17*-4(sp) # push f17 |
| sw f18, fr18*-4(sp) # push f18 |
| sw f19, fr19*-4(sp) # push f19 |
| sw f20, fr20*-4(sp) # push f20 |
| sw f21, fr21*-4(sp) # push f21 |
| sw f22, fr22*-4(sp) # push f22 |
| sw f23, fr23*-4(sp) # push f23 |
| sw f24, fr24*-4(sp) # push f24 |
| sw f25, fr25*-4(sp) # push f25 |
| sw f26, fr26*-4(sp) # push f26 |
| sw f27, fr27*-4(sp) # push f27 |
| sw f28, fr28*-4(sp) # push f28 |
| sw f29, fr29*-4(sp) # push f29 |
| sw f30, fr30*-4(sp) # push f30 |
| sw f31, fr31*-4(sp) # push f31 |
| |
| sub sp, (32-0)*4 # adjust stack pointer |
| #endif |
| |
| /* push gp registers (except zero, gp, sp, and fp) */ |
| .set noat |
| sw AT, r_AT*-4(sp) # push at |
| .set at |
| sw v0, r_V0*-4(sp) # push v0 |
| sw v1, r_V1*-4(sp) # push v1 |
| sw a0, r_A0*-4(sp) # push a0 |
| sw a1, r_A1*-4(sp) # push a1 |
| sw a2, r_A2*-4(sp) # push a2 |
| sw a3, r_A3*-4(sp) # push a3 |
| sw t0, r_T0*-4(sp) # push t0 |
| sw t1, r_T1*-4(sp) # push t1 |
| sw t2, r_T2*-4(sp) # push t2 |
| sw t3, r_T3*-4(sp) # push t3 |
| sw t4, r_T4*-4(sp) # push t4 |
| sw t5, r_T5*-4(sp) # push t5 |
| sw t6, r_T6*-4(sp) # push t6 |
| sw t7, r_T7*-4(sp) # push t7 |
| sw s0, r_S0*-4(sp) # push s0 |
| sw s1, r_S1*-4(sp) # push s1 |
| sw s2, r_S2*-4(sp) # push s2 |
| sw s3, r_S3*-4(sp) # push s3 |
| sw s4, r_S4*-4(sp) # push s4 |
| sw s5, r_S5*-4(sp) # push s5 |
| sw s6, r_S6*-4(sp) # push s6 |
| sw s7, r_S7*-4(sp) # push s7 |
| sw t8, r_T8*-4(sp) # push t8 |
| sw t9, r_T9*-4(sp) # push t9 |
| sw k0, r_K0*-4(sp) # push k0 |
| sw k1, r_K1*-4(sp) # push k1 |
| sw ra, r_RA*-4(sp) # push RA |
| |
| # Note: even if we don't save all 32 registers, we still need to |
| # adjust SP by 32 registers due to the way we are storing |
| # the registers on the stack. |
| sub sp, (32-0)*4 # adjust stack pointer |
| |
| la a2, .LdvmSelfVerificationMemOpDecode # defined in footer.S |
| lw a2, (a2) |
| move a0, ra # a0<- link register |
| move a1, sp # a1<- stack pointer |
| JALR(a2) |
| |
| /* pop gp registers (except zero, gp, sp, and fp) */ |
| # Note: even if we don't save all 32 registers, we still need to |
| # adjust SP by 32 registers due to the way we are storing |
| # the registers on the stack. |
| add sp, (32-0)*4 # adjust stack pointer |
| .set noat |
| lw AT, r_AT*-4(sp) # pop at |
| .set at |
| lw v0, r_V0*-4(sp) # pop v0 |
| lw v1, r_V1*-4(sp) # pop v1 |
| lw a0, r_A0*-4(sp) # pop a0 |
| lw a1, r_A1*-4(sp) # pop a1 |
| lw a2, r_A2*-4(sp) # pop a2 |
| lw a3, r_A3*-4(sp) # pop a3 |
| lw t0, r_T0*-4(sp) # pop t0 |
| lw t1, r_T1*-4(sp) # pop t1 |
| lw t2, r_T2*-4(sp) # pop t2 |
| lw t3, r_T3*-4(sp) # pop t3 |
| lw t4, r_T4*-4(sp) # pop t4 |
| lw t5, r_T5*-4(sp) # pop t5 |
| lw t6, r_T6*-4(sp) # pop t6 |
| lw t7, r_T7*-4(sp) # pop t7 |
| lw s0, r_S0*-4(sp) # pop s0 |
| lw s1, r_S1*-4(sp) # pop s1 |
| lw s2, r_S2*-4(sp) # pop s2 |
| lw s3, r_S3*-4(sp) # pop s3 |
| lw s4, r_S4*-4(sp) # pop s4 |
| lw s5, r_S5*-4(sp) # pop s5 |
| lw s6, r_S6*-4(sp) # pop s6 |
| lw s7, r_S7*-4(sp) # pop s7 |
| lw t8, r_T8*-4(sp) # pop t8 |
| lw t9, r_T9*-4(sp) # pop t9 |
| lw k0, r_K0*-4(sp) # pop k0 |
| lw k1, r_K1*-4(sp) # pop k1 |
| lw ra, r_RA*-4(sp) # pop RA |
| |
| #ifdef HARD_FLOAT |
| /* pop f0-f31 from stack */ |
| add sp, (32-0)*4 # adjust stack pointer |
| lw f0, fr0*-4(sp) # pop f0 |
| lw f1, fr1*-4(sp) # pop f1 |
| lw f2, fr2*-4(sp) # pop f2 |
| lw f3, fr3*-4(sp) # pop f3 |
| lw f4, fr4*-4(sp) # pop f4 |
| lw f5, fr5*-4(sp) # pop f5 |
| lw f6, fr6*-4(sp) # pop f6 |
| lw f7, fr7*-4(sp) # pop f7 |
| lw f8, fr8*-4(sp) # pop f8 |
| lw f9, fr9*-4(sp) # pop f9 |
| lw f10, fr10*-4(sp) # pop f10 |
| lw f11, fr11*-4(sp) # pop f11 |
| lw f12, fr12*-4(sp) # pop f12 |
| lw f13, fr13*-4(sp) # pop f13 |
| lw f14, fr14*-4(sp) # pop f14 |
| lw f15, fr15*-4(sp) # pop f15 |
| lw f16, fr16*-4(sp) # pop f16 |
| lw f17, fr17*-4(sp) # pop f17 |
| lw f18, fr18*-4(sp) # pop f18 |
| lw f19, fr19*-4(sp) # pop f19 |
| lw f20, fr20*-4(sp) # pop f20 |
| lw f21, fr21*-4(sp) # pop f21 |
| lw f22, fr22*-4(sp) # pop f22 |
| lw f23, fr23*-4(sp) # pop f23 |
| lw f24, fr24*-4(sp) # pop f24 |
| lw f25, fr25*-4(sp) # pop f25 |
| lw f26, fr26*-4(sp) # pop f26 |
| lw f27, fr27*-4(sp) # pop f27 |
| lw f28, fr28*-4(sp) # pop f28 |
| lw f29, fr29*-4(sp) # pop f29 |
| lw f30, fr30*-4(sp) # pop f30 |
| lw f31, fr31*-4(sp) # pop f31 |
| #endif |
| |
| RETURN |
| #endif |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_STRING_COMPARETO |
| dvmCompiler_TEMPLATE_STRING_COMPARETO: |
| /* File: mips/TEMPLATE_STRING_COMPARETO.S */ |
| /* |
| * String's compareTo. |
| * |
| * Requires a0/a1 to have been previously checked for null. Will |
 * return negative if the "this" string is < comp, 0 if they are the
 * same, and positive if >.
| * |
| * IMPORTANT NOTE: |
| * |
| * This code relies on hard-coded offsets for string objects, and must be |
| * kept in sync with definitions in UtfString.h. See asm-constants.h |
| * |
| * On entry: |
| * a0: this object pointer |
| * a1: comp object pointer |
| * |
| */ |
| |
| subu v0, a0, a1 # Same? |
| bnez v0, 1f |
| RETURN |
| 1: |
| lw t0, STRING_FIELDOFF_OFFSET(a0) |
| lw t1, STRING_FIELDOFF_OFFSET(a1) |
| lw t2, STRING_FIELDOFF_COUNT(a0) |
| lw a2, STRING_FIELDOFF_COUNT(a1) |
| lw a0, STRING_FIELDOFF_VALUE(a0) |
| lw a1, STRING_FIELDOFF_VALUE(a1) |
| |
| /* |
| * At this point, we have this/comp: |
| * offset: t0/t1 |
| * count: t2/a2 |
| * value: a0/a1 |
| * We're going to compute |
| * a3 <- countDiff |
| * a2 <- minCount |
| */ |
| subu a3, t2, a2 # a3<- countDiff |
| sleu t7, t2, a2 |
| movn a2, t2, t7 # a2<- minCount |
| |
| /* |
| * Note: data pointers point to first element. |
| */ |
| addu a0, 16 # point to contents[0] |
| addu a1, 16 # point to contents[0] |
| |
| /* Now, build pointers to the string data */ |
| sll t7, t0, 1 # multiply offset by 2 |
| addu a0, a0, t7 |
| sll t7, t1, 1 # multiply offset by 2 |
| addu a1, a1, t7 |
| |
| /* |
| * At this point we have: |
| * a0: *this string data |
| * a1: *comp string data |
| * a2: iteration count for comparison |
| * a3: value to return if the first part of the string is equal |
| * v0: reserved for result |
| * t0-t5 available for loading string data |
| */ |
| |
| subu a2, 2 |
| bltz a2, do_remainder2 |
| |
| /* |
| * Unroll the first two checks so we can quickly catch early mismatch |
| * on long strings (but preserve incoming alignment) |
| */ |
| lhu t0, 0(a0) |
| lhu t1, 0(a1) |
| subu v0, t0, t1 |
| beqz v0, 1f |
| RETURN |
| 1: |
| lhu t2, 2(a0) |
| lhu t3, 2(a1) |
| subu v0, t2, t3 |
| beqz v0, 2f |
| RETURN |
| 2: |
| addu a0, 4 # offset to contents[2] |
| addu a1, 4 # offset to contents[2] |
| li t7, 28 |
| bgt a2, t7, do_memcmp16 |
| subu a2, 3 |
| bltz a2, do_remainder |
| |
| loopback_triple: |
| lhu t0, 0(a0) |
| lhu t1, 0(a1) |
| subu v0, t0, t1 |
| beqz v0, 1f |
| RETURN |
| 1: |
| lhu t2, 2(a0) |
| lhu t3, 2(a1) |
| subu v0, t2, t3 |
| beqz v0, 2f |
| RETURN |
| 2: |
| lhu t4, 4(a0) |
| lhu t5, 4(a1) |
| subu v0, t4, t5 |
| beqz v0, 3f |
| RETURN |
| 3: |
| addu a0, 6 # offset to contents[i+3] |
| addu a1, 6 # offset to contents[i+3] |
| subu a2, 3 |
| bgez a2, loopback_triple |
| |
| do_remainder: |
| addu a2, 3 |
| beqz a2, returnDiff |
| |
| loopback_single: |
| lhu t0, 0(a0) |
| lhu t1, 0(a1) |
| subu v0, t0, t1 |
| bnez v0, 1f |
| addu a0, 2 # offset to contents[i+1] |
| addu a1, 2 # offset to contents[i+1] |
| subu a2, 1 |
| bnez a2, loopback_single |
| |
| returnDiff: |
| move v0, a3 |
| 1: |
| RETURN |
| |
| do_remainder2: |
| addu a2, 2 |
| bnez a2, loopback_single |
| move v0, a3 |
| RETURN |
| |
| /* Long string case */ |
| do_memcmp16: |
| move rOBJ, a3 # save return value if strings are equal |
| JAL(__memcmp16) |
| seq t0, v0, zero |
| movn v0, rOBJ, t0 # overwrite return value if strings are equal |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_STRING_INDEXOF |
| dvmCompiler_TEMPLATE_STRING_INDEXOF: |
| /* File: mips/TEMPLATE_STRING_INDEXOF.S */ |
| /* |
| * String's indexOf. |
| * |
| * Requires a0 to have been previously checked for null. Will |
| * return index of match of a1 in v0. |
| * |
| * IMPORTANT NOTE: |
| * |
| * This code relies on hard-coded offsets for string objects, and must be |
 * kept in sync with definitions in UtfString.h.  See asm-constants.h
| * |
| * On entry: |
| * a0: string object pointer |
| * a1: char to match |
| * a2: Starting offset in string data |
| */ |
| |
| lw t0, STRING_FIELDOFF_OFFSET(a0) |
| lw t1, STRING_FIELDOFF_COUNT(a0) |
| lw v0, STRING_FIELDOFF_VALUE(a0) |
| |
| /* |
| * At this point, we have: |
| * v0: object pointer |
| * a1: char to match |
| * a2: starting offset |
| * t0: offset |
| * t1: string length |
| */ |
| |
| /* Point to first element */ |
| addu v0, 16 # point to contents[0] |
| |
| /* Build pointer to start of string data */ |
| sll t7, t0, 1 # multiply offset by 2 |
| addu v0, v0, t7 |
| |
| /* Save a copy of starting data in v1 */ |
| move v1, v0 |
| |
| /* Clamp start to [0..count] */ |
| slt t7, a2, zero |
| movn a2, zero, t7 |
| sgt t7, a2, t1 |
| movn a2, t1, t7 |
| |
| /* Build pointer to start of data to compare */ |
| sll t7, a2, 1 # multiply offset by 2 |
| addu v0, v0, t7 |
| |
| /* Compute iteration count */ |
| subu a3, t1, a2 |
| |
| /* |
| * At this point we have: |
| * v0: start of data to test |
| * a1: char to compare |
| * a3: iteration count |
| * v1: original start of string |
| * t0-t7 available for loading string data |
| */ |
| subu a3, 4 |
| bltz a3, indexof_remainder |
| |
| indexof_loop4: |
| lhu t0, 0(v0) |
| beq t0, a1, match_0 |
| lhu t0, 2(v0) |
| beq t0, a1, match_1 |
| lhu t0, 4(v0) |
| beq t0, a1, match_2 |
| lhu t0, 6(v0) |
| beq t0, a1, match_3 |
| addu v0, 8 # offset to contents[i+4] |
| subu a3, 4 |
| bgez a3, indexof_loop4 |
| |
| indexof_remainder: |
| addu a3, 4 |
| beqz a3, indexof_nomatch |
| |
| indexof_loop1: |
| lhu t0, 0(v0) |
| beq t0, a1, match_0 |
| addu v0, 2 # offset to contents[i+1] |
| subu a3, 1 |
| bnez a3, indexof_loop1 |
| |
| indexof_nomatch: |
| li v0, -1 |
| RETURN |
| |
| match_0: |
| subu v0, v1 |
| sra v0, v0, 1 # divide by 2 |
| RETURN |
| match_1: |
| addu v0, 2 |
| subu v0, v1 |
| sra v0, v0, 1 # divide by 2 |
| RETURN |
| match_2: |
| addu v0, 4 |
| subu v0, v1 |
| sra v0, v0, 1 # divide by 2 |
| RETURN |
| match_3: |
| addu v0, 6 |
| subu v0, v1 |
| sra v0, v0, 1 # divide by 2 |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INTERPRET |
| dvmCompiler_TEMPLATE_INTERPRET: |
| /* File: mips/TEMPLATE_INTERPRET.S */ |
| /* |
 * This handler transfers control to the interpreter without performing
| * any lookups. It may be called either as part of a normal chaining |
 * operation, or from the transition code in header.S.  We distinguish
| * the two cases by looking at the link register. If called from a |
| * translation chain, it will point to the chaining Dalvik PC. |
| * On entry: |
| * ra - if NULL: |
| * a1 - the Dalvik PC to begin interpretation. |
| * else |
| * [ra] contains Dalvik PC to begin interpretation |
| * rSELF - pointer to thread |
| * rFP - Dalvik frame pointer |
| */ |
| la t0, dvmJitToInterpPunt |
| move a0, a1 |
| beq ra, zero, 1f |
| lw a0, 0(ra) |
| 1: |
| jr t0 |
| # doesn't return |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_MONITOR_ENTER |
| dvmCompiler_TEMPLATE_MONITOR_ENTER: |
| /* File: mips/TEMPLATE_MONITOR_ENTER.S */ |
| /* |
| * Call out to the runtime to lock an object. Because this thread |
| * may have been suspended in THREAD_MONITOR state and the Jit's |
| * translation cache subsequently cleared, we cannot return directly. |
| * Instead, unconditionally transition to the interpreter to resume. |
| * |
| * On entry: |
| * a0 - self pointer |
 *   a1 - the object (which has already been null-checked by the caller)
| * rPC - the Dalvik PC of the following instruction. |
| */ |
| la a2, .LdvmLockObject |
| lw t9, (a2) |
| sw zero, offThread_inJitCodeCache(a0) # record that we're not returning |
| JALR(t9) # dvmLockObject(self, obj) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| la a2, .LdvmJitToInterpNoChain |
| lw a2, (a2) |
| |
| # Bail to interpreter - no chain [note - rPC still contains dPC] |
| #if defined(WITH_JIT_TUNING) |
| li a0, kHeavyweightMonitor |
| #endif |
| jr a2 |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG |
| dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG: |
| /* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */ |
| /* |
| * To support deadlock prediction, this version of MONITOR_ENTER |
| * will always call the heavyweight dvmLockObject, check for an |
| * exception and then bail out to the interpreter. |
| * |
| * On entry: |
| * a0 - self pointer |
 *   a1 - the object (which has already been null-checked by the caller)
| * rPC - the Dalvik PC of the following instruction. |
| * |
| */ |
| la a2, .LdvmLockObject |
| lw t9, (a2) |
| sw zero, offThread_inJitCodeCache(a0) # record that we're not returning |
| JALR(t9) # dvmLockObject(self, obj) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # test for exception |
| lw a1, offThread_exception(rSELF) |
| beqz a1, 1f |
| sub a0, rPC, 2 # roll dPC back to this monitor instruction |
| j .LhandleException |
| 1: |
| # Bail to interpreter - no chain [note - rPC still contains dPC] |
| #if defined(WITH_JIT_TUNING) |
| li a0, kHeavyweightMonitor |
| #endif |
| la a2, .LdvmJitToInterpNoChain |
| lw a2, (a2) |
| jr a2 |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_RESTORE_STATE |
| dvmCompiler_TEMPLATE_RESTORE_STATE: |
| /* File: mips/TEMPLATE_RESTORE_STATE.S */ |
| /* |
| * This handler restores state following a selfVerification memory access. |
| * On entry: |
| * a0 - offset from rSELF to the 1st element of the coreRegs save array. |
| * Note: the following registers are not restored |
| * zero, AT, gp, sp, fp, ra |
| */ |
| |
| add a0, a0, rSELF # pointer to heapArgSpace.coreRegs[0] |
| #if 0 |
| lw zero, r_ZERO*4(a0) # restore zero |
| #endif |
| .set noat |
| lw AT, r_AT*4(a0) # restore at |
| .set at |
| lw v0, r_V0*4(a0) # restore v0 |
| lw v1, r_V1*4(a0) # restore v1 |
| |
| lw a1, r_A1*4(a0) # restore a1 |
| lw a2, r_A2*4(a0) # restore a2 |
| lw a3, r_A3*4(a0) # restore a3 |
| |
| lw t0, r_T0*4(a0) # restore t0 |
| lw t1, r_T1*4(a0) # restore t1 |
| lw t2, r_T2*4(a0) # restore t2 |
| lw t3, r_T3*4(a0) # restore t3 |
| lw t4, r_T4*4(a0) # restore t4 |
| lw t5, r_T5*4(a0) # restore t5 |
| lw t6, r_T6*4(a0) # restore t6 |
| lw t7, r_T7*4(a0) # restore t7 |
| |
| lw s0, r_S0*4(a0) # restore s0 |
| lw s1, r_S1*4(a0) # restore s1 |
| lw s2, r_S2*4(a0) # restore s2 |
| lw s3, r_S3*4(a0) # restore s3 |
| lw s4, r_S4*4(a0) # restore s4 |
| lw s5, r_S5*4(a0) # restore s5 |
| lw s6, r_S6*4(a0) # restore s6 |
| lw s7, r_S7*4(a0) # restore s7 |
| |
| lw t8, r_T8*4(a0) # restore t8 |
| lw t9, r_T9*4(a0) # restore t9 |
| |
| lw k0, r_K0*4(a0) # restore k0 |
| lw k1, r_K1*4(a0) # restore k1 |
| |
| #if 0 |
| lw gp, r_GP*4(a0) # restore gp |
| lw sp, r_SP*4(a0) # restore sp |
| lw fp, r_FP*4(a0) # restore fp |
| lw ra, r_RA*4(a0) # restore ra |
| #endif |
| |
| /* #ifdef HARD_FLOAT */ |
| #if 0 |
| lw f0, fr0*4(a0) # restore f0 |
| lw f1, fr1*4(a0) # restore f1 |
| lw f2, fr2*4(a0) # restore f2 |
| lw f3, fr3*4(a0) # restore f3 |
| lw f4, fr4*4(a0) # restore f4 |
| lw f5, fr5*4(a0) # restore f5 |
| lw f6, fr6*4(a0) # restore f6 |
| lw f7, fr7*4(a0) # restore f7 |
| lw f8, fr8*4(a0) # restore f8 |
| lw f9, fr9*4(a0) # restore f9 |
| lw f10, fr10*4(a0) # restore f10 |
| lw f11, fr11*4(a0) # restore f11 |
| lw f12, fr12*4(a0) # restore f12 |
| lw f13, fr13*4(a0) # restore f13 |
| lw f14, fr14*4(a0) # restore f14 |
| lw f15, fr15*4(a0) # restore f15 |
| lw f16, fr16*4(a0) # restore f16 |
| lw f17, fr17*4(a0) # restore f17 |
| lw f18, fr18*4(a0) # restore f18 |
| lw f19, fr19*4(a0) # restore f19 |
| lw f20, fr20*4(a0) # restore f20 |
| lw f21, fr21*4(a0) # restore f21 |
| lw f22, fr22*4(a0) # restore f22 |
| lw f23, fr23*4(a0) # restore f23 |
| lw f24, fr24*4(a0) # restore f24 |
| lw f25, fr25*4(a0) # restore f25 |
| lw f26, fr26*4(a0) # restore f26 |
| lw f27, fr27*4(a0) # restore f27 |
| lw f28, fr28*4(a0) # restore f28 |
| lw f29, fr29*4(a0) # restore f29 |
| lw f30, fr30*4(a0) # restore f30 |
| lw f31, fr31*4(a0) # restore f31 |
| #endif |
| |
| lw a0, r_A1*4(a0) # restore a0 |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_SAVE_STATE |
| dvmCompiler_TEMPLATE_SAVE_STATE: |
| /* File: mips/TEMPLATE_SAVE_STATE.S */ |
| /* |
| * This handler performs a register save for selfVerification mode. |
| * On entry: |
| * Top of stack + 4: a1 value to save |
| * Top of stack + 0: a0 value to save |
| * a0 - offset from rSELF to the beginning of the heapArgSpace record |
| * a1 - the value of regMap |
| * |
 * The handler must save regMap, r0-r31, and f0-f31 if an FPU is present, then
 * return with r0-r31 holding their original values (note that this means a0
 * and a1 must take the values on the stack - not the ones in those registers
 * on entry).  Finally, the two registers previously pushed must be popped.
| * Note: the following registers are not saved |
| * zero, AT, gp, sp, fp, ra |
| */ |
| add a0, a0, rSELF # pointer to heapArgSpace |
| sw a1, 0(a0) # save regMap |
| add a0, a0, 4 # pointer to coreRegs |
| #if 0 |
| sw zero, r_ZERO*4(a0) # save zero |
| #endif |
| .set noat |
| sw AT, r_AT*4(a0) # save at |
| .set at |
| sw v0, r_V0*4(a0) # save v0 |
| sw v1, r_V1*4(a0) # save v1 |
| |
| lw a1, 0(sp) # recover a0 value |
| sw a1, r_A0*4(a0) # save a0 |
| lw a1, 4(sp) # recover a1 value |
| sw a1, r_A1*4(a0) # save a1 |
| sw a2, r_A2*4(a0) # save a2 |
| sw a3, r_A3*4(a0) # save a3 |
| |
| sw t0, r_T0*4(a0) # save t0 |
| sw t1, r_T1*4(a0) # save t1 |
| sw t2, r_T2*4(a0) # save t2 |
| sw t3, r_T3*4(a0) # save t3 |
| sw t4, r_T4*4(a0) # save t4 |
| sw t5, r_T5*4(a0) # save t5 |
| sw t6, r_T6*4(a0) # save t6 |
| sw t7, r_T7*4(a0) # save t7 |
| |
| sw s0, r_S0*4(a0) # save s0 |
| sw s1, r_S1*4(a0) # save s1 |
| sw s2, r_S2*4(a0) # save s2 |
| sw s3, r_S3*4(a0) # save s3 |
| sw s4, r_S4*4(a0) # save s4 |
| sw s5, r_S5*4(a0) # save s5 |
| sw s6, r_S6*4(a0) # save s6 |
| sw s7, r_S7*4(a0) # save s7 |
| |
| sw t8, r_T8*4(a0) # save t8 |
| sw t9, r_T9*4(a0) # save t9 |
| |
| sw k0, r_K0*4(a0) # save k0 |
| sw k1, r_K1*4(a0) # save k1 |
| |
| #if 0 |
| sw gp, r_GP*4(a0) # save gp |
| sw sp, r_SP*4(a0) # save sp (need to adjust??? ) |
| sw fp, r_FP*4(a0) # save fp |
| sw ra, r_RA*4(a0) # save ra |
| #endif |
| |
| /* #ifdef HARD_FLOAT */ |
| #if 0 |
| sw f0, fr0*4(a0) # save f0 |
| sw f1, fr1*4(a0) # save f1 |
| sw f2, fr2*4(a0) # save f2 |
| sw f3, fr3*4(a0) # save f3 |
| sw f4, fr4*4(a0) # save f4 |
| sw f5, fr5*4(a0) # save f5 |
| sw f6, fr6*4(a0) # save f6 |
| sw f7, fr7*4(a0) # save f7 |
| sw f8, fr8*4(a0) # save f8 |
| sw f9, fr9*4(a0) # save f9 |
| sw f10, fr10*4(a0) # save f10 |
| sw f11, fr11*4(a0) # save f11 |
| sw f12, fr12*4(a0) # save f12 |
| sw f13, fr13*4(a0) # save f13 |
| sw f14, fr14*4(a0) # save f14 |
| sw f15, fr15*4(a0) # save f15 |
| sw f16, fr16*4(a0) # save f16 |
| sw f17, fr17*4(a0) # save f17 |
| sw f18, fr18*4(a0) # save f18 |
| sw f19, fr19*4(a0) # save f19 |
| sw f20, fr20*4(a0) # save f20 |
| sw f21, fr21*4(a0) # save f21 |
| sw f22, fr22*4(a0) # save f22 |
| sw f23, fr23*4(a0) # save f23 |
| sw f24, fr24*4(a0) # save f24 |
| sw f25, fr25*4(a0) # save f25 |
| sw f26, fr26*4(a0) # save f26 |
| sw f27, fr27*4(a0) # save f27 |
| sw f28, fr28*4(a0) # save f28 |
| sw f29, fr29*4(a0) # save f29 |
| sw f30, fr30*4(a0) # save f30 |
| sw f31, fr31*4(a0) # save f31 |
| #endif |
| |
| lw a1, 0(sp) # recover a0 value |
| lw a1, 4(sp) # recover a1 value |
| sub sp, sp, 8 # adjust stack ptr |
| RETURN |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING |
| dvmCompiler_TEMPLATE_PERIODIC_PROFILING: |
| /* File: mips/TEMPLATE_PERIODIC_PROFILING.S */ |
| /* |
| * Increment profile counter for this trace, and decrement |
| * sample counter. If sample counter goes below zero, turn |
| * off profiling. |
| * |
| * On entry |
| * (ra-16) is address of pointer to counter. Note: the counter |
| * actually exists 16 bytes before the return target for mips. |
| * - 4 bytes for prof count addr. |
 *     - 4 bytes for chain cell offset (2 bytes, 32-bit aligned).
| * - 4 bytes for call TEMPLATE_PERIODIC_PROFILING. |
| * - 4 bytes for call delay slot. |
| */ |
| lw a0, -16(ra) |
| lw a1, offThread_pProfileCountdown(rSELF) |
| lw a2, 0(a0) # get counter |
| lw a3, 0(a1) # get countdown timer |
| addu a2, 1 |
| sub a3, 1 # FIXME - bug in ARM code??? |
| bltz a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling |
| sw a2, 0(a0) |
| sw a3, 0(a1) |
| RETURN |
| .LTEMPLATE_PERIODIC_PROFILING_disable_profiling: |
| move rTEMP, ra # preserve ra |
| la a0, dvmJitTraceProfilingOff |
| JALR(a0) |
| jr rTEMP |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_RETURN_PROF |
| dvmCompiler_TEMPLATE_RETURN_PROF: |
| /* File: mips/TEMPLATE_RETURN_PROF.S */ |
| #define TEMPLATE_INLINE_PROFILING |
| /* File: mips/TEMPLATE_RETURN.S */ |
| /* |
 * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
 * If the stored value in returnAddr is non-zero, the caller was compiled
 * by the JIT, so return to the address in the code cache following the
 * invoke instruction.  Otherwise return to the special
 * dvmJitToInterpNoChain entry point.
| */ |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # preserve a0-a2 and ra |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(ra, 12) |
| |
| # a0=rSELF |
| move a0, rSELF |
| la t9, dvmFastMethodTraceExit |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a2 and ra |
| SCRATCH_LOAD(ra, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| #endif |
| SAVEAREA_FROM_FP(a0, rFP) # a0<- saveArea (old) |
| lw t0, offStackSaveArea_prevFrame(a0) # t0<- saveArea->prevFrame |
| lbu t1, offThread_breakFlags(rSELF) # t1<- breakFlags |
| lw rPC, offStackSaveArea_savedPc(a0) # rPC<- saveArea->savedPc |
| #if !defined(WITH_SELF_VERIFICATION) |
| lw t2, offStackSaveArea_returnAddr(a0) # t2<- chaining cell ret |
| #else |
| move t2, zero # disable chaining |
| #endif |
| lw a2, offStackSaveArea_method - sizeofStackSaveArea(t0) |
| # a2<- method we're returning to |
| #if !defined(WITH_SELF_VERIFICATION) |
| beq a2, zero, 1f # bail to interpreter |
| #else |
| bne a2, zero, 2f |
| JALR(ra) # punt to interpreter and compare state |
| # DOUG: assume this does not return ??? |
| 2: |
| #endif |
| la t4, .LdvmJitToInterpNoChainNoProfile # defined in footer.S |
| lw a1, (t4) |
| move rFP, t0 # publish new FP |
| beq a2, zero, 4f |
| lw t0, offMethod_clazz(a2) # t0<- method->clazz |
| 4: |
| |
| sw a2, offThread_method(rSELF) # self->method = newSave->method |
| lw a0, offClassObject_pDvmDex(t0) # a0<- method->clazz->pDvmDex |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp |
| add rPC, rPC, 3*2 # publish new rPC |
| sw a0, offThread_methodClassDex(rSELF) |
    movn    t2, zero, t1                        # check the breakFlags and
| # clear the chaining cell address |
| sw t2, offThread_inJitCodeCache(rSELF) # in code cache or not |
| beq t2, zero, 3f # chaining cell exists? |
| JALR(t2) # jump to the chaining cell |
| # DOUG: assume this does not return ??? |
| 3: |
| #if defined(WITH_JIT_TUNING) |
| li a0, kCallsiteInterpreted |
| #endif |
| j a1 # callsite is interpreted |
| 1: |
| sw zero, offThread_inJitCodeCache(rSELF) # reset inJitCodeCache |
| SAVE_PC_TO_SELF() # SAVE_PC_FP_TO_SELF() |
| SAVE_FP_TO_SELF() |
| la t4, .LdvmMterpStdBail # defined in footer.S |
| lw a2, (t4) |
| move a0, rSELF # Expecting rSELF in a0 |
| JALR(a2) # exit the interpreter |
| # DOUG: assume this does not return ??? |
| |
| #undef TEMPLATE_INLINE_PROFILING |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */ |
| #define TEMPLATE_INLINE_PROFILING |
| /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ |
| /* |
| * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC |
| * into rPC then jump to dvmJitToInterpNoChain to dispatch the |
| * runtime-resolved callee. |
| */ |
| # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite |
| lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize |
| lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize |
| lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd |
| lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags |
| move a3, a1 # a3<- returnCell |
| SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area |
| sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) |
| sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) |
| SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area |
| sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) |
| sub t0, t0, t6 # t0<- bottom (newsave-outsSize) |
| bgeu t0, t9, 1f # bottom < interpStackEnd? |
| RETURN # return to raise stack overflow excep. |
| |
| 1: |
| # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite |
| lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz |
| lw t0, offMethod_accessFlags(a0) # t0<- methodToCall->accessFlags |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) |
| lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns |
| |
| # set up newSaveArea |
| sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) |
| sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) |
| sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) |
| beqz t8, 2f # breakFlags != 0 |
| RETURN # bail to the interpreter |
| |
| 2: |
| and t6, t0, ACC_NATIVE |
| beqz t6, 3f |
| #if !defined(WITH_SELF_VERIFICATION) |
| j .LinvokeNative |
| #else |
| RETURN # bail to the interpreter |
| #endif |
| |
| 3: |
| # continue executing the next instruction through the interpreter |
| la t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S |
| lw rTEMP, (t0) |
| lw a3, offClassObject_pDvmDex(t9) # a3<- method->clazz->pDvmDex |
| |
| # Update "thread" values for the new method |
| sw a0, offThread_method(rSELF) # self->method = methodToCall |
| sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... |
| move rFP, a1 # fp = newFp |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # preserve a0-a3 |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(a3, 12) |
| |
| # a0=methodToCall, a1=rSELF |
| move a1, rSELF |
| la t9, dvmFastMethodTraceEnter |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a3 |
| SCRATCH_LOAD(a3, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| #endif |
| |
| # Start executing the callee |
| #if defined(WITH_JIT_TUNING) |
| li a0, kInlineCacheMiss |
| #endif |
| jr rTEMP # dvmJitToInterpTraceSelectNoChain |
| |
| #undef TEMPLATE_INLINE_PROFILING |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */ |
| #define TEMPLATE_INLINE_PROFILING |
| /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */ |
| /* |
| * For monomorphic callsite, setup the Dalvik frame and return to the |
| * Thumb code through the link register to transfer control to the callee |
| * method through a dedicated chaining cell. |
| */ |
| # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite |
| # methodToCall is guaranteed to be non-native |
| .LinvokeChainProf: |
| lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize |
| lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize |
| lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd |
| lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags |
| move a3, a1 # a3<- returnCell |
| SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area |
| sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) |
| sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) |
| SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area |
| add t2, ra, 8 # setup the punt-to-interp address |
| # 8 bytes skips branch and delay slot |
| sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) |
| sub t0, t0, t6 # t0<- bottom (newsave-outsSize) |
| bgeu t0, t9, 1f # bottom < interpStackEnd? |
| jr t2 # return to raise stack overflow excep. |
| |
| 1: |
| # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite |
| lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) |
| lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns |
| |
| # set up newSaveArea |
| sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) |
| sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) |
| sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) |
| beqz t8, 2f # breakFlags != 0 |
| jr t2 # bail to the interpreter |
| |
| 2: |
| lw a3, offClassObject_pDvmDex(t9) # a3<- methodToCall->clazz->pDvmDex |
| |
| # Update "thread" values for the new method |
| sw a0, offThread_method(rSELF) # self->method = methodToCall |
| sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... |
| move rFP, a1 # fp = newFp |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # preserve a0-a2 and ra |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(ra, 12) |
| |
| move a1, rSELF |
| # a0=methodToCall, a1=rSELF |
| la t9, dvmFastMethodTraceEnter |
| jalr t9 |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a2 and ra |
| SCRATCH_LOAD(ra, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| #endif |
| RETURN # return to the callee-chaining cell |
| |
| #undef TEMPLATE_INLINE_PROFILING |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */ |
| #define TEMPLATE_INLINE_PROFILING |
| /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */ |
| /* |
| * For polymorphic callsite, check whether the cached class pointer matches |
| * the current one. If so setup the Dalvik frame and return to the |
| * Thumb code through the link register to transfer control to the callee |
| * method through a dedicated chaining cell. |
| * |
| * The predicted chaining cell is declared in ArmLIR.h with the |
| * following layout: |
| * |
| * typedef struct PredictedChainingCell { |
| * u4 branch; |
| * u4 delay_slot; |
| * const ClassObject *clazz; |
| * const Method *method; |
| * u4 counter; |
| * } PredictedChainingCell; |
| * |
| * Upon returning to the callsite: |
 * - ra    : to branch to the chaining cell
 * - ra+8  : to punt to the interpreter
 * - ra+16 : to fully resolve the callee, and possibly rechain.
| * a3 <- class |
| */ |
| # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite |
| lw a3, offObject_clazz(a0) # a3 <- this->class |
    lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz
| lw a0, 12(a2) # a0 <- predictedChainCell->method |
| lw t1, offThread_icRechainCount(rSELF) # t1 <- shared rechainCount |
| |
| #if defined(WITH_JIT_TUNING) |
| la rINST, .LdvmICHitCount |
| #add t2, t2, 1 |
| bne a3, rIBASE, 1f |
| nop |
| lw t2, 0(rINST) |
| add t2, t2, 1 |
| sw t2, 0(rINST) |
| 1: |
| #add t2, t2, 1 |
| #endif |
| beq a3, rIBASE, .LinvokeChainProf # branch if predicted chain is valid |
| lw rINST, offClassObject_vtable(a3) # rINST <- this->class->vtable |
| beqz rIBASE, 2f # initialized class or not |
| sub a1, t1, 1 # count-- |
| sw a1, offThread_icRechainCount(rSELF) # write back to InterpState |
| b 3f |
| 2: |
| move a1, zero |
| 3: |
| add ra, ra, 16 # return to fully-resolve landing pad |
| /* |
| * a1 <- count |
| * a2 <- &predictedChainCell |
| * a3 <- this->class |
| * rPC <- dPC |
| * rINST <- this->class->vtable |
| */ |
| RETURN |
| |
| #undef TEMPLATE_INLINE_PROFILING |
| |
| /* ------------------------------ */ |
| .balign 4 |
| .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF |
| dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF: |
| /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */ |
| #define TEMPLATE_INLINE_PROFILING |
| /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */ |
| # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite |
| lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize |
| lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd |
| lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags |
| move a3, a1 # a3<- returnCell |
| SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area |
| sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) |
| sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) |
| SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area |
| bgeu t0, t9, 1f # bottom < interpStackEnd? |
| RETURN # return to raise stack overflow excep. |
| |
| 1: |
| # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite |
| sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) |
| lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns |
| |
| # set up newSaveArea |
| sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) |
| sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) |
| sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) |
| lw rTEMP, offMethod_nativeFunc(a0) # t9<- method->nativeFunc |
| #if !defined(WITH_SELF_VERIFICATION) |
| beqz t8, 2f # breakFlags != 0 |
| RETURN # bail to the interpreter |
| 2: |
| #else |
| RETURN # bail to the interpreter unconditionally |
| #endif |
| |
| # go ahead and transfer control to the native code |
| lw t6, offThread_jniLocal_topCookie(rSELF) # t6<- thread->localRef->... |
| sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp |
| sw zero, offThread_inJitCodeCache(rSELF) # not in the jit code cache |
| sw t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1) |
| # newFp->localRefCookie=top |
| SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area |
| move a2, a0 # a2<- methodToCall |
| move a0, a1 # a0<- newFp |
| add a1, rSELF, offThread_retval # a1<- &retval |
| move a3, rSELF # a3<- self |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| # a2: methodToCall |
| # preserve a0-a3 |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(a3, 12) |
| |
| move a0, a2 |
| move a1, rSELF |
| # a0=JNIMethod, a1=rSELF |
| la t9, dvmFastMethodTraceEnter |
| JALR(t9) # off to the native code |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a3 |
| SCRATCH_LOAD(a3, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| |
| move rOBJ, a2 # save a2 |
| #endif |
| |
| JALR(rTEMP) # off to the native code |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| #if defined(TEMPLATE_INLINE_PROFILING) |
| move a0, rOBJ |
| move a1, rSELF |
| # a0=JNIMethod, a1=rSELF |
| la t9, dvmFastNativeMethodTraceExit |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| #endif |
| |
| # native return; rBIX=newSaveArea |
| # equivalent to dvmPopJniLocals |
| lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr |
| lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top |
| lw a1, offThread_exception(rSELF) # check for exception |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp |
| sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top |
| lw a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) |
| |
| # a0 = dalvikCallsitePC |
| bnez a1, .LhandleException # handle exception if any |
| |
| sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly |
| beqz a2, 3f |
| jr a2 # go if return chaining cell still exist |
| |
| 3: |
| # continue executing the next instruction through the interpreter |
| la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S |
| lw a1, (a1) |
| add rPC, a0, 3*2 # reconstruct new rPC (advance 3 dalvik instr) |
| |
| #if defined(WITH_JIT_TUNING) |
| li a0, kCallsiteInterpreted |
| #endif |
| jr a1 |
| |
| #undef TEMPLATE_INLINE_PROFILING |
| |
| .size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart |
| /* File: mips/footer.S */ |
| /* |
| * =========================================================================== |
| * Common subroutines and data |
| * =========================================================================== |
| */ |
| |
| .section .data.rel.ro |
| .align 4 |
| .LinvokeNative: |
| # Prep for the native call |
| # a1 = newFP, a0 = methodToCall |
| lw t9, offThread_jniLocal_topCookie(rSELF) # t9<- thread->localRef->... |
| sw zero, offThread_inJitCodeCache(rSELF) # not in jit code cache |
| sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp |
| sw t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1) |
| # newFp->localRefCookie=top |
| lhu ra, offThread_subMode(rSELF) |
| SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area |
| |
| move a2, a0 # a2<- methodToCall |
| move a0, a1 # a0<- newFp |
| add a1, rSELF, offThread_retval # a1<- &retval |
| move a3, rSELF # a3<- self |
| andi ra, kSubModeMethodTrace |
| beqz ra, 121f |
| # a2: methodToCall |
| # preserve a0-a3 |
| SCRATCH_STORE(a0, 0) |
| SCRATCH_STORE(a1, 4) |
| SCRATCH_STORE(a2, 8) |
| SCRATCH_STORE(a3, 12) |
| move rTEMP, a2 # preserve a2 |
| |
| move a0, rTEMP |
| move a1, rSELF |
| la t9, dvmFastMethodTraceEnter |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| # restore a0-a3 |
| SCRATCH_LOAD(a3, 12) |
| SCRATCH_LOAD(a2, 8) |
| SCRATCH_LOAD(a1, 4) |
| SCRATCH_LOAD(a0, 0) |
| |
| lw t9, offMethod_nativeFunc(a2) |
| JALR(t9) # call methodToCall->nativeFunc |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| move a0, rTEMP |
| move a1, rSELF |
| la t9, dvmFastNativeMethodTraceExit |
| JALR(t9) |
| lw gp, STACK_OFFSET_GP(sp) |
| b 212f |
| |
| 121: |
| lw t9, offMethod_nativeFunc(a2) |
| JALR(t9) # call methodToCall->nativeFunc |
| lw gp, STACK_OFFSET_GP(sp) |
| |
| 212: |
| # native return; rBIX=newSaveArea |
| # equivalent to dvmPopJniLocals |
| lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr |
| lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top |
| lw a1, offThread_exception(rSELF) # check for exception |
| sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp |
| sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top |
| lw a0, offStackSaveArea_savedPc(rBIX) # reload rPC |
| |
| # a0 = dalvikCallsitePC |
| bnez a1, .LhandleException # handle exception if any |
| |
| sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly |
| beqz a2, 3f |
| jr a2 # go if return chaining cell still exist |
| |
| 3: |
| # continue executing the next instruction through the interpreter |
| la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S |
| lw a1, (a1) |
| add rPC, a0, 3*2 # reconstruct new rPC |
| |
| #if defined(WITH_JIT_TUNING) |
| li a0, kCallsiteInterpreted |
| #endif |
| jr a1 |
| |
| |
| /* |
| * On entry: |
| * a0 Faulting Dalvik PC |
| */ |
| .LhandleException: |
| #if defined(WITH_SELF_VERIFICATION) |
| la t0, .LdeadFood |
| lw t0, (t0) # should not see this under self-verification mode |
| jr t0 |
| .LdeadFood: |
| .word 0xdeadf00d |
| #endif |
| sw zero, offThread_inJitCodeCache(rSELF) # in interpreter land |
| la a1, .LdvmMterpCommonExceptionThrown # PIC way of getting &func |
| lw a1, (a1) |
| la rIBASE, .LdvmAsmInstructionStart # PIC way of getting &func |
| lw rIBASE, (rIBASE) |
    move    rPC, a0                         # reload the faulting Dalvik address
    jr      a1                              # branch to dvmMterpCommonExceptionThrown
| |
| .align 4 |
| .LdvmAsmInstructionStart: |
| .word dvmAsmInstructionStart |
| .LdvmJitToInterpNoChainNoProfile: |
| .word dvmJitToInterpNoChainNoProfile |
| .LdvmJitToInterpTraceSelectNoChain: |
| .word dvmJitToInterpTraceSelectNoChain |
| .LdvmJitToInterpNoChain: |
| .word dvmJitToInterpNoChain |
| .LdvmMterpStdBail: |
| .word dvmMterpStdBail |
| .LdvmMterpCommonExceptionThrown: |
| .word dvmMterpCommonExceptionThrown |
| .LdvmLockObject: |
| .word dvmLockObject |
| #if defined(WITH_JIT_TUNING) |
| .LdvmICHitCount: |
| .word gDvmICHitCount |
| #endif |
| #if defined(WITH_SELF_VERIFICATION) |
| .LdvmSelfVerificationMemOpDecode: |
| .word dvmSelfVerificationMemOpDecode |
| #endif |
| |
| .global dmvCompilerTemplateEnd |
| dmvCompilerTemplateEnd: |
| |
| #endif /* WITH_JIT */ |
| |