/*
 * This file was generated automatically by gen-template.py for 'armv5te'.
 *
 * --> DO NOT EDIT <--
 */

/* File: armv5te/header.S */
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

| 24 | #if defined(WITH_JIT) |
| 25 | |
| 26 | /* |
| 27 | * ARMv5 definitions and declarations. |
| 28 | */ |
| 29 | |
| 30 | /* |
| 31 | ARM EABI general notes: |
| 32 | |
| 33 | r0-r3 hold first 4 args to a method; they are not preserved across method calls |
| 34 | r4-r8 are available for general use |
| 35 | r9 is given special treatment in some situations, but not for us |
| 36 | r10 (sl) seems to be generally available |
| 37 | r11 (fp) is used by gcc (unless -fomit-frame-pointer is set) |
| 38 | r12 (ip) is scratch -- not preserved across method calls |
| 39 | r13 (sp) should be managed carefully in case a signal arrives |
| 40 | r14 (lr) must be preserved |
| 41 | r15 (pc) can be tinkered with directly |
| 42 | |
| 43 | r0 holds returns of <= 4 bytes |
| 44 | r0-r1 hold returns of 8 bytes, low word in r0 |
| 45 | |
| 46 | Callee must save/restore r4+ (except r12) if it modifies them. |
| 47 | |
| 48 | Stack is "full descending". Only the arguments that don't fit in the first 4 |
| 49 | registers are placed on the stack. "sp" points at the first stacked argument |
| 50 | (i.e. the 5th arg). |
| 51 | |
| 52 | VFP: single-precision results in s0, double-precision results in d0. |
| 53 | |
| 54 | In the EABI, "sp" must be 64-bit aligned on entry to a function, and any |
| 55 | 64-bit quantities (long long, double) must be 64-bit aligned. |
| 56 | */ |
| 57 | |
| 58 | /* |
| 59 | JIT and ARM notes: |
| 60 | |
| 61 | The following registers have fixed assignments: |
| 62 | |
| 63 | reg nick purpose |
| 64 | r5 rFP interpreted frame pointer, used for accessing locals and args |
| 65 | r6 rGLUE MterpGlue pointer |
| 66 | |
| 67 | The following registers have fixed assignments in mterp but are scratch |
| 68 | registers in compiled code |
| 69 | |
| 70 | reg nick purpose |
| 71 | r4 rPC interpreted program counter, used for fetching instructions |
| 72 | r7 rIBASE interpreted instruction base pointer, used for computed goto |
| 73 | r8 rINST first 16-bit code unit of current instruction |
| 74 | |
| 75 | Macros are provided for common operations. Each macro MUST emit only |
| 76 | one instruction to make instruction-counting easier. They MUST NOT alter |
| 77 | unspecified registers or condition codes. |
| 78 | */ |
| 79 | |
| 80 | /* single-purpose registers, given names for clarity */ |
| 81 | #define rPC r4 |
| 82 | #define rFP r5 |
| 83 | #define rGLUE r6 |
| 84 | #define rIBASE r7 |
| 85 | #define rINST r8 |
| 86 | |
| 87 | /* |
| 88 | * Given a frame pointer, find the stack save area. |
| 89 | * |
| 90 | * In C this is "((StackSaveArea*)(_fp) -1)". |
| 91 | */ |
| 92 | #define SAVEAREA_FROM_FP(_reg, _fpreg) \ |
| 93 | sub _reg, _fpreg, #sizeofStackSaveArea |
| 94 | |
| 95 | /* |
| 96 | * This is a #include, not a %include, because we want the C pre-processor |
| 97 | * to expand the macros into assembler assignment statements. |
| 98 | */ |
| 99 | #include "../../../mterp/common/asm-constants.h" |
| 100 | |
| 101 | |
| 102 | /* File: armv5te/platform.S */ |
| 103 | /* |
| 104 | * =========================================================================== |
| 105 | * CPU-version-specific defines |
| 106 | * =========================================================================== |
| 107 | */ |
| 108 | |
| 109 | /* |
| 110 | * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5. |
| 111 | * Jump to subroutine. |
| 112 | * |
| 113 | * May modify IP and LR. |
| 114 | */ |
| 115 | .macro LDR_PC_LR source |
| 116 | mov lr, pc |
| 117 | ldr pc, \source |
| 118 | .endm |
| 119 | |
| 120 | |
| 121 | .global dvmCompilerTemplateStart |
| 122 | .type dvmCompilerTemplateStart, %function |
| 123 | .text |
| 124 | |
| 125 | dvmCompilerTemplateStart: |
| 126 | |
| 127 | /* ------------------------------ */ |
| 128 | .balign 4 |
| 129 | .global dvmCompiler_TEMPLATE_CMP_LONG |
| 130 | dvmCompiler_TEMPLATE_CMP_LONG: |
| 131 | /* File: armv5te/TEMPLATE_CMP_LONG.S */ |
| 132 | /* |
| 133 | * Compare two 64-bit values. Puts 0, 1, or -1 into the destination |
| 134 | * register based on the results of the comparison. |
| 135 | * |
| 136 | * We load the full values with LDM, but in practice many values could |
| 137 | * be resolved by only looking at the high word. This could be made |
| 138 | * faster or slower by splitting the LDM into a pair of LDRs. |
| 139 | * |
| 140 | * If we just wanted to set condition flags, we could do this: |
| 141 | * subs ip, r0, r2 |
| 142 | * sbcs ip, r1, r3 |
| 143 | * subeqs ip, r0, r2 |
| 144 | * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific |
| 145 | * integer value, which we can do with 2 conditional mov/mvn instructions |
| 146 | * (set 1, set -1; if they're equal we already have 0 in ip), giving |
| 147 | * us a constant 5-cycle path plus a branch at the end to the |
| 148 | * instruction epilogue code. The multi-compare approach below needs |
| 149 | * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch |
| 150 | * in the worst case (the 64-bit values are equal). |
| 151 | */ |
| 152 | /* cmp-long vAA, vBB, vCC */ |
| 153 | cmp r1, r3 @ compare (vBB+1, vCC+1) |
| 154 | blt .LTEMPLATE_CMP_LONG_less @ signed compare on high part |
| 155 | bgt .LTEMPLATE_CMP_LONG_greater |
| 156 | subs r0, r0, r2 @ r0<- r0 - r2 |
| 157 | bxeq lr |
| 158 | bhi .LTEMPLATE_CMP_LONG_greater @ unsigned compare on low part |
| 159 | .LTEMPLATE_CMP_LONG_less: |
| 160 | mvn r0, #0 @ r0<- -1 |
| 161 | bx lr |
| 162 | .LTEMPLATE_CMP_LONG_greater: |
| 163 | mov r0, #1 @ r0<- 1 |
| 164 | bx lr |
| 165 | |
| 166 | |
| 167 | /* ------------------------------ */ |
| 168 | .balign 4 |
| 169 | .global dvmCompiler_TEMPLATE_RETURN |
| 170 | dvmCompiler_TEMPLATE_RETURN: |
| 171 | /* File: armv5te/TEMPLATE_RETURN.S */ |
| 172 | /* |
| 173 | * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. |
| 174 | * If the stored value in returnAddr |
| 175 | * is non-zero, the caller is compiled by the JIT thus return to the |
| 176 | * address in the code cache following the invoke instruction. Otherwise |
| 177 | * return to the special dvmJitToInterpNoChain entry point. |
| 178 | */ |
| 179 | SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old) |
| 180 | ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame |
| 181 | ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount |
| 182 | ldr rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc |
| 183 | ldr r9, [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret |
| 184 | ldr r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)] |
| 185 | @ r2<- method we're returning to |
| 186 | ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self |
| 187 | cmp r2, #0 @ break frame? |
| 188 | beq 1f @ bail to interpreter |
| 189 | ldr r0, .LdvmJitToInterpNoChain @ defined in footer.S |
| 190 | mov rFP, r10 @ publish new FP |
| 191 | ldrne r10, [r2, #offMethod_clazz] @ r10<- method->clazz |
| 192 | ldr r8, [r8] @ r8<- suspendCount |
| 193 | |
| 194 | str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method |
| 195 | ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex |
| 196 | str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp |
| 197 | add rPC, rPC, #6 @ publish new rPC (advance 6 bytes) |
| 198 | str r1, [rGLUE, #offGlue_methodClassDex] |
| 199 | cmp r8, #0 @ check the suspendCount |
| 200 | movne r9, #0 @ clear the chaining cell address |
| 201 | cmp r9, #0 @ chaining cell exists? |
| 202 | blxne r9 @ jump to the chaining cell |
| 203 | mov pc, r0 @ callsite is interpreted |
| 204 | 1: |
| 205 | stmia rGLUE, {rPC, rFP} @ SAVE_PC_FP_TO_GLUE() |
| 206 | ldr r2, .LdvmMterpStdBail @ defined in footer.S |
| 207 | mov r1, #0 @ changeInterp = false |
| 208 | mov r0, rGLUE @ Expecting rGLUE in r0 |
| 209 | blx r2 @ exit the interpreter |
| 210 | |
| 211 | /* ------------------------------ */ |
| 212 | .balign 4 |
| 213 | .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT |
| 214 | dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT: |
| 215 | /* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ |
| 216 | /* |
| 217 | * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC |
| 218 | * into rPC then jump to dvmJitToInterpNoChain to dispatch the |
| 219 | * runtime-resolved callee. |
| 220 | */ |
| 221 | @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite |
| 222 | ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize |
| 223 | ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize |
| 224 | ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd |
| 225 | ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount |
| 226 | add r3, r1, #1 @ Thumb addr is odd |
| 227 | SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area |
| 228 | sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) |
| 229 | SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area |
| 230 | sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) |
| 231 | ldr r8, [r8] @ r3<- suspendCount (int) |
| 232 | cmp r10, r9 @ bottom < interpStackEnd? |
| 233 | bxlt lr @ return to raise stack overflow excep. |
| 234 | @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite |
| 235 | ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz |
| 236 | ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags |
| 237 | str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] |
| 238 | str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] |
| 239 | ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns |
| 240 | |
| 241 | |
| 242 | @ set up newSaveArea |
| 243 | str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] |
| 244 | str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] |
| 245 | str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] |
| 246 | cmp r8, #0 @ suspendCount != 0 |
| 247 | bxne lr @ bail to the interpreter |
| 248 | tst r10, #ACC_NATIVE |
| 249 | bne .LinvokeNative |
| 250 | /* |
| 251 | * If we want to punt to the interpreter for native call, swap the bne with |
| 252 | * the following |
| 253 | * bxne lr |
| 254 | */ |
| 255 | |
| 256 | |
| 257 | ldr r10, .LdvmJitToInterpNoChain |
| 258 | ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex |
| 259 | ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self |
| 260 | |
| 261 | @ Update "glue" values for the new method |
| 262 | str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall |
| 263 | str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... |
| 264 | mov rFP, r1 @ fp = newFp |
| 265 | str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp |
| 266 | |
| 267 | @ Start executing the callee |
| 268 | mov pc, r10 @ dvmJitToInterpNoChain |
| 269 | |
| 270 | /* ------------------------------ */ |
| 271 | .balign 4 |
| 272 | .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN |
| 273 | dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN: |
| 274 | /* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */ |
| 275 | /* |
| 276 | * For monomorphic callsite, setup the Dalvik frame and return to the |
| 277 | * Thumb code through the link register to transfer control to the callee |
| 278 | * method through a dedicated chaining cell. |
| 279 | */ |
| 280 | @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite |
| 281 | ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize |
| 282 | ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize |
| 283 | ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd |
| 284 | ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount |
| 285 | add r3, r1, #1 @ Thumb addr is odd |
| 286 | SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area |
| 287 | sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) |
| 288 | SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area |
| 289 | add r12, lr, #2 @ setup the punt-to-interp address |
| 290 | sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) |
| 291 | ldr r8, [r8] @ r3<- suspendCount (int) |
| 292 | cmp r10, r9 @ bottom < interpStackEnd? |
| 293 | bxlt r12 @ return to raise stack overflow excep. |
| 294 | @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite |
| 295 | ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz |
| 296 | ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags |
| 297 | str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] |
| 298 | str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] |
| 299 | ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns |
| 300 | |
| 301 | |
| 302 | @ set up newSaveArea |
| 303 | str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] |
| 304 | str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] |
| 305 | str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] |
| 306 | cmp r8, #0 @ suspendCount != 0 |
| 307 | bxne r12 @ bail to the interpreter |
| 308 | tst r10, #ACC_NATIVE |
| 309 | bne .LinvokeNative |
| 310 | /* |
| 311 | * If we want to punt to the interpreter for native call, swap the bne with |
| 312 | * the following |
| 313 | * bxne r12 |
| 314 | */ |
| 315 | |
| 316 | |
| 317 | ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex |
| 318 | ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self |
| 319 | |
| 320 | @ Update "glue" values for the new method |
| 321 | str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall |
| 322 | str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... |
| 323 | mov rFP, r1 @ fp = newFp |
| 324 | str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp |
| 325 | |
| 326 | bx lr @ return to the callee-chaining cell |
| 327 | |
| 328 | |
| 329 | |
| 330 | /* ------------------------------ */ |
| 331 | .balign 4 |
| 332 | .global dvmCompiler_TEMPLATE_CMPG_DOUBLE |
| 333 | dvmCompiler_TEMPLATE_CMPG_DOUBLE: |
| 334 | /* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */ |
| 335 | /* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */ |
| 336 | /* |
| 337 | * For the JIT: incoming arguments are pointers to the arguments in r0/r1 |
| 338 | * result in r0 |
| 339 | * |
| 340 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 341 | * destination register based on the results of the comparison. |
| 342 | * |
| 343 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 344 | * on what value we'd like to return when one of the operands is NaN. |
| 345 | * |
| 346 | * See OP_CMPL_FLOAT for an explanation. |
| 347 | * |
| 348 | * For: cmpl-double, cmpg-double |
| 349 | */ |
| 350 | /* op vAA, vBB, vCC */ |
| 351 | mov r4, lr @ save return address |
| 352 | mov r9, r0 @ save copy of &arg1 |
| 353 | mov r10, r1 @ save copy of &arg2 |
| 354 | ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 |
| 355 | ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 |
| 356 | LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" |
| 357 | bhi .LTEMPLATE_CMPG_DOUBLE_gt_or_nan @ C set and Z clear, disambiguate |
| 358 | mvncc r0, #0 @ (less than) r1<- -1 |
| 359 | moveq r0, #0 @ (equal) r1<- 0, trumps less than |
| 360 | bx r4 |
| 361 | |
| 362 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 363 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 364 | @ make the library call. |
| 365 | .LTEMPLATE_CMPG_DOUBLE_gt_or_nan: |
| 366 | ldmia r10, {r0-r1} @ reverse order |
| 367 | ldmia r9, {r2-r3} |
| 368 | LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < |
| 369 | movcc r0, #1 @ (greater than) r1<- 1 |
| 370 | bxcc r4 |
| 371 | mov r0, #1 @ r1<- 1 or -1 for NaN |
| 372 | bx r4 |
| 373 | |
| 374 | |
| 375 | |
| 376 | /* ------------------------------ */ |
| 377 | .balign 4 |
| 378 | .global dvmCompiler_TEMPLATE_CMPL_DOUBLE |
| 379 | dvmCompiler_TEMPLATE_CMPL_DOUBLE: |
| 380 | /* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */ |
| 381 | /* |
| 382 | * For the JIT: incoming arguments are pointers to the arguments in r0/r1 |
| 383 | * result in r0 |
| 384 | * |
| 385 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 386 | * destination register based on the results of the comparison. |
| 387 | * |
| 388 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 389 | * on what value we'd like to return when one of the operands is NaN. |
| 390 | * |
| 391 | * See OP_CMPL_FLOAT for an explanation. |
| 392 | * |
| 393 | * For: cmpl-double, cmpg-double |
| 394 | */ |
| 395 | /* op vAA, vBB, vCC */ |
| 396 | mov r4, lr @ save return address |
| 397 | mov r9, r0 @ save copy of &arg1 |
| 398 | mov r10, r1 @ save copy of &arg2 |
| 399 | ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 |
| 400 | ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 |
| 401 | LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" |
| 402 | bhi .LTEMPLATE_CMPL_DOUBLE_gt_or_nan @ C set and Z clear, disambiguate |
| 403 | mvncc r0, #0 @ (less than) r1<- -1 |
| 404 | moveq r0, #0 @ (equal) r1<- 0, trumps less than |
| 405 | bx r4 |
| 406 | |
| 407 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 408 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 409 | @ make the library call. |
| 410 | .LTEMPLATE_CMPL_DOUBLE_gt_or_nan: |
| 411 | ldmia r10, {r0-r1} @ reverse order |
| 412 | ldmia r9, {r2-r3} |
| 413 | LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < |
| 414 | movcc r0, #1 @ (greater than) r1<- 1 |
| 415 | bxcc r4 |
| 416 | mvn r0, #0 @ r1<- 1 or -1 for NaN |
| 417 | bx r4 |
| 418 | |
| 419 | |
| 420 | /* ------------------------------ */ |
| 421 | .balign 4 |
| 422 | .global dvmCompiler_TEMPLATE_CMPG_FLOAT |
| 423 | dvmCompiler_TEMPLATE_CMPG_FLOAT: |
| 424 | /* File: armv5te/TEMPLATE_CMPG_FLOAT.S */ |
| 425 | /* File: armv5te/TEMPLATE_CMPL_FLOAT.S */ |
| 426 | /* |
| 427 | * For the JIT: incoming arguments in r0, r1 |
| 428 | * result in r0 |
| 429 | * |
| 430 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 431 | * destination register based on the results of the comparison. |
| 432 | * |
| 433 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 434 | * on what value we'd like to return when one of the operands is NaN. |
| 435 | * |
| 436 | * The operation we're implementing is: |
| 437 | * if (x == y) |
| 438 | * return 0; |
| 439 | * else if (x < y) |
| 440 | * return -1; |
| 441 | * else if (x > y) |
| 442 | * return 1; |
| 443 | * else |
| 444 | * return {-1,1}; // one or both operands was NaN |
| 445 | * |
| 446 | * The straightforward implementation requires 3 calls to functions |
| 447 | * that return a result in r0. We can do it with two calls if our |
| 448 | * EABI library supports __aeabi_cfcmple (only one if we want to check |
| 449 | * for NaN directly): |
| 450 | * check x <= y |
| 451 | * if <, return -1 |
| 452 | * if ==, return 0 |
| 453 | * check y <= x |
| 454 | * if <, return 1 |
| 455 | * return {-1,1} |
| 456 | * |
| 457 | * for: cmpl-float, cmpg-float |
| 458 | */ |
| 459 | /* op vAA, vBB, vCC */ |
| 460 | mov r4, lr @ save return address |
| 461 | mov r9, r0 @ Save copies - we may need to redo |
| 462 | mov r10, r1 |
| 463 | LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq |
| 464 | bhi .LTEMPLATE_CMPG_FLOAT_gt_or_nan @ C set and Z clear, disambiguate |
| 465 | mvncc r0, #0 @ (less than) r0<- -1 |
| 466 | moveq r0, #0 @ (equal) r0<- 0, trumps less than |
| 467 | bx r4 |
| 468 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 469 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 470 | @ make the library call. |
| 471 | .LTEMPLATE_CMPG_FLOAT_gt_or_nan: |
| 472 | mov r1, r9 @ reverse order |
| 473 | mov r0, r10 |
| 474 | LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < |
| 475 | movcc r0, #1 @ (greater than) r1<- 1 |
| 476 | bxcc r4 |
| 477 | mov r0, #1 @ r1<- 1 or -1 for NaN |
| 478 | bx r4 |
| 479 | |
| 480 | |
| 481 | |
| 482 | |
| 483 | /* ------------------------------ */ |
| 484 | .balign 4 |
| 485 | .global dvmCompiler_TEMPLATE_CMPL_FLOAT |
| 486 | dvmCompiler_TEMPLATE_CMPL_FLOAT: |
| 487 | /* File: armv5te/TEMPLATE_CMPL_FLOAT.S */ |
| 488 | /* |
| 489 | * For the JIT: incoming arguments in r0, r1 |
| 490 | * result in r0 |
| 491 | * |
| 492 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 493 | * destination register based on the results of the comparison. |
| 494 | * |
| 495 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 496 | * on what value we'd like to return when one of the operands is NaN. |
| 497 | * |
| 498 | * The operation we're implementing is: |
| 499 | * if (x == y) |
| 500 | * return 0; |
| 501 | * else if (x < y) |
| 502 | * return -1; |
| 503 | * else if (x > y) |
| 504 | * return 1; |
| 505 | * else |
| 506 | * return {-1,1}; // one or both operands was NaN |
| 507 | * |
| 508 | * The straightforward implementation requires 3 calls to functions |
| 509 | * that return a result in r0. We can do it with two calls if our |
| 510 | * EABI library supports __aeabi_cfcmple (only one if we want to check |
| 511 | * for NaN directly): |
| 512 | * check x <= y |
| 513 | * if <, return -1 |
| 514 | * if ==, return 0 |
| 515 | * check y <= x |
| 516 | * if <, return 1 |
| 517 | * return {-1,1} |
| 518 | * |
| 519 | * for: cmpl-float, cmpg-float |
| 520 | */ |
| 521 | /* op vAA, vBB, vCC */ |
| 522 | mov r4, lr @ save return address |
| 523 | mov r9, r0 @ Save copies - we may need to redo |
| 524 | mov r10, r1 |
| 525 | LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq |
| 526 | bhi .LTEMPLATE_CMPL_FLOAT_gt_or_nan @ C set and Z clear, disambiguate |
| 527 | mvncc r0, #0 @ (less than) r0<- -1 |
| 528 | moveq r0, #0 @ (equal) r0<- 0, trumps less than |
| 529 | bx r4 |
| 530 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 531 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 532 | @ make the library call. |
| 533 | .LTEMPLATE_CMPL_FLOAT_gt_or_nan: |
| 534 | mov r1, r9 @ reverse order |
| 535 | mov r0, r10 |
| 536 | LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < |
| 537 | movcc r0, #1 @ (greater than) r1<- 1 |
| 538 | bxcc r4 |
| 539 | mvn r0, #0 @ r1<- 1 or -1 for NaN |
| 540 | bx r4 |
| 541 | |
| 542 | |
| 543 | |
| 544 | /* ------------------------------ */ |
| 545 | .balign 4 |
| 546 | .global dvmCompiler_TEMPLATE_MUL_LONG |
| 547 | dvmCompiler_TEMPLATE_MUL_LONG: |
| 548 | /* File: armv5te/TEMPLATE_MUL_LONG.S */ |
| 549 | /* |
| 550 | * Signed 64-bit integer multiply. |
| 551 | * |
| 552 | * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1 |
| 553 | * |
| 554 | * Consider WXxYZ (r1r0 x r3r2) with a long multiply: |
| 555 | * WX |
| 556 | * x YZ |
| 557 | * -------- |
| 558 | * ZW ZX |
| 559 | * YW YX |
| 560 | * |
| 561 | * The low word of the result holds ZX, the high word holds |
| 562 | * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because |
| 563 | * it doesn't fit in the low 64 bits. |
| 564 | * |
| 565 | * Unlike most ARM math operations, multiply instructions have |
| 566 | * restrictions on using the same register more than once (Rd and Rm |
| 567 | * cannot be the same). |
| 568 | */ |
| 569 | /* mul-long vAA, vBB, vCC */ |
| 570 | mul ip, r2, r1 @ ip<- ZxW |
| 571 | umull r9, r10, r2, r0 @ r9/r10 <- ZxX |
| 572 | mla r2, r0, r3, ip @ r2<- YxX + (ZxW) |
| 573 | add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) |
| 574 | mov r0,r9 |
| 575 | mov r1,r10 |
| 576 | bx lr |
| 577 | |
| 578 | /* ------------------------------ */ |
| 579 | .balign 4 |
| 580 | .global dvmCompiler_TEMPLATE_SHL_LONG |
| 581 | dvmCompiler_TEMPLATE_SHL_LONG: |
| 582 | /* File: armv5te/TEMPLATE_SHL_LONG.S */ |
| 583 | /* |
| 584 | * Long integer shift. This is different from the generic 32/64-bit |
| 585 | * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| 586 | * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| 587 | * 6 bits. |
| 588 | */ |
| 589 | /* shl-long vAA, vBB, vCC */ |
| 590 | and r2, r2, #63 @ r2<- r2 & 0x3f |
| 591 | mov r1, r1, asl r2 @ r1<- r1 << r2 |
| 592 | rsb r3, r2, #32 @ r3<- 32 - r2 |
| 593 | orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 << (32-r2)) |
| 594 | subs ip, r2, #32 @ ip<- r2 - 32 |
| 595 | movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32) |
| 596 | mov r0, r0, asl r2 @ r0<- r0 << r2 |
| 597 | bx lr |
| 598 | |
| 599 | /* ------------------------------ */ |
| 600 | .balign 4 |
| 601 | .global dvmCompiler_TEMPLATE_SHR_LONG |
| 602 | dvmCompiler_TEMPLATE_SHR_LONG: |
| 603 | /* File: armv5te/TEMPLATE_SHR_LONG.S */ |
| 604 | /* |
| 605 | * Long integer shift. This is different from the generic 32/64-bit |
| 606 | * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| 607 | * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| 608 | * 6 bits. |
| 609 | */ |
| 610 | /* shr-long vAA, vBB, vCC */ |
| 611 | and r2, r2, #63 @ r0<- r0 & 0x3f |
| 612 | mov r0, r0, lsr r2 @ r0<- r2 >> r2 |
| 613 | rsb r3, r2, #32 @ r3<- 32 - r2 |
| 614 | orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) |
| 615 | subs ip, r2, #32 @ ip<- r2 - 32 |
| 616 | movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32) |
| 617 | mov r1, r1, asr r2 @ r1<- r1 >> r2 |
| 618 | bx lr |
| 619 | |
| 620 | |
| 621 | /* ------------------------------ */ |
| 622 | .balign 4 |
| 623 | .global dvmCompiler_TEMPLATE_USHR_LONG |
| 624 | dvmCompiler_TEMPLATE_USHR_LONG: |
| 625 | /* File: armv5te/TEMPLATE_USHR_LONG.S */ |
| 626 | /* |
| 627 | * Long integer shift. This is different from the generic 32/64-bit |
| 628 | * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| 629 | * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| 630 | * 6 bits. |
| 631 | */ |
| 632 | /* ushr-long vAA, vBB, vCC */ |
| 633 | and r2, r2, #63 @ r0<- r0 & 0x3f |
| 634 | mov r0, r0, lsr r2 @ r0<- r2 >> r2 |
| 635 | rsb r3, r2, #32 @ r3<- 32 - r2 |
| 636 | orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) |
| 637 | subs ip, r2, #32 @ ip<- r2 - 32 |
| 638 | movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32) |
| 639 | mov r1, r1, lsr r2 @ r1<- r1 >>> r2 |
| 640 | bx lr |
| 641 | |
| 642 | |
| 643 | .size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart |
| 644 | /* File: armv5te/footer.S */ |
| 645 | /* |
| 646 | * =========================================================================== |
| 647 | * Common subroutines and data |
| 648 | * =========================================================================== |
| 649 | */ |
| 650 | |
| 651 | .text |
| 652 | .align 2 |
| 653 | .LinvokeNative: |
| 654 | @ Prep for the native call |
| 655 | @ r1 = newFP, r0 = methodToCall |
| 656 | ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self |
| 657 | ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext |
| 658 | str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp |
| 659 | str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)] |
| 660 | @ newFp->localRefTop=refNext |
| 661 | mov r9, r3 @ r9<- glue->self (preserve) |
| 662 | SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area |
| 663 | |
| 664 | mov r2, r0 @ r2<- methodToCall |
| 665 | mov r0, r1 @ r0<- newFP |
| 666 | add r1, rGLUE, #offGlue_retval @ r1<- &retval |
| 667 | |
| 668 | LDR_PC_LR "[r2, #offMethod_nativeFunc]" |
| 669 | |
| 670 | @ native return; r9=self, r10=newSaveArea |
| 671 | @ equivalent to dvmPopJniLocals |
| 672 | ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret |
| 673 | ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop |
| 674 | ldr r1, [r9, #offThread_exception] @ check for exception |
| 675 | str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp |
| 676 | cmp r1, #0 @ null? |
| 677 | str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0 |
| 678 | bne .LhandleException @ no, handle exception |
| 679 | bx r2 |
| 680 | |
| 681 | /* FIXME - untested */ |
| 682 | .LhandleException: |
| 683 | ldr rIBASE, .LdvmAsmInstructionStart |
| 684 | ldr rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC |
| 685 | b dvmMterpCommonExceptionThrown |
| 686 | |
| 687 | .align 2 |
| 688 | .LdvmAsmInstructionStart: |
| 689 | .word dvmAsmInstructionStart |
| 690 | .LdvmJitToInterpNoChain: |
| 691 | .word dvmJitToInterpNoChain |
| 692 | .LdvmMterpStdBail: |
| 693 | .word dvmMterpStdBail |
| 694 | .L__aeabi_cdcmple: |
| 695 | .word __aeabi_cdcmple |
| 696 | .L__aeabi_cfcmple: |
| 697 | .word __aeabi_cfcmple |
| 698 | |
| 699 | .global dmvCompilerTemplateEnd |
| 700 | dmvCompilerTemplateEnd: |
| 701 | |
| 702 | #endif /* WITH_JIT */ |
| 703 | |