sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- The core dispatch loop, for jumping to a code address. ---*/ |
| 4 | /*--- dispatch-ppc64-aix5.S ---*/ |
| 5 | /*--------------------------------------------------------------------*/ |
| 6 | |
| 7 | /* |
| 8 | This file is part of Valgrind, a dynamic binary instrumentation |
| 9 | framework. |
| 10 | |
sewardj | 4d474d0 | 2008-02-11 11:34:59 +0000 | [diff] [blame] | 11 | Copyright (C) 2006-2008 OpenWorks LLP |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 12 | info@open-works.co.uk |
| 13 | |
| 14 | This program is free software; you can redistribute it and/or |
| 15 | modify it under the terms of the GNU General Public License as |
| 16 | published by the Free Software Foundation; either version 2 of the |
| 17 | License, or (at your option) any later version. |
| 18 | |
| 19 | This program is distributed in the hope that it will be useful, but |
| 20 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 22 | General Public License for more details. |
| 23 | |
| 24 | You should have received a copy of the GNU General Public License |
| 25 | along with this program; if not, write to the Free Software |
| 26 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 27 | 02111-1307, USA. |
| 28 | |
| 29 | The GNU General Public License is contained in the file COPYING. |
sewardj | 38dba99 | 2007-04-29 09:06:40 +0000 | [diff] [blame] | 30 | |
| 31 | Neither the names of the U.S. Department of Energy nor the |
| 32 | University of California nor the names of its contributors may be |
| 33 | used to endorse or promote products derived from this software |
| 34 | without prior written permission. |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 35 | */ |
| 36 | |
| 37 | #include "pub_core_basics_asm.h" |
| 38 | #include "pub_core_dispatch_asm.h" |
| 39 | #include "pub_core_transtab_asm.h" |
| 40 | #include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */ |
| 41 | |
| 42 | |
| 43 | /*------------------------------------------------------------*/ |
| 44 | /*--- ---*/ |
| 45 | /*--- The dispatch loop. VG_(run_innerloop) is used to ---*/ |
| 46 | /*--- run all translations except no-redir ones. ---*/ |
| 47 | /*--- ---*/ |
| 48 | /*------------------------------------------------------------*/ |
| 49 | |
| 50 | /*----------------------------------------------------*/ |
| 51 | /*--- Incomprehensible TOC mumbo-jumbo nonsense. ---*/ |
| 52 | /*----------------------------------------------------*/ |
| 53 | |
| 54 | /* No, I don't have a clue either. I just compiled a bit of |
| 55 | C with gcc and copied the assembly code it produced. */ |
| 56 | |
| 57 | /* Basically "ld rd, tocent__foo(2)" gets &foo into rd. */ |
| 58 | |
| 59 | .file "dispatch-ppc64-aix5.S" |
| 60 | .machine "ppc64" |
| | /* TOC entries for the globals used in this file. Each tocent__X |
| | label marks a TOC slot holding &X, so "ld rd, tocent__X(2)" |
| | fetches the address of X. Every entry carries its own [TC] name, |
| | matching the symbol it points to; no two entries share a name. */ |
| 61 | .toc |
| 62 | .csect .text[PR] |
| 63 | .toc |
| 64 | tocent__vgPlain_dispatch_ctr: |
| 65 | .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr[RW] |
| 66 | tocent__vgPlain_machine_ppc64_has_VMX: |
| 67 | .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX[RW] |
| 68 | tocent__vgPlain_tt_fast: |
| 69 | .tc vgPlain_tt_fast[TC],vgPlain_tt_fast[RW] |
| 70 | tocent__vgPlain_tt_fastN: |
| 71 | .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN[RW] |
| 72 | .csect .text[PR] |
| 73 | .align 2 |
| 74 | .globl vgPlain_run_innerloop |
| 75 | .globl .vgPlain_run_innerloop |
| | /* Descriptor for VG_(run_innerloop): three doublewords holding the |
| | code entry point, the TOC anchor, and zero. */ |
| 76 | .csect vgPlain_run_innerloop[DS] |
| 77 | vgPlain_run_innerloop: |
| 78 | .llong .vgPlain_run_innerloop, TOC[tc0], 0 |
| 79 | .csect .text[PR] |
| 80 | |
| 81 | /*----------------------------------------------------*/ |
| 82 | /*--- Preamble (set everything up) ---*/ |
| 83 | /*----------------------------------------------------*/ |
| 84 | |
| 85 | /* signature: |
| 86 | UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling ); |
| | |
| | Entry sequence: saves LR and CR in the caller's frame, drops r1 by |
| | 2048, saves f14-f31 and r13-r31 in the new frame, zeroes FPSCR, |
| | loads the guest CIA into r3, then branches to the profiled or the |
| | unprofiled dispatch loop depending on whether r4 (do_profiling) |
| | is nonzero. |
| 87 | */ |
| 88 | .vgPlain_run_innerloop: |
| 89 | |
| 90 | /* r3 holds guest_state */ |
| 91 | /* r4 holds do_profiling */ |
| 92 | /* Rather than attempt to make sense of the AIX ABI, just |
| 93 | drop r1 by 512 (to get away from the caller's frame), then |
| 94 | 1024 (to give ourselves a 1024-byte save area), and then |
| 95 | another 512 (to clear our save area). In all, drop r1 by 2048 |
| 96 | and dump stuff on the stack at 512(1)..1536(1). */ |
| 97 | |
| 98 | /* ----- entry point to VG_(run_innerloop) ----- */ |
| 99 | /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */ |
| 100 | |
| 101 | /* Save lr and cr (into the caller's frame, before r1 is moved) */ |
| 102 | mflr 0 |
| 103 | std 0,16(1) |
| 104 | mfcr 0 |
| 105 | std 0,8(1) |
| 106 | |
| 107 | /* New stack frame; stdu also stores the old r1 at the new 0(r1) */ |
| 108 | stdu 1,-2048(1) /* sp should maintain 16-byte alignment */ |
| 109 | |
| 110 | /* Save callee-saved registers... */ |
| 111 | /* r3, r4 are live here, so use r5 */ |
| 112 | |
| 113 | /* Floating-point reg save area : 144 bytes (f14..f31, 8 bytes each) at r1[256+256..256+399] */ |
| 114 | stfd 31,256+392(1) |
| 115 | stfd 30,256+384(1) |
| 116 | stfd 29,256+376(1) |
| 117 | stfd 28,256+368(1) |
| 118 | stfd 27,256+360(1) |
| 119 | stfd 26,256+352(1) |
| 120 | stfd 25,256+344(1) |
| 121 | stfd 24,256+336(1) |
| 122 | stfd 23,256+328(1) |
| 123 | stfd 22,256+320(1) |
| 124 | stfd 21,256+312(1) |
| 125 | stfd 20,256+304(1) |
| 126 | stfd 19,256+296(1) |
| 127 | stfd 18,256+288(1) |
| 128 | stfd 17,256+280(1) |
| 129 | stfd 16,256+272(1) |
| 130 | stfd 15,256+264(1) |
| 131 | stfd 14,256+256(1) |
| 132 | |
| 133 | /* General reg save area : 152 bytes (r13..r31, 8 bytes each) at r1[256+400 .. 256+551] */ |
| 134 | std 31,256+544(1) |
| 135 | std 30,256+536(1) |
| 136 | std 29,256+528(1) |
| 137 | std 28,256+520(1) |
| 138 | std 27,256+512(1) |
| 139 | std 26,256+504(1) |
| 140 | std 25,256+496(1) |
| 141 | std 24,256+488(1) |
| 142 | std 23,256+480(1) |
| 143 | std 22,256+472(1) |
| 144 | std 21,256+464(1) |
| 145 | std 20,256+456(1) |
| 146 | std 19,256+448(1) |
| 147 | std 18,256+440(1) |
| 148 | std 17,256+432(1) |
| 149 | std 16,256+424(1) |
| 150 | std 15,256+416(1) |
| 151 | std 14,256+408(1) |
| 152 | /* Probably not necessary to save r13 (thread-specific ptr), |
| 153 | as VEX stays clear of it... but what the hell. */ |
| 154 | std 13,256+400(1) |
| 155 | |
| 156 | /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI. |
| 157 | The Linux kernel might not actually use VRSAVE for its intended |
| 158 | purpose, but it should be harmless to preserve anyway. */ |
| 159 | /* r3, r4 are live here, so use r5. With the save code below commented out, both outcomes of this test arrive at LafterVMX1. */ |
| 160 | ld 5,tocent__vgPlain_machine_ppc64_has_VMX(2) |
| 161 | ld 5,0(5) |
| 162 | cmpldi 5,0 |
| 163 | beq LafterVMX1 |
| 164 | |
| 165 | // Sigh. AIX 5.2 has no idea that Altivec exists. |
| 166 | // /* VRSAVE save word : 4 bytes at r1[476 .. 479] */ |
| 167 | // mfspr 5,256 /* vrsave reg is spr number 256 */ |
| 168 | // stw 5,476(1) |
| 169 | // NOTE(review): the offsets in this disabled code (476..671) overlap the FP (512..655) and GPR (656..807) save areas above; rebase them before re-enabling. |
| 170 | // /* Vector reg save area (quadword aligned): |
| 171 | // 192 bytes at r1[480 .. 671] */ |
| 172 | // li 5,656 |
| 173 | // stvx 31,5,1 |
| 174 | // li 5,640 |
| 175 | // stvx 30,5,1 |
| 176 | // li 5,624 |
| 177 | // stvx 29,5,1 |
| 178 | // li 5,608 |
| 179 | // stvx 28,5,1 |
| 180 | // li 5,592 |
| 181 | // stvx 27,5,1 |
| 182 | // li 5,576 |
| 183 | // stvx 26,5,1 |
| 184 | // li 5,560 |
| 185 | // stvx 25,5,1 |
| 186 | // li 5,544 |
| 187 | // stvx 24,5,1 |
| 188 | // li 5,528 |
| 189 | // stvx 23,5,1 |
| 190 | // li 5,512 |
| 191 | // stvx 22,5,1 |
| 192 | // li 5,496 |
| 193 | // stvx 21,5,1 |
| 194 | // li 5,480 |
| 195 | // stvx 20,5,1 |
| 196 | LafterVMX1: |
| 197 | |
| 198 | /* Local variable space... */ |
| 199 | /* Put the original guest state pointer at r1[256]. We |
| 200 | will need to refer to it each time round the dispatch loop. |
| 201 | Apart from that, we can use r1[0 .. 255] and r1[264 .. 511] |
| 202 | as scratch space. */ |
| 203 | |
| 204 | /* r3 holds guest_state */ |
| 205 | /* r4 holds do_profiling */ |
| 206 | mr 31,3 /* r31 (generated code gsp) = r3 */ |
| 207 | std 3,256(1) /* stash orig guest_state ptr */ |
| 208 | |
| 209 | /* hold dispatch_ctr (NOTE: 32-bit value) in r29 */ |
| 210 | ld 5,tocent__vgPlain_dispatch_ctr(2) |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 211 | lwz 29,0(5) /* 32-bit zero-extending load */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 212 | |
| 213 | /* set host FPU control word to the default mode expected |
| 214 | by VEX-generated code. See comments in libvex.h for |
| 215 | more info. */ |
| 216 | /* get zero into f3 (tedious): store a zero doubleword to scratch memory and load it back as a double */ |
| 217 | /* note: fsub 3,3,3 is not a reliable way to do this, |
| 218 | since if f3 holds a NaN or similar then we don't necessarily |
| 219 | wind up with zero. */ |
| 220 | li 5,0 |
| 221 | std 5,128(1) /* r1[128] is scratch */ |
| 222 | lfd 3,128(1) |
| 223 | mtfsf 0xFF,3 /* fpscr = f3 */ |
| 224 | |
| 225 | /* set host AltiVec control word to the default mode expected |
| 226 | by VEX-generated code. (The actual VSCR write is commented out below, so only the has_VMX test is executed.) */ |
| 227 | ld 5,tocent__vgPlain_machine_ppc64_has_VMX(2) |
| 228 | ld 5,0(5) |
| 229 | cmpldi 5,0 |
| 230 | beq LafterVMX2 |
| 231 | |
| 232 | // Sigh. AIX 5.2 has no idea that Altivec exists. |
| 233 | // vspltisw 3,0x0 /* generate zero */ |
| 234 | // mtvscr 3 |
| 235 | LafterVMX2: |
| 236 | |
| 237 | /* fetch %CIA into r3 (r31 = guest state pointer, set above) */ |
| 238 | ld 3,OFFSET_ppc64_CIA(31) |
| 239 | |
| 240 | /* fall into main loop (the right one) */ |
| 241 | /* r4 = do_profiling. It's probably trashed after here, |
| 242 | but that's OK: we don't need it after here. */ |
| 243 | cmpldi 4,0 |
| 244 | beq VG_(run_innerloop__dispatch_unprofiled) |
| 245 | b VG_(run_innerloop__dispatch_profiled) |
| 246 | /*NOTREACHED*/ |
| 247 | |
| 248 | /*----------------------------------------------------*/ |
| 249 | /*--- NO-PROFILING (standard) dispatcher ---*/ |
| 250 | /*----------------------------------------------------*/ |
| 251 | |
| 252 | .globl VG_(run_innerloop__dispatch_unprofiled) |
| 253 | VG_(run_innerloop__dispatch_unprofiled): |
| 254 | /* At entry: Live regs: |
| 255 | r1 (=sp) |
| 256 | r3 (=CIA = next guest address) |
| 257 | r29 (=dispatch_ctr) |
| 258 | r31 (=guest_state) |
| 259 | Stack state: |
| 260 | 256(r1) (=orig guest_state) |
| | |
| | Each iteration: exit if r31 no longer equals the saved guest |
| | state pointer; store r3 to the guest CIA; decrement r29 and exit |
| | if it reaches zero; look r3 up in VG_(tt_fast) and exit on a |
| | miss; otherwise call the cached host code and go round again. |
| | Scratch registers used here: r4, r5, r6, r7, CTR, LR, CR0. |
| 261 | */ |
| 262 | |
| 263 | /* Has the guest state pointer been messed with? If yes, exit. */ |
| 264 | ld 5,256(1) /* original guest_state ptr */ |
| 265 | cmpd 5,31 |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 266 | ld 5,tocent__vgPlain_tt_fast(2) /* r5 = &VG_(tt_fast); placed between the compare and its branch, the load leaves CR0 untouched */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 267 | bne gsp_changed |
| 268 | |
| 269 | /* save the jump address in the guest state */ |
| 270 | std 3,OFFSET_ppc64_CIA(31) |
| 271 | |
| 272 | /* Are we out of timeslice? If yes, defer to scheduler. */ |
| 273 | addi 29,29,-1 |
| 274 | cmplwi 29,0 /* yes, lwi - the counter is a 32-bit value, so compare only the low word */ |
| 275 | beq counter_is_zero |
| 276 | |
| 277 | /* try a fast lookup in the translation cache */ |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 278 | /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) |
| 279 | = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ |
| 280 | rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# : rotate left by 62 (= right by 2), keep the low VG_TT_FAST_BITS bits */ |
| 281 | sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ |
| 282 | add 5,5,4 /* &VG_(tt_fast)[entry#] */ |
| 283 | ld 6,0(5) /* .guest */ |
| 284 | ld 7,8(5) /* .host */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 285 | cmpd 3,6 |
| 286 | bne fast_lookup_failed |
| 287 | |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 288 | /* Found a match. Call .host (via CTR; bctrl sets LR to the instruction after it). */ |
| 289 | mtctr 7 |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 290 | bctrl |
| 291 | |
| 292 | /* On return from guest code: |
| 293 | r3 holds destination (original) address. |
| 294 | r31 may be unchanged (guest_state), or may indicate further |
| 295 | details of the control transfer requested to *r3. |
| 296 | */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 297 | /* start over */ |
| 298 | b VG_(run_innerloop__dispatch_unprofiled) |
| 299 | /*NOTREACHED*/ |
| 300 | |
| 301 | /*----------------------------------------------------*/ |
| 302 | /*--- PROFILING dispatcher (can be much slower) ---*/ |
| 303 | /*----------------------------------------------------*/ |
| 304 | |
| 305 | .globl VG_(run_innerloop__dispatch_profiled) |
| 306 | VG_(run_innerloop__dispatch_profiled): |
| 307 | /* At entry: Live regs: |
| 308 | r1 (=sp) |
| 309 | r3 (=CIA = next guest address) |
| 310 | r29 (=dispatch_ctr) |
| 311 | r31 (=guest_state) |
| 312 | Stack state: |
| 313 | 256(r1) (=orig guest_state) |
| | |
| | Same steps as the unprofiled dispatcher, plus one extra step on |
| | a cache hit: the 32-bit counter pointed to by the matching |
| | VG_(tt_fastN) slot is incremented before the host code is called. |
| | Scratch registers used here: r4-r10, CTR, LR, CR0. |
| 314 | */ |
| 315 | |
| 316 | /* Has the guest state pointer been messed with? If yes, exit. */ |
| 317 | ld 5,256(1) /* original guest_state ptr */ |
| 318 | cmpd 5,31 |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 319 | ld 5,tocent__vgPlain_tt_fast(2) /* r5 = &VG_(tt_fast); placed between the compare and its branch, the load leaves CR0 untouched */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 320 | bne gsp_changed |
| 321 | |
| 322 | /* save the jump address in the guest state */ |
| 323 | std 3,OFFSET_ppc64_CIA(31) |
| 324 | |
| 325 | /* Are we out of timeslice? If yes, defer to scheduler. */ |
| 326 | addi 29,29,-1 |
| 327 | cmplwi 29,0 /* yes, lwi - the counter is a 32-bit value, so compare only the low word */ |
| 328 | beq counter_is_zero |
| 329 | |
| 330 | /* try a fast lookup in the translation cache */ |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 331 | /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) |
| 332 | = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ |
| 333 | rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# : rotate left by 62 (= right by 2), keep the low VG_TT_FAST_BITS bits */ |
| 334 | sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ |
| 335 | add 5,5,4 /* &VG_(tt_fast)[entry#] */ |
| 336 | ld 6,0(5) /* .guest */ |
| 337 | ld 7,8(5) /* .host */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 338 | cmpd 3,6 |
| 339 | bne fast_lookup_failed |
| 340 | |
| 341 | /* increment bb profile counter */ |
| 342 | ld 9,tocent__vgPlain_tt_fastN(2) /* r9 = &tt_fastN */ |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 343 | srdi 4,4,1 /* entry# * sizeof(UInt*) : r4 was entry# * 16, halve it to entry# * 8 */ |
| 344 | ldx 8,9,4 /* r8 = tt_fastN[entry#] (pointer to the counter) */ |
| 345 | lwz 10,0(8) /* r10 = current 32-bit count */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 346 | addi 10,10,1 |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 347 | stw 10,0(8) /* write the incremented count back */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 348 | |
sewardj | 7373582 | 2007-02-20 19:23:19 +0000 | [diff] [blame] | 349 | /* Found a match. Call .host (r7 still holds it; the counter update above used r8-r10). */ |
| 350 | mtctr 7 |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 351 | bctrl |
| 352 | |
| 353 | /* On return from guest code: |
| 354 | r3 holds destination (original) address. |
| 355 | r31 may be unchanged (guest_state), or may indicate further |
| 356 | details of the control transfer requested to *r3. |
| 357 | */ |
sewardj | 7525c82 | 2006-10-17 01:06:44 +0000 | [diff] [blame] | 358 | /* start over */ |
| 359 | b VG_(run_innerloop__dispatch_profiled) |
| 360 | /*NOTREACHED*/ |
| 361 | |
| 362 | /*----------------------------------------------------*/ |
| 363 | /*--- exit points ---*/ |
| 364 | /*----------------------------------------------------*/ |
| 365 | |
| 366 | gsp_changed: |
| 367 | /* Someone messed with the gsp (in r31). Have to |
| 368 | defer to scheduler to resolve this. dispatch ctr |
| 369 | is not yet decremented, so no need to increment. */ |
| 370 | /* %CIA is NOT up to date here. First, need to write |
| 371 | %r3 back to %CIA, but without trashing %r31 since |
| 372 | that holds the value we want to return to the scheduler. |
| 373 | Hence use %r5 transiently for the guest state pointer. */ |
| 374 | ld 5,256(1) /* original guest_state ptr */ |
| 375 | std 3,OFFSET_ppc64_CIA(5) |
| 376 | mr 3,31 /* r3 = new gsp value, which becomes the return value */ |
| 377 | b run_innerloop_exit |
| 378 | /*NOTREACHED*/ |
| 379 | |
| 380 | counter_is_zero: |
| 381 | /* %CIA is up to date (both dispatchers store r3 to it before testing the counter) */ |
| 382 | /* back out decrement of the dispatch counter */ |
| 383 | addi 29,29,1 |
| 384 | li 3,VG_TRC_INNER_COUNTERZERO |
| 385 | b run_innerloop_exit |
| 386 | |
| 387 | fast_lookup_failed: |
| 388 | /* %CIA is up to date (both dispatchers store r3 to it before the lookup) */ |
| 389 | /* back out decrement of the dispatch counter */ |
| 390 | addi 29,29,1 |
| 391 | li 3,VG_TRC_INNER_FASTMISS |
| 392 | b run_innerloop_exit |
| 393 | |
| 394 | |
| 395 | |
| 396 | /* All exits from the dispatcher go through here. |
| 397 | r3 holds the return value. |
| | r29 holds the dispatch counter, which is written back to |
| | VG_(dispatch_ctr) before the callee-saved registers are restored. |
| 398 | */ |
| 399 | run_innerloop_exit: |
| 400 | /* We're leaving. FPSCR is forced back to zero below; the |
| 401 | VSCR[NJ] check is commented out, so no invariant is actually tested here. */ |
| 402 | |
| 403 | /* Set fpscr back to a known state, since vex-generated code |
| 404 | may have messed with fpscr[rm]. */ |
| 405 | li 5,0 |
| 406 | std 5,128(1) /* r1[128] is scratch */ |
| 407 | lfd 3,128(1) |
| 408 | mtfsf 0xFF,3 /* fpscr = f3 */ |
| 409 | |
| 410 | /* Using r11 - value used again further on, so don't trash! */ |
| 411 | ld 11,tocent__vgPlain_machine_ppc64_has_VMX(2) |
| 412 | ld 11,0(11) |
| 413 | cmpldi 11,0 |
| 414 | beq LafterVMX8 |
| 415 | |
| 416 | // Sigh. AIX 5.2 has no idea that Altivec exists. |
| 417 | // /* Check VSCR[NJ] == 1 */ |
| 418 | // /* first generate 4x 0x00010000 */ |
| 419 | // vspltisw 4,0x1 /* 4x 0x00000001 */ |
| 420 | // vspltisw 5,0x0 /* zero */ |
| 421 | // vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */ |
| 422 | // /* retrieve VSCR and mask wanted bits */ |
| 423 | // mfvscr 7 |
| 424 | // vand 7,7,6 /* gives NJ flag */ |
| 425 | // vspltw 7,7,0x3 /* flags-word to all lanes */ |
| 426 | // vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */ |
| 427 | // bt 24,invariant_violation /* branch if all_equal */ |
| 428 | LafterVMX8: |
| 429 | |
| 430 | /* otherwise we're OK (with the VSCR check disabled, this path is always taken) */ |
| 431 | b run_innerloop_exit_REALLY |
| 432 | |
| 433 | |
| 434 | invariant_violation: /* in this file, only the commented-out VSCR check above branches here */ |
| 435 | li 3,VG_TRC_INVARIANT_FAILED |
| 436 | b run_innerloop_exit_REALLY |
| 437 | |
| 438 | run_innerloop_exit_REALLY: |
| 439 | /* r3 holds VG_TRC_* value to return */ |
| 440 | |
| 441 | /* Write ctr to VG(dispatch_ctr) */ |
| 442 | ld 5,tocent__vgPlain_dispatch_ctr(2) |
| 443 | stw 29,0(5) /* yes, really stw - it was loaded as a 32-bit value */ |
| 444 | |
| 445 | /* Restore callee-saved registers... */ |
| 446 | |
| 447 | /* Floating-point regs (same slots as the stfd sequence in the preamble) */ |
| 448 | lfd 31,256+392(1) |
| 449 | lfd 30,256+384(1) |
| 450 | lfd 29,256+376(1) |
| 451 | lfd 28,256+368(1) |
| 452 | lfd 27,256+360(1) |
| 453 | lfd 26,256+352(1) |
| 454 | lfd 25,256+344(1) |
| 455 | lfd 24,256+336(1) |
| 456 | lfd 23,256+328(1) |
| 457 | lfd 22,256+320(1) |
| 458 | lfd 21,256+312(1) |
| 459 | lfd 20,256+304(1) |
| 460 | lfd 19,256+296(1) |
| 461 | lfd 18,256+288(1) |
| 462 | lfd 17,256+280(1) |
| 463 | lfd 16,256+272(1) |
| 464 | lfd 15,256+264(1) |
| 465 | lfd 14,256+256(1) |
| 466 | |
| 467 | /* General regs (same slots as the std sequence in the preamble) */ |
| 468 | ld 31,256+544(1) |
| 469 | ld 30,256+536(1) |
| 470 | ld 29,256+528(1) |
| 471 | ld 28,256+520(1) |
| 472 | ld 27,256+512(1) |
| 473 | ld 26,256+504(1) |
| 474 | ld 25,256+496(1) |
| 475 | ld 24,256+488(1) |
| 476 | ld 23,256+480(1) |
| 477 | ld 22,256+472(1) |
| 478 | ld 21,256+464(1) |
| 479 | ld 20,256+456(1) |
| 480 | ld 19,256+448(1) |
| 481 | ld 18,256+440(1) |
| 482 | ld 17,256+432(1) |
| 483 | ld 16,256+424(1) |
| 484 | ld 15,256+416(1) |
| 485 | ld 14,256+408(1) |
| 486 | ld 13,256+400(1) |
| 487 | |
| 488 | /* r11 already holds VG_(machine_ppc64_has_VMX) value */ |
| 489 | cmpldi 11,0 |
| 490 | beq LafterVMX9 |
| 491 | |
| 492 | // Sigh. AIX 5.2 has no idea that Altivec exists. |
| 493 | // /* VRSAVE */ |
| 494 | // lwz 4,476(1) |
| 495 | // mtspr 256,4 /* VRSAVE reg is spr number 256 */ |
| 496 | // NOTE(review): the offsets in this disabled code (476..671) overlap the FP (512..655) and GPR (656..807) save slots restored above; rebase them before re-enabling. |
| 497 | // /* Vector regs */ |
| 498 | // li 4,656 |
| 499 | // lvx 31,4,1 |
| 500 | // li 4,640 |
| 501 | // lvx 30,4,1 |
| 502 | // li 4,624 |
| 503 | // lvx 29,4,1 |
| 504 | // li 4,608 |
| 505 | // lvx 28,4,1 |
| 506 | // li 4,592 |
| 507 | // lvx 27,4,1 |
| 508 | // li 4,576 |
| 509 | // lvx 26,4,1 |
| 510 | // li 4,560 |
| 511 | // lvx 25,4,1 |
| 512 | // li 4,544 |
| 513 | // lvx 24,4,1 |
| 514 | // li 4,528 |
| 515 | // lvx 23,4,1 |
| 516 | // li 4,512 |
| 517 | // lvx 22,4,1 |
| 518 | // li 4,496 |
| 519 | // lvx 21,4,1 |
| 520 | // li 4,480 |
| 521 | // lvx 20,4,1 |
| 522 | LafterVMX9: |
| 523 | |
| 524 | /* r3 is live here; don't trash it */ |
| 525 | /* restore lr,cr,sp (LR and CR were saved at 16 and 8 off the caller's sp on entry) */ |
| 526 | addi 4,1,2048 /* r4 = old SP */ |
| 527 | ld 0,16(4) |
| 528 | mtlr 0 |
| 529 | ld 0,8(4) |
| 530 | mtcr 0 |
| 531 | mr 1,4 |
| 532 | blr |
| 533 | |
| | /* Traceback table for VG_(run_innerloop), in the layout gcc emits: |
| | a zero word, eight fixed bytes, the parameter-info word, the |
| | offset of this table from the function entry, and finally the |
| | length-prefixed function name. The length field must equal the |
| | number of characters in the name string that follows it. |
| | NOTE(review): the seventh fixed byte (fixed-point parameter |
| | count) is 1 although the signature above takes two arguments - |
| | confirm whether any consumer depends on it. */ |
| 534 | LT..vgPlain_run_innerloop: |
| 535 | .long 0 |
| 536 | .byte 0,0,32,64,0,0,1,0 |
| 537 | .long 0 |
| 538 | .long LT..vgPlain_run_innerloop-.vgPlain_run_innerloop |
| 539 | .short 21 /* name length = strlen("vgPlain_run_innerloop") */ |
| 540 | .byte "vgPlain_run_innerloop" |
| 541 | .align 2 |
| 542 | _section_.text: |
| 543 | .csect .data[RW],3 |
| 544 | .llong _section_.text |
| 545 | |
| 546 | /*------------------------------------------------------------*/ |
| 547 | /*--- ---*/ |
| 548 | /*--- A special dispatcher, for running no-redir ---*/ |
| 549 | /*--- translations. Just runs the given translation once. ---*/ |
| 550 | /*--- ---*/ |
| 551 | /*------------------------------------------------------------*/ |
| 552 | |
| 553 | /* signature: |
| 554 | void VG_(run_a_noredir_translation) ( UWord* argblock ); |
| 555 | */ |
| 556 | |
| 557 | /* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args |
| 558 | and 2 to carry results: |
| 559 | 0: input: ptr to translation |
| 560 | 1: input: ptr to guest state |
| 561 | 2: output: next guest PC |
| 562 | 3: output: guest state pointer afterwards (== thread return code) |
| | |
| | The routine saves r13-r31 and the argblock pointer in its frame, |
| | loads r31 and r30 from argblock[1] and argblock[0], calls the |
| | translation through LR, stores r3 and r31 to argblock[2] and |
| | argblock[3], then restores the registers and returns. Unlike |
| | VG_(run_innerloop) it saves no floating-point registers and does |
| | not touch FPSCR. |
| 563 | */ |
| 564 | .csect .text[PR] |
| 565 | .align 2 |
| 566 | .globl .VG_(run_a_noredir_translation) |
| 567 | .VG_(run_a_noredir_translation): |
| 568 | /* Rather than attempt to make sense of the AIX ABI, just |
| 569 | drop r1 by 512 (to get away from the caller's frame), then |
| 570 | 1024 (to give ourselves a 1024-byte save area), and then |
| 571 | another 512 (to clear our save area). In all, drop r1 by 2048 |
| 572 | and dump stuff on the stack at 512(1)..1536(1). */ |
| 573 | /* At entry, r3 points to argblock */ |
| 574 | |
| 575 | /* ----- entry point to VG_(run_a_noredir_translation) ----- */ |
| 576 | /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */ |
| 577 | |
| 578 | /* Save lr and cr (into the caller's frame, before r1 is moved) */ |
| 579 | mflr 0 |
| 580 | std 0,16(1) |
| 581 | mfcr 0 |
| 582 | std 0,8(1) |
| 583 | |
| 584 | /* New stack frame */ |
| 585 | stdu 1,-2048(1) /* sp should maintain 16-byte alignment */ |
| 586 | |
| 587 | /* General reg save area : 160 bytes at r1[512 .. 671] (argblock ptr at 512, then r13..r31) */ |
| 588 | std 31,664(1) |
| 589 | std 30,656(1) |
| 590 | std 29,648(1) |
| 591 | std 28,640(1) |
| 592 | std 27,632(1) |
| 593 | std 26,624(1) |
| 594 | std 25,616(1) |
| 595 | std 24,608(1) |
| 596 | std 23,600(1) |
| 597 | std 22,592(1) |
| 598 | std 21,584(1) |
| 599 | std 20,576(1) |
| 600 | std 19,568(1) |
| 601 | std 18,560(1) |
| 602 | std 17,552(1) |
| 603 | std 16,544(1) |
| 604 | std 15,536(1) |
| 605 | std 14,528(1) |
| 606 | std 13,520(1) |
| 607 | std 3,512(1) /* will need it later: r3 is overwritten by the translation's result */ |
| 608 | |
| 609 | ld 31,8(3) /* rd argblock[1] : guest state pointer */ |
| 610 | ld 30,0(3) /* rd argblock[0] : address of the translation */ |
| 611 | mtlr 30 /* run translation */ |
| 612 | blrl |
| 613 | |
| 614 | ld 4,512(1) /* &argblock */ |
| 615 | std 3, 16(4) /* wr argblock[2] */ |
| 616 | std 31,24(4) /* wr argblock[3] */ |
| 617 | |
| 618 | /* General regs */ |
| 619 | ld 31,664(1) |
| 620 | ld 30,656(1) |
| 621 | ld 29,648(1) |
| 622 | ld 28,640(1) |
| 623 | ld 27,632(1) |
| 624 | ld 26,624(1) |
| 625 | ld 25,616(1) |
| 626 | ld 24,608(1) |
| 627 | ld 23,600(1) |
| 628 | ld 22,592(1) |
| 629 | ld 21,584(1) |
| 630 | ld 20,576(1) |
| 631 | ld 19,568(1) |
| 632 | ld 18,560(1) |
| 633 | ld 17,552(1) |
| 634 | ld 16,544(1) |
| 635 | ld 15,536(1) |
| 636 | ld 14,528(1) |
| 637 | ld 13,520(1) |
| 638 | |
| 639 | /* restore lr,cr,sp (LR and CR were saved at 16 and 8 off the caller's sp on entry) */ |
| 640 | addi 4,1,2048 /* r4 = old SP */ |
| 641 | ld 0,16(4) |
| 642 | mtlr 0 |
| 643 | ld 0,8(4) |
| 644 | mtcr 0 |
| 645 | mr 1,4 |
| 646 | blr |
| 647 | |
| 648 | /*--------------------------------------------------------------------*/ |
| 649 | /*--- end ---*/ |
| 650 | /*--------------------------------------------------------------------*/ |