/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.      ---*/
/*---                                       dispatch-ppc64-aix5.S ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2006-2006 OpenWorks LLP
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"   /* for OFFSET_ppc64_CIA */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Incomprehensible TOC mumbo-jumbo nonsense.   ---*/
/*----------------------------------------------------*/

/* No, I don't have a clue either.  I just compiled a bit of
   C with gcc and copied the assembly code it produced. */

/* Basically "ld rd, tocent__foo(2)" gets &foo into rd. */
        .file "dispatch-ppc64-aix5.S"
        .machine "ppc64"
        .toc
        .csect .text[PR]
        .toc
tocent__vgPlain_dispatch_ctr:
        .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr[RW]
tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX[RW]
tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast[RW]
tocent__vgPlain_tt_fastN:
        .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN[RW]
        .csect .text[PR]
        .align 2
        .globl vgPlain_run_innerloop
        .globl .vgPlain_run_innerloop
        .csect vgPlain_run_innerloop[DS]
vgPlain_run_innerloop:
        .llong .vgPlain_run_innerloop, TOC[tc0], 0
        .csect .text[PR]

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/
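
/* Rough C-side picture, for orientation only (the real call site is in
   the scheduler; the variable name guest_state_ptr is illustrative):

      UWord trc = VG_(run_innerloop)( (void*)guest_state_ptr, do_profiling );

   The return value is one of the VG_TRC_* codes set at the exit points
   below, or the changed guest state pointer (see gsp_changed). */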
.vgPlain_run_innerloop:

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        /* Rather than attempt to make sense of the AIX ABI, just
           drop r1 by 512 (to get away from the caller's frame), then
           1024 (to give ourselves a 1024-byte save area), and then
           another 512 (to clear our save area).  In all, drop r1 by 2048
           and dump stuff on the stack at 512(1)..1536(1). */
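        /* Summary of the resulting frame (relative to the new r1), as
           laid out by the stores below -- a local convention here, not
           an ABI-mandated layout:

              r1[0   .. 255]      scratch (r1[128] used for the FP zero)
              r1[256]             stashed orig guest_state pointer
              r1[264 .. 511]      more scratch
              r1[512 .. 655]      FP reg save area (f14-f31)
              r1[656 .. 807]      GPR save area (r13-r31)
              r1[1536 .. 2047]    untouched, stays clear of the caller
        */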

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */

        /* Save lr and cr */
        mflr 0
        std 0,16(1)
        mfcr 0
        std 0,8(1)

        /* New stack frame */
        stdu 1,-2048(1) /* sp should maintain 16-byte alignment */

        /* Save callee-saved registers... */
        /* r3, r4 are live here, so use r5 */

        /* Floating-point reg save area : 144 bytes at r1[256+256 .. 256+399] */
        stfd 31,256+392(1)
        stfd 30,256+384(1)
        stfd 29,256+376(1)
        stfd 28,256+368(1)
        stfd 27,256+360(1)
        stfd 26,256+352(1)
        stfd 25,256+344(1)
        stfd 24,256+336(1)
        stfd 23,256+328(1)
        stfd 22,256+320(1)
        stfd 21,256+312(1)
        stfd 20,256+304(1)
        stfd 19,256+296(1)
        stfd 18,256+288(1)
        stfd 17,256+280(1)
        stfd 16,256+272(1)
        stfd 15,256+264(1)
        stfd 14,256+256(1)

        /* General reg save area : 152 bytes at r1[256+400 .. 256+551] */
        std 31,256+544(1)
        std 30,256+536(1)
        std 29,256+528(1)
        std 28,256+520(1)
        std 27,256+512(1)
        std 26,256+504(1)
        std 25,256+496(1)
        std 24,256+488(1)
        std 23,256+480(1)
        std 22,256+472(1)
        std 21,256+464(1)
        std 20,256+456(1)
        std 19,256+448(1)
        std 18,256+440(1)
        std 17,256+432(1)
        std 16,256+424(1)
        std 15,256+416(1)
        std 14,256+408(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hell. */
        std 13,256+400(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4 are live here, so use r5 */
        ld 5,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld 5,0(5)
        cmpldi 5,0
        beq LafterVMX1

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* VRSAVE save word : 4 bytes at r1[476 .. 479] */
//        mfspr 5,256 /* vrsave reg is spr number 256 */
//        stw 5,476(1)
//
//        /* Vector reg save area (quadword aligned):
//           192 bytes at r1[480 .. 671] */
//        li 5,656
//        stvx 31,5,1
//        li 5,640
//        stvx 30,5,1
//        li 5,624
//        stvx 29,5,1
//        li 5,608
//        stvx 28,5,1
//        li 5,592
//        stvx 27,5,1
//        li 5,576
//        stvx 26,5,1
//        li 5,560
//        stvx 25,5,1
//        li 5,544
//        stvx 24,5,1
//        li 5,528
//        stvx 23,5,1
//        li 5,512
//        stvx 22,5,1
//        li 5,496
//        stvx 21,5,1
//        li 5,480
//        stvx 20,5,1
LafterVMX1:

        /* Local variable space... */
        /* Put the original guest state pointer at r1[256].  We
           will need to refer to it each time round the dispatch loop.
           Apart from that, we can use r1[0 .. 255] and r1[264 .. 511]
           as scratch space. */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr 31,3      /* r31 (generated code gsp) = r3 */
        std 3,256(1) /* stash orig guest_state ptr */

        /* hold dispatch_ctr (NOTE: 32-bit value) in r29 */
        ld 5,tocent__vgPlain_dispatch_ctr(2)
        lwz 29,0(5)

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li 5,0
        std 5,128(1) /* r1[128] is scratch */
        lfd 3,128(1)
        mtfsf 0xFF,3 /* fpscr = f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld 5,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld 5,0(5)
        cmpldi 5,0
        beq LafterVMX2

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        vspltisw 3,0x0 /* generate zero */
//        mtvscr 3
LafterVMX2:

        /* fetch %CIA into r3 */
        ld 3,OFFSET_ppc64_CIA(31)

        /* fall into main loop (the right one) */
        /* r4 = do_profiling.  It may be trashed from here onwards,
           but that's OK: we don't need it any more. */
        cmpldi 4,0
        beq VG_(run_innerloop__dispatch_unprofiled)
        b VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.globl VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
        /* At entry: Live regs:
                r1  (=sp)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                256(r1) (=orig guest_state)
        */

        /* Has the guest state pointer been messed with?  If yes, exit. */
        ld 5,256(1) /* original guest_state ptr */
        cmpd 5,31
        bne gsp_changed

        /* save the jump address in the guest state */
        std 3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        addi 29,29,-1
        cmplwi 29,0 /* yes, cmplwi: dispatch_ctr is a 32-bit value */
        beq counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong*)
              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
        rldicl 4,3, 62, 64-VG_TT_FAST_BITS
        sldi 4,4,3

        ld 5,tocent__vgPlain_tt_fast(2) /* r5 = &tt_fast */

        ldx 5,5,4 /* r5 = VG_(tt_fast)[VG_TT_FAST_HASH(addr)] */
        ld 6,0(5) /* r6 = (r5)->orig_addr */
        cmpd 3,6
        bne fast_lookup_failed
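
        /* What the above does, roughly, in C (illustrative only; it
           assumes, as the code above does, that each tt_fast entry
           points at a translation whose first 64-bit word is the
           original guest address and whose host code starts at word 1):

              UWord  hash = (addr >> 2) & VG_TT_FAST_MASK;
              ULong* tce  = VG_(tt_fast)[hash];
              if (tce[0] != addr)
                 goto fast_lookup_failed;
              ... otherwise branch-and-link to &tce[1] ...
        */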

        /* Found a match.  Call tce[1], which is 8 bytes along, since
           each tce element is a 64-bit int. */
        addi 8,5,8
        mtctr 8

        /* run the translation */
        bctrl

        /* On return from guest code:
           r3 holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */

        /* start over */
        b VG_(run_innerloop__dispatch_unprofiled)
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.globl VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
        /* At entry: Live regs:
                r1  (=sp)
                r3  (=CIA = next guest address)
                r29 (=dispatch_ctr)
                r31 (=guest_state)
           Stack state:
                256(r1) (=orig guest_state)
        */

        /* Has the guest state pointer been messed with?  If yes, exit. */
        ld 5,256(1) /* original guest_state ptr */
        cmpd 5,31
        bne gsp_changed

        /* save the jump address in the guest state */
        std 3,OFFSET_ppc64_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
        addi 29,29,-1
        cmplwi 29,0 /* yes, cmplwi: dispatch_ctr is a 32-bit value */
        beq counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong*)
              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
        rldicl 4,3, 62, 64-VG_TT_FAST_BITS
        sldi 4,4,3

        ld 5,tocent__vgPlain_tt_fast(2) /* r5 = &tt_fast */

        ldx 5,5,4 /* r5 = VG_(tt_fast)[VG_TT_FAST_HASH(addr)] */
        ld 6,0(5) /* r6 = (r5)->orig_addr */
        cmpd 3,6
        bne fast_lookup_failed

        /* increment bb profile counter */
        ld 9,tocent__vgPlain_tt_fastN(2) /* r9 = &tt_fastN */
        ldx 7,9,4 /* r7 = tt_fastN[r4] */
        lwz 10,0(7)
        addi 10,10,1
        stw 10,0(7)
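
        /* Roughly, in C (illustrative only): VG_(tt_fastN) parallels
           VG_(tt_fast); each entry points at the 32-bit execution
           counter for the corresponding translation:

              (*VG_(tt_fastN)[hash])++;
        */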

        /* Found a match.  Call tce[1], which is 8 bytes along, since
           each tce element is a 64-bit int. */
        addi 8,5,8
        mtctr 8

        /* run the translation */
        bctrl

        /* On return from guest code:
           r3 holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */

        /* start over */
        b VG_(run_innerloop__dispatch_profiled)
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
        /* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
           is not yet decremented, so no need to increment. */
        /* %CIA is NOT up to date here.  First, need to write
           %r3 back to %CIA, but without trashing %r31 since
           that holds the value we want to return to the scheduler.
           Hence use %r5 transiently for the guest state pointer. */
        ld 5,256(1) /* original guest_state ptr */
        std 3,OFFSET_ppc64_CIA(5)
        mr 3,31 /* r3 = new gsp value */
        b run_innerloop_exit
        /*NOTREACHED*/

counter_is_zero:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi 29,29,1
        li 3,VG_TRC_INNER_COUNTERZERO
        b run_innerloop_exit

fast_lookup_failed:
        /* %CIA is up to date */
        /* back out decrement of the dispatch counter */
        addi 29,29,1
        li 3,VG_TRC_INNER_FASTMISS
        b run_innerloop_exit


/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li 5,0
        std 5,128(1) /* r1[128] is scratch */
        lfd 3,128(1)
        mtfsf 0xFF,3 /* fpscr = f3 */

        /* Using r11 - value used again further on, so don't trash! */
        ld 11,tocent__vgPlain_machine_ppc64_has_VMX(2)
        ld 11,0(11)
        cmpldi 11,0
        beq LafterVMX8

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* Check VSCR[NJ] == 1 */
//        /* first generate 4x 0x00010000 */
//        vspltisw 4,0x1   /* 4x 0x00000001 */
//        vspltisw 5,0x0   /* zero */
//        vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
//        /* retrieve VSCR and mask wanted bits */
//        mfvscr 7
//        vand 7,7,6       /* gives NJ flag */
//        vspltw 7,7,0x3   /* flags-word to all lanes */
//        vcmpequw. 8,6,7  /* CR[24] = 1 if v6 == v7 */
//        bt 24,invariant_violation /* branch if all_equal */
LafterVMX8:

        /* otherwise we're OK */
        b run_innerloop_exit_REALLY


invariant_violation:
        li 3,VG_TRC_INVARIANT_FAILED
        b run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Write ctr to VG_(dispatch_ctr) */
        ld 5,tocent__vgPlain_dispatch_ctr(2)
        stw 29,0(5) /* yes, really stw */

        /* Restore callee-saved registers... */

        /* Floating-point regs */
        lfd 31,256+392(1)
        lfd 30,256+384(1)
        lfd 29,256+376(1)
        lfd 28,256+368(1)
        lfd 27,256+360(1)
        lfd 26,256+352(1)
        lfd 25,256+344(1)
        lfd 24,256+336(1)
        lfd 23,256+328(1)
        lfd 22,256+320(1)
        lfd 21,256+312(1)
        lfd 20,256+304(1)
        lfd 19,256+296(1)
        lfd 18,256+288(1)
        lfd 17,256+280(1)
        lfd 16,256+272(1)
        lfd 15,256+264(1)
        lfd 14,256+256(1)

        /* General regs */
        ld 31,256+544(1)
        ld 30,256+536(1)
        ld 29,256+528(1)
        ld 28,256+520(1)
        ld 27,256+512(1)
        ld 26,256+504(1)
        ld 25,256+496(1)
        ld 24,256+488(1)
        ld 23,256+480(1)
        ld 22,256+472(1)
        ld 21,256+464(1)
        ld 20,256+456(1)
        ld 19,256+448(1)
        ld 18,256+440(1)
        ld 17,256+432(1)
        ld 16,256+424(1)
        ld 15,256+416(1)
        ld 14,256+408(1)
        ld 13,256+400(1)

        /* r11 already holds VG_(machine_ppc64_has_VMX) value */
        cmpldi 11,0
        beq LafterVMX9

// Sigh.  AIX 5.2 has no idea that Altivec exists.
//        /* VRSAVE */
//        lwz 4,476(1)
//        mtspr 256,4 /* VRSAVE reg is spr number 256 */
//
//        /* Vector regs */
//        li 4,656
//        lvx 31,4,1
//        li 4,640
//        lvx 30,4,1
//        li 4,624
//        lvx 29,4,1
//        li 4,608
//        lvx 28,4,1
//        li 4,592
//        lvx 27,4,1
//        li 4,576
//        lvx 26,4,1
//        li 4,560
//        lvx 25,4,1
//        li 4,544
//        lvx 24,4,1
//        li 4,528
//        lvx 23,4,1
//        li 4,512
//        lvx 22,4,1
//        li 4,496
//        lvx 21,4,1
//        li 4,480
//        lvx 20,4,1
LafterVMX9:

        /* r3 is live here; don't trash it */
        /* restore lr,cr,sp */
        addi 4,1,2048 /* r4 = old SP */
        ld 0,16(4)
        mtlr 0
        ld 0,8(4)
        mtcr 0
        mr 1,4
        blr

LT..vgPlain_run_innerloop:
        .long 0
        .byte 0,0,32,64,0,0,1,0
        .long 0
        .long LT..vgPlain_run_innerloop-.vgPlain_run_innerloop
        .short 3
        .byte "vgPlain_run_innerloop"
        .align 2
_section_.text:
        .csect .data[RW],3
        .llong _section_.text

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
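
/* A hedged usage sketch from the C side (the caller-side names here are
   illustrative, not the real ones):

      UWord argblock[4];
      argblock[0] = (UWord)host_code;      -- ptr to the translation
      argblock[1] = (UWord)guest_state;    -- handed to it in r31
      VG_(run_a_noredir_translation)( &argblock[0] );
      next_guest_pc  = argblock[2];        -- output
      gsp_afterwards = argblock[3];        -- output
*/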
.csect .text[PR]
.align 2
.globl .VG_(run_a_noredir_translation)
.VG_(run_a_noredir_translation):
        /* Rather than attempt to make sense of the AIX ABI, just
           drop r1 by 512 (to get away from the caller's frame), then
           1024 (to give ourselves a 1024-byte save area), and then
           another 512 (to clear our save area).  In all, drop r1 by 2048
           and dump stuff on the stack at 512(1)..1536(1). */
        /* At entry, r3 points to argblock */

        /* ----- entry point to VG_(run_a_noredir_translation) ----- */
        /* For AIX/ppc64 we do: LR-> +16(parent_sp), CR-> +8(parent_sp) */

        /* Save lr and cr */
        mflr 0
        std 0,16(1)
        mfcr 0
        std 0,8(1)

        /* New stack frame */
        stdu 1,-2048(1) /* sp should maintain 16-byte alignment */

        /* General reg save area : 160 bytes at r1[512 .. 671] */
        std 31,664(1)
        std 30,656(1)
        std 29,648(1)
        std 28,640(1)
        std 27,632(1)
        std 26,624(1)
        std 25,616(1)
        std 24,608(1)
        std 23,600(1)
        std 22,592(1)
        std 21,584(1)
        std 20,576(1)
        std 19,568(1)
        std 18,560(1)
        std 17,552(1)
        std 16,544(1)
        std 15,536(1)
        std 14,528(1)
        std 13,520(1)
        std 3,512(1)  /* will need it later */

        ld 31,8(3)    /* rd argblock[1] */
        ld 30,0(3)    /* rd argblock[0] */
        mtlr 30       /* run translation */
        blrl

        ld 4,512(1)   /* &argblock */
        std 3,16(4)   /* wr argblock[2] */
        std 31,24(4)  /* wr argblock[3] */

        /* General regs */
        ld 31,664(1)
        ld 30,656(1)
        ld 29,648(1)
        ld 28,640(1)
        ld 27,632(1)
        ld 26,624(1)
        ld 25,616(1)
        ld 24,608(1)
        ld 23,600(1)
        ld 22,592(1)
        ld 21,584(1)
        ld 20,576(1)
        ld 19,568(1)
        ld 18,560(1)
        ld 17,552(1)
        ld 16,544(1)
        ld 15,536(1)
        ld 14,528(1)
        ld 13,520(1)

        /* restore lr,cr,sp */
        addi 4,1,2048 /* r4 = old SP */
        ld 0,16(4)
        mtlr 0
        ld 0,8(4)
        mtcr 0
        mr 1,4
        blr

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/