Fill in some more bits to do with t-chaining for ppc64
(still doesn't work) (Valgrind side)
git-svn-id: svn://svn.valgrind.org/valgrind/branches/TCHAIN@12513 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S
index 61c7bab..4a2c5d3 100644
--- a/coregrind/m_dispatch/dispatch-ppc32-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S
@@ -62,7 +62,7 @@
/* r4 holds guest_state */
/* r5 holds host_addr */
- /* ----- entry point to VG_(run_innerloop) ----- */
+ /* ----- entry point to VG_(disp_run_translations) ----- */
/* For Linux/ppc32 we need the SysV ABI, which uses
LR->4(parent_sp), CR->anywhere.
(The AIX ABI, used on Darwin,
@@ -104,7 +104,7 @@
stfd 14,352(1)
LafterFP1:
- /* General reg save area : 72 bytes */
+ /* General reg save area : 76 bytes */
stw 31,348(1)
stw 30,344(1)
stw 29,340(1)
@@ -289,8 +289,8 @@
vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
bt 24,invariant_violation /* branch if all_equal */
#endif
-LafterVMX8:
+LafterVMX8:
/* otherwise we're OK */
b remove_frame
@@ -410,7 +410,7 @@
handing the caller the pair (Chain_me_S, RA) */
li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
mflr 7
- /* 8 = imm32 r30, disp_cp_chain_me_to_slowEP
+ /* 8 = imm32-fixed2 r30, disp_cp_chain_me_to_slowEP
4 = mtctr r30
4 = btctr
*/
@@ -426,7 +426,7 @@
handing the caller the pair (Chain_me_S, RA) */
li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
mflr 7
- /* 8 = imm32 r30, disp_cp_chain_me_to_fastEP
+ /* 8 = imm32-fixed2 r30, disp_cp_chain_me_to_fastEP
4 = mtctr r30
4 = btctr
*/
diff --git a/coregrind/m_dispatch/dispatch-ppc64-linux.S b/coregrind/m_dispatch/dispatch-ppc64-linux.S
index 4c08a7e..4068d2c 100644
--- a/coregrind/m_dispatch/dispatch-ppc64-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc64-linux.S
@@ -39,57 +39,61 @@
/* References to globals via the TOC */
/*
- .globl vgPlain_tt_fast
+ .globl vgPlain_tt_fast
.lcomm vgPlain_tt_fast,4,4
.type vgPlain_tt_fast, @object
*/
- .section ".toc","aw"
+.section ".toc","aw"
.tocent__vgPlain_tt_fast:
.tc vgPlain_tt_fast[TC],vgPlain_tt_fast
-.tocent__vgPlain_tt_fastN:
- .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN
-.tocent__vgPlain_dispatch_ctr:
- .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr
+.tocent__vgPlain_stats__n_xindirs:
+ .tc vgPlain_stats__n_xindirs[TC],vgPlain_stats__n_xindirs
+.tocent__vgPlain_stats__n_xindir_misses:
+ .tc vgPlain_stats__n_xindir_misses[TC],vgPlain_stats__n_xindir_misses
.tocent__vgPlain_machine_ppc64_has_VMX:
.tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/*----------------------------------------------------*/
-/*--- Preamble (set everything up) ---*/
+/*--- Entry and preamble (set everything up) ---*/
/*----------------------------------------------------*/
/* signature:
-UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
*/
.section ".text"
.align 2
-.globl VG_(run_innerloop)
+.globl VG_(disp_run_translations)
.section ".opd","aw"
.align 3
-VG_(run_innerloop):
-.quad .VG_(run_innerloop),.TOC.@tocbase,0
+VG_(disp_run_translations):
+.quad .VG_(disp_run_translations),.TOC.@tocbase,0
.previous
-.type .VG_(run_innerloop),@function
-.globl .VG_(run_innerloop)
-.VG_(run_innerloop):
- /* r3 holds guest_state */
- /* r4 holds do_profiling */
+.type .VG_(disp_run_translations),@function
+.globl .VG_(disp_run_translations)
+.VG_(disp_run_translations):
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
- /* ----- entry point to VG_(run_innerloop) ----- */
+ /* ----- entry point to VG_(disp_run_translations) ----- */
/* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */
/* Save lr, cr */
- mflr 0
- std 0,16(1)
- mfcr 0
- std 0,8(1)
+ mflr 6
+ std 6,16(1)
+ mfcr 6
+ std 6,8(1)
/* New stack frame */
stdu 1,-624(1) /* sp should maintain 16-byte alignment */
@@ -116,7 +120,7 @@
stfd 15,488(1)
stfd 14,480(1)
- /* General reg save area : 144 bytes */
+ /* General reg save area : 152 bytes */
std 31,472(1)
std 30,464(1)
std 29,456(1)
@@ -135,58 +139,56 @@
std 16,352(1)
std 15,344(1)
std 14,336(1)
- /* Probably not necessary to save r13 (thread-specific ptr),
- as VEX stays clear of it... but what the hey. */
std 13,328(1)
+ std 3,104(1) /* save two_words for later */
/* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
The Linux kernel might not actually use VRSAVE for its intended
purpose, but it should be harmless to preserve anyway. */
- /* r3, r4 are live here, so use r5 */
- ld 5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
- ld 5,0(5)
- cmpldi 5,0
+ /* r3, r4, r5 are live here, so use r6 */
+ ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 6,0(6)
+ cmpldi 6,0
beq .LafterVMX1
/* VRSAVE save word : 32 bytes */
- mfspr 5,256 /* vrsave reg is spr number 256 */
- stw 5,324(1)
+ mfspr 6,256 /* vrsave reg is spr number 256 */
+ stw 6,324(1)
/* Alignment padding : 4 bytes */
/* Vector reg save area (quadword aligned) : 192 bytes */
- li 5,304
- stvx 31,5,1
- li 5,288
- stvx 30,5,1
- li 5,272
- stvx 29,5,1
- li 5,256
- stvx 28,5,1
- li 5,240
- stvx 27,5,1
- li 5,224
- stvx 26,5,1
- li 5,208
- stvx 25,5,1
- li 5,192
- stvx 24,5,1
- li 5,176
- stvx 23,5,1
- li 5,160
- stvx 22,5,1
- li 5,144
- stvx 21,5,1
- li 5,128
- stvx 20,5,1
+ li 6,304
+ stvx 31,6,1
+ li 6,288
+ stvx 30,6,1
+ li 6,272
+ stvx 29,6,1
+ li 6,256
+ stvx 28,6,1
+ li 6,240
+ stvx 27,6,1
+ li 6,224
+ stvx 26,6,1
+ li 6,208
+ stvx 25,6,1
+ li 6,192
+ stvx 24,6,1
+ li 6,176
+ stvx 23,6,1
+ li 6,160
+ stvx 22,6,1
+ li 6,144
+ stvx 21,6,1
+ li 6,128
+ stvx 20,6,1
.LafterVMX1:
/* Local variable space... */
- /* r3 holds guest_state */
- /* r4 holds do_profiling */
- mr 31,3
- std 3,104(1) /* spill orig guest_state ptr */
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
/* 96(sp) used later to check FPSCR[RM] */
/* 88(sp) used later to load fpscr with zero */
@@ -201,13 +203,6 @@
0(sp) : back-chain
*/
-// CAB TODO: Use a caller-saved reg for orig guest_state ptr
-// - rem to set non-allocateable in isel.c
-
- /* hold dispatch_ctr (=32bit value) in r29 */
- ld 29,.tocent__vgPlain_dispatch_ctr@toc(2)
- lwz 29,0(29) /* 32-bit zero-extending load */
-
/* set host FPU control word to the default mode expected
by VEX-generated code. See comments in libvex.h for
more info. */
@@ -215,16 +210,16 @@
fsub 3,3,3 is not a reliable way to do this, since if
f3 holds a NaN or similar then we don't necessarily
wind up with zero. */
- li 5,0
- stw 5,88(1)
+ li 6,0
+ stw 6,88(1)
lfs 3,88(1)
mtfsf 0xFF,3 /* fpscr = lo32 of f3 */
/* set host AltiVec control word to the default mode expected
by VEX-generated code. */
- ld 5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
- ld 5,0(5)
- cmpldi 5,0
+ ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 6,0(6)
+ cmpldi 6,0
beq .LafterVMX2
vspltisw 3,0x0 /* generate zero */
@@ -234,196 +229,34 @@
/* make a stack frame for the code we are calling */
stdu 1,-48(1)
- /* fetch %CIA into r3 */
- ld 3,OFFSET_ppc64_CIA(31)
+ /* Set up the guest state ptr */
+ mr 31,4 /* r31 (generated code gsp) = r4 */
- /* fall into main loop (the right one) */
- /* r4 = do_profiling. It's probably trashed after here,
- but that's OK: we don't need it after here. */
- cmplwi 4,0
- beq .VG_(run_innerloop__dispatch_unprofiled)
- b .VG_(run_innerloop__dispatch_profiled)
+ /* and jump into the code cache. Chained translations in
+ the code cache run, until for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(cp_...) below. */
+ mtctr 5
+ bctr
/*NOTREACHED*/
-
/*----------------------------------------------------*/
-/*--- NO-PROFILING (standard) dispatcher ---*/
+/*--- Postamble and exit. ---*/
/*----------------------------------------------------*/
- .section ".text"
- .align 2
- .globl VG_(run_innerloop__dispatch_unprofiled)
- .section ".opd","aw"
- .align 3
-VG_(run_innerloop__dispatch_unprofiled):
- .quad .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
- .previous
- .type .VG_(run_innerloop__dispatch_unprofiled),@function
- .globl .VG_(run_innerloop__dispatch_unprofiled)
-.VG_(run_innerloop__dispatch_unprofiled):
- /* At entry: Live regs:
- r1 (=sp)
- r2 (toc pointer)
- r3 (=CIA = next guest address)
- r29 (=dispatch_ctr)
- r31 (=guest_state)
- Stack state:
- 144(r1) (=var space for FPSCR[RM])
- */
- /* Has the guest state pointer been messed with? If yes, exit.
- Also set up & VG_(tt_fast) early in an attempt at better
- scheduling. */
- ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
- rldicl. 0,31,0,63
- bne .gsp_changed
+.postamble:
+ /* At this point, r6 and r7 contain two
+ words to be returned to the caller. r6
+ holds a TRC value, and r7 optionally may
+ hold another word (for CHAIN_ME exits, the
+ address of the place to patch.) */
- /* save the jump address in the guest state */
- std 3,OFFSET_ppc64_CIA(31)
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- subi 29,29,1
- cmpldi 29,0
- beq .counter_is_zero
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
- rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
- sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- ld 6,0(5) /* .guest */
- ld 7,8(5) /* .host */
- cmpd 3,6
- bne .fast_lookup_failed
-
- /* Found a match. Call .host. */
- mtctr 7
- bctrl
-
- /* On return from guest code:
- r3 holds destination (original) address.
- r31 may be unchanged (guest_state), or may indicate further
- details of the control transfer requested to *r3.
- */
- /* start over */
- b .VG_(run_innerloop__dispatch_unprofiled)
- /*NOTREACHED*/
- .size .VG_(run_innerloop), .-.VG_(run_innerloop)
-
-
-/*----------------------------------------------------*/
-/*--- PROFILING dispatcher (can be much slower) ---*/
-/*----------------------------------------------------*/
-
- .section ".text"
- .align 2
- .globl VG_(run_innerloop__dispatch_profiled)
- .section ".opd","aw"
- .align 3
-VG_(run_innerloop__dispatch_profiled):
- .quad .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
- .previous
- .type .VG_(run_innerloop__dispatch_profiled),@function
- .globl .VG_(run_innerloop__dispatch_profiled)
-.VG_(run_innerloop__dispatch_profiled):
- /* At entry: Live regs:
- r1 (=sp)
- r2 (toc pointer)
- r3 (=CIA = next guest address)
- r29 (=dispatch_ctr)
- r31 (=guest_state)
- Stack state:
- 144(r1) (=var space for FPSCR[RM])
- */
- /* Has the guest state pointer been messed with? If yes, exit.
- Also set up & VG_(tt_fast) early in an attempt at better
- scheduling. */
- ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
- rldicl. 0,31,0,63
- bne .gsp_changed
-
- /* save the jump address in the guest state */
- std 3,OFFSET_ppc64_CIA(31)
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- subi 29,29,1
- cmpldi 29,0
- beq .counter_is_zero
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
- rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
- sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- ld 6,0(5) /* .guest */
- ld 7,8(5) /* .host */
- cmpd 3,6
- bne .fast_lookup_failed
-
- /* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */
- ld 9, .tocent__vgPlain_tt_fastN@toc(2)
- srdi 4, 4,1 /* entry# * sizeof(UInt*) */
- ldx 9, 9,4 /* r7 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
- lwz 6, 0(9) /* *(UInt*)r7 ++ */
- addi 6, 6,1
- stw 6, 0(9)
-
- /* Found a match. Call .host. */
- mtctr 7
- bctrl
-
- /* On return from guest code:
- r3 holds destination (original) address.
- r31 may be unchanged (guest_state), or may indicate further
- details of the control transfer requested to *r3.
- */
- /* start over */
- b .VG_(run_innerloop__dispatch_profiled)
- /*NOTREACHED*/
- .size .VG_(run_a_noredir_translation), .-.VG_(run_a_noredir_translation)
-
-
-/*----------------------------------------------------*/
-/*--- exit points ---*/
-/*----------------------------------------------------*/
-
-.gsp_changed:
- /* Someone messed with the gsp (in r31). Have to
- defer to scheduler to resolve this. dispatch ctr
- is not yet decremented, so no need to increment. */
- /* %CIA is NOT up to date here. First, need to write
- %r3 back to %CIA, but without trashing %r31 since
- that holds the value we want to return to the scheduler.
- Hence use %r5 transiently for the guest state pointer. */
- ld 5,152(1) /* original guest_state ptr */
- std 3,OFFSET_ppc64_CIA(5)
- mr 3,31 /* r3 = new gsp value */
- b .run_innerloop_exit
- /*NOTREACHED*/
-
-.counter_is_zero:
- /* %CIA is up to date */
- /* back out decrement of the dispatch counter */
- addi 29,29,1
- li 3,VG_TRC_INNER_COUNTERZERO
- b .run_innerloop_exit
-
-.fast_lookup_failed:
- /* %CIA is up to date */
- /* back out decrement of the dispatch counter */
- addi 29,29,1
- li 3,VG_TRC_INNER_FASTMISS
- b .run_innerloop_exit
-
-
-
-/* All exits from the dispatcher go through here.
- r3 holds the return value.
-*/
-.run_innerloop_exit:
/* We're leaving. Check that nobody messed with
- VSCR or FPSCR. */
+ VSCR or FPSCR in ways we don't expect. */
+ /* Using r11 - value used again further on, so don't trash! */
+ ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
+ ld 11,0(11)
/* Set fpscr back to a known state, since vex-generated code
may have messed with fpscr[rm]. */
@@ -434,10 +267,7 @@
addi 1,1,16
mtfsf 0xFF,3 /* fpscr = f3 */
- /* Using r11 - value used again further on, so don't trash! */
- ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
- ld 11,0(11)
- cmpldi 11,0
+ cmpldi 11,0 /* Do we have altivec? */
beq .LafterVMX8
/* Check VSCR[NJ] == 1 */
@@ -451,31 +281,18 @@
vspltw 7,7,0x3 /* flags-word to all lanes */
vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
bt 24,.invariant_violation /* branch if all_equal */
+
.LafterVMX8:
-
/* otherwise we're OK */
- b .run_innerloop_exit_REALLY
-
+ b .remove_frame
.invariant_violation:
- li 3,VG_TRC_INVARIANT_FAILED
- b .run_innerloop_exit_REALLY
+ li 6,VG_TRC_INVARIANT_FAILED
+ li 7,0
+ /* fall through */
-.run_innerloop_exit_REALLY:
- /* r3 holds VG_TRC_* value to return */
-
- /* Return to parent stack */
- addi 1,1,48
-
- /* Write ctr to VG_(dispatch_ctr) (=32bit value) */
- ld 5,.tocent__vgPlain_dispatch_ctr@toc(2)
- stw 29,0(5)
-
- /* Restore cr */
- lwz 0,44(1)
- mtcr 0
-
- /* Restore callee-saved registers... */
+.remove_frame:
+ /* Restore FP regs */
/* Floating-point regs */
lfd 31,616(1)
@@ -497,31 +314,11 @@
lfd 15,488(1)
lfd 14,480(1)
- /* General regs */
- ld 31,472(1)
- ld 30,464(1)
- ld 29,456(1)
- ld 28,448(1)
- ld 27,440(1)
- ld 26,432(1)
- ld 25,424(1)
- ld 24,416(1)
- ld 23,408(1)
- ld 22,400(1)
- ld 21,392(1)
- ld 20,384(1)
- ld 19,376(1)
- ld 18,368(1)
- ld 17,360(1)
- ld 16,352(1)
- ld 15,344(1)
- ld 14,336(1)
- ld 13,328(1)
-
- /* r11 already holds VG_(machine_ppc64_has_VMX) value */
- cmpldi 11,0
+   /* r11 already holds VG_(machine_ppc64_has_VMX) value */
+   cmpldi 11,0
beq .LafterVMX9
+ /* Restore Altivec regs */
/* VRSAVE */
lwz 4,324(1)
mfspr 4,256 /* VRSAVE reg is spr number 256 */
@@ -553,7 +350,33 @@
lvx 20,4,1
.LafterVMX9:
- /* reset cr, lr, sp */
+ /* restore int regs, including importantly r3 (two_words) */
+ addi 1,1,48
+ ld 31,472(1)
+ ld 30,464(1)
+ ld 29,456(1)
+ ld 28,448(1)
+ ld 27,440(1)
+ ld 26,432(1)
+ ld 25,424(1)
+ ld 24,416(1)
+ ld 23,408(1)
+ ld 22,400(1)
+ ld 21,392(1)
+ ld 20,384(1)
+ ld 19,376(1)
+ ld 18,368(1)
+ ld 17,360(1)
+ ld 16,352(1)
+ ld 15,344(1)
+ ld 14,336(1)
+ ld 13,328(1)
+   ld 3,104(1) /* two_words, saved near entry */
+ /* Stash return values */
+ std 6,0(3)
+ std 7,8(3)
+
+ /* restore lr & sp, and leave */
ld 0,632(1) /* stack_size + 8 */
mtcr 0
ld 0,640(1) /* stack_size + 16 */
@@ -562,94 +385,146 @@
blr
-/*------------------------------------------------------------*/
-/*--- ---*/
-/*--- A special dispatcher, for running no-redir ---*/
-/*--- translations. Just runs the given translation once. ---*/
-/*--- ---*/
-/*------------------------------------------------------------*/
+/*----------------------------------------------------*/
+/*--- Continuation points ---*/
+/*----------------------------------------------------*/
-/* signature:
-void VG_(run_a_noredir_translation) ( UWord* argblock );
-*/
+/* ------ Chain me to slow entry point ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_chain_me_to_slowEP)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_chain_me_to_slowEP):
+ .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_chain_me_to_slowEP),@function
+ .globl .VG_(disp_cp_chain_me_to_slowEP)
+.VG_(disp_cp_chain_me_to_slowEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+      the return address and exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
+ mflr 7
+ /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
+ 4 = mtctr r30
+      4 = bctr
+ */
+ subi 7,7,20+4+4
+ b .postamble
-/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
- and 2 to carry results:
- 0: input: ptr to translation
- 1: input: ptr to guest state
- 2: output: next guest PC
- 3: output: guest state pointer afterwards (== thread return code)
-*/
+/* ------ Chain me to fast entry point ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_chain_me_to_fastEP)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_chain_me_to_fastEP):
+ .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_chain_me_to_fastEP),@function
+ .globl .VG_(disp_cp_chain_me_to_fastEP)
+.VG_(disp_cp_chain_me_to_fastEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+      the return address and exit back to C land,
+      handing the caller the pair (Chain_me_F, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
+ mflr 7
+ /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
+ 4 = mtctr r30
+      4 = bctr
+ */
+ subi 7,7,20+4+4
+ b .postamble
+
+/* ------ Indirect but boring jump ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_xindir)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_xindir):
+ .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_xindir),@function
+ .globl .VG_(disp_cp_xindir)
+.VG_(disp_cp_xindir):
+ /* Where are we going? */
+ ld 3,OFFSET_ppc64_CIA(31)
+
+ /* stats only */
+ ld 5, .tocent__vgPlain_stats__n_xindirs@toc(2)
+ ld 6,0(5)
+ addi 6,6,1
+ std 6,0(5)
+
+ /* r5 = &VG_(tt_fast) */
+ ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
+
+ /* try a fast lookup in the translation cache */
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */
+ rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */
+ sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */
+ add 5,5,4 /* & VG_(tt_fast)[entry#] */
+ ld 6,0(5) /* .guest */
+ ld 7,8(5) /* .host */
+ cmpd 3,6
+ bne .fast_lookup_failed
+
+ /* Found a match. Jump to .host. */
+ mtctr 7
+ bctr
+
+.fast_lookup_failed:
+ /* stats only */
+ ld 5, .tocent__vgPlain_stats__n_xindir_misses@toc(2)
+ ld 6,0(5)
+ addi 6,6,1
+ std 6,0(5)
+
+ li 6,VG_TRC_INNER_FASTMISS
+ li 7,0
+ b .postamble
+ /*NOTREACHED*/
+
+/* ------ Assisted jump ------ */
.section ".text"
-.align 2
-.globl VG_(run_a_noredir_translation)
-.section ".opd","aw"
-.align 3
-VG_(run_a_noredir_translation):
-.quad .VG_(run_a_noredir_translation),.TOC.@tocbase,0
-.previous
-.type .VG_(run_a_noredir_translation),@function
-.globl .VG_(run_a_noredir_translation)
-.VG_(run_a_noredir_translation):
- /* save callee-save int regs, & lr */
- stdu 1,-512(1)
- std 14,256(1)
- std 15,264(1)
- std 16,272(1)
- std 17,280(1)
- std 18,288(1)
- std 19,296(1)
- std 20,304(1)
- std 21,312(1)
- std 22,320(1)
- std 23,328(1)
- std 24,336(1)
- std 25,344(1)
- std 26,352(1)
- std 27,360(1)
- std 28,368(1)
- std 29,376(1)
- std 30,384(1)
- std 31,392(1)
- mflr 31
- std 31,400(1)
- std 2,408(1) /* also preserve R2, just in case .. */
+ .align 2
+ .globl VG_(disp_cp_xassisted)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_xassisted):
+ .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_xassisted),@function
+ .globl .VG_(disp_cp_xassisted)
+.VG_(disp_cp_xassisted):
+ /* r31 contains the TRC */
+ mr 6,31
+ li 7,0
+ b .postamble
- std 3,416(1)
- ld 31,8(3)
- ld 30,0(3)
- mtlr 30
- blrl
+/* ------ Event check failed ------ */
+ .section ".text"
+ .align 2
+ .globl VG_(disp_cp_evcheck_fail)
+ .section ".opd","aw"
+ .align 3
+VG_(disp_cp_evcheck_fail):
+ .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
+ .previous
+ .type .VG_(disp_cp_evcheck_fail),@function
+ .globl .VG_(disp_cp_evcheck_fail)
+.VG_(disp_cp_evcheck_fail):
+ li 6,VG_TRC_INNER_COUNTERZERO
+ li 7,0
+ b .postamble
- ld 4,416(1)
- std 3, 16(4)
- std 31,24(4)
-
- ld 14,256(1)
- ld 15,264(1)
- ld 16,272(1)
- ld 17,280(1)
- ld 18,288(1)
- ld 19,296(1)
- ld 20,304(1)
- ld 21,312(1)
- ld 22,320(1)
- ld 23,328(1)
- ld 24,336(1)
- ld 25,344(1)
- ld 26,352(1)
- ld 27,360(1)
- ld 28,368(1)
- ld 29,376(1)
- ld 30,384(1)
- ld 31,400(1)
- mtlr 31
- ld 31,392(1)
- ld 2,408(1) /* also preserve R2, just in case .. */
-
- addi 1,1,512
- blr
-
+
+.size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 09e2908..6fc5f50 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -871,8 +871,9 @@
//vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
/* Set up event counter stuff for the run. */
- tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
- tst->arch.vex.host_EvC_FAILADDR = (HWord)&VG_(disp_cp_evcheck_fail);
+ tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
+ tst->arch.vex.host_EvC_FAILADDR
+ = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
if (0) {
vki_sigset_t m;
@@ -917,7 +918,7 @@
vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
vg_assert(tst->arch.vex.host_EvC_FAILADDR
- == (HWord)&VG_(disp_cp_evcheck_fail));
+ == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1);
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 8c572fb..af67d10 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -906,6 +906,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC64State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC64State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA);
Bool is64 = True;
IRType ty_Word = Ity_I64;
IROp op_CmpNE = Iop_CmpNE64;
@@ -919,6 +920,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC32State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC32State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC32State,guest_CIA);
Bool is64 = False;
IRType ty_Word = Ity_I32;
IROp op_CmpNE = Iop_CmpNE32;
@@ -970,7 +972,8 @@
mkU(0)
),
Ijk_EmFail,
- is64 ? IRConst_U64(0) : IRConst_U32(0)
+ is64 ? IRConst_U64(0) : IRConst_U32(0),
+ offB_CIA
)
);
@@ -997,6 +1000,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC64State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC64State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA);
Bool is64 = True;
IRType ty_Word = Ity_I64;
IROp op_CmpNE = Iop_CmpNE64;
@@ -1008,6 +1012,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC32State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC32State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC32State,guest_CIA);
Bool is64 = False;
IRType ty_Word = Ity_I32;
IROp op_CmpNE = Iop_CmpNE32;
@@ -1049,7 +1054,8 @@
mkU(0)
),
Ijk_EmFail,
- is64 ? IRConst_U64(0) : IRConst_U32(0)
+ is64 ? IRConst_U64(0) : IRConst_U32(0),
+ offB_CIA
)
);
@@ -1514,57 +1520,20 @@
hassle, because we don't expect them to get used often. So
don't bother. */
if (allow_redirection) {
- vta.disp_cp_chain_me_to_slowEP = (void*) &VG_(disp_cp_chain_me_to_slowEP);
- vta.disp_cp_chain_me_to_fastEP = (void*) &VG_(disp_cp_chain_me_to_fastEP);
- vta.disp_cp_xindir = (void*) &VG_(disp_cp_xindir);
+ vta.disp_cp_chain_me_to_slowEP
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_chain_me_to_slowEP) );
+ vta.disp_cp_chain_me_to_fastEP
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_chain_me_to_fastEP) );
+ vta.disp_cp_xindir
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_xindir) );
} else {
vta.disp_cp_chain_me_to_slowEP = NULL;
vta.disp_cp_chain_me_to_fastEP = NULL;
vta.disp_cp_xindir = NULL;
}
- /* Thins doesn't involve chaining and so is always allowable. */
- vta.disp_cp_xassisted = (void*) &VG_(disp_cp_xassisted);
-
-#if 0
- // FIXME tidy this up and make profiling work again
-# if defined(VGA_x86) || defined(VGA_amd64)
- if (!allow_redirection) {
- /* It's a no-redir translation. Will be run with the
- nonstandard dispatcher VG_(run_a_noredir_translation) and so
- needs a nonstandard return point. */
- vta.dispatch_assisted
- = (void*) &VG_(run_a_noredir_translation__return_point);
- vta.dispatch_unassisted
- = vta.dispatch_assisted;
- }
- else
- if (VG_(clo_profile_flags) > 0) {
- /* normal translation; although we're profiling. */
- vta.dispatch_assisted
- = (void*) &VG_(run_innerloop__dispatch_assisted_profiled);
- vta.dispatch_unassisted
- = (void*) &VG_(run_innerloop__dispatch_unassisted_profiled);
- }
- else {
- /* normal translation and we're not profiling (the normal case) */
- vta.dispatch_assisted
- = (void*) &VG_(run_innerloop__dispatch_assisted_unprofiled);
- vta.dispatch_unassisted
- = (void*) &VG_(run_innerloop__dispatch_unassisted_unprofiled);
- }
-
-# elif defined(VGA_ppc32) || defined(VGA_ppc64) \
- || defined(VGA_arm) || defined(VGA_s390x)
- /* See comment in libvex.h. This target uses a
- return-to-link-register scheme to get back to the dispatcher, so
- both fields are NULL. */
- vta.dispatch_assisted = NULL;
- vta.dispatch_unassisted = NULL;
-
-# else
-# error "Unknown arch"
-# endif
-#endif /* 0 */
+ /* This doesn't involve chaining and so is always allowable. */
+ vta.disp_cp_xassisted
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_xassisted) );
/* Sheesh. Finally, actually _do_ the translation! */
tres = LibVEX_Translate ( &vta );
diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c
index a8e5eb5..8ae48cb 100644
--- a/coregrind/m_transtab.c
+++ b/coregrind/m_transtab.c
@@ -805,8 +805,10 @@
UChar* place_to_patch
= ((HChar*)tte->tcptr) + ie->from_offs;
UChar* disp_cp_chain_me
- = ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
- : &VG_(disp_cp_chain_me_to_slowEP);
+ = VG_(fnptr_to_fnentry)(
+ ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
+ : &VG_(disp_cp_chain_me_to_slowEP)
+ );
UChar* place_to_jump_to_EXPECTED
= ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;
diff --git a/coregrind/pub_core_dispatch.h b/coregrind/pub_core_dispatch.h
index 5b61f87..efb5aeb 100644
--- a/coregrind/pub_core_dispatch.h
+++ b/coregrind/pub_core_dispatch.h
@@ -66,13 +66,13 @@
/* We need to know addresses of the continuation-point (cp_) labels so
we can tell VEX what they are. They will get baked into the code
- VEX generates. The UChar is entirely mythical, but we need to
+ VEX generates. The type is entirely mythical, but we need to
state _some_ type, so as to keep gcc happy. */
-UChar VG_(disp_cp_chain_me_to_slowEP);
-UChar VG_(disp_cp_chain_me_to_fastEP);
-UChar VG_(disp_cp_xindir);
-UChar VG_(disp_cp_xassisted);
-UChar VG_(disp_cp_evcheck_fail);
+void VG_(disp_cp_chain_me_to_slowEP)(void);
+void VG_(disp_cp_chain_me_to_fastEP)(void);
+void VG_(disp_cp_xindir)(void);
+void VG_(disp_cp_xassisted)(void);
+void VG_(disp_cp_evcheck_fail)(void);
#endif // __PUB_CORE_DISPATCH_H