Fixed up ppc64 assembly to provide function descriptors in .opd sections
do_syscall_for_client_WRK() needed a bigger stack frame so that its register saves avoid the linkage area.
Always use the dot-prefixed label name (e.g. "bl .m_libcassert_get_ip") when calling a label.
Stopped wrapping the assembly with
.section ".text"
...
.previous
- ppc64 doesn't like it. It seems that .previous can't be used to 'stack' more than one section and then pop them off again.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5405 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c
index 773b601..f5e4a17 100644
--- a/coregrind/m_libcassert.c
+++ b/coregrind/m_libcassert.c
@@ -78,8 +78,8 @@
#elif defined(VGP_ppc64_linux)
# define GET_REAL_PC_SP_AND_FP(pc, sp, fp) \
asm("mflr 0;" /* r0 = lr */ \
- "bl m_libcassert_get_ip;" /* lr = pc */ \
- "m_libcassert_get_ip:\n" \
+ "bl .m_libcassert_get_ip;" /* lr = pc */ \
+ ".m_libcassert_get_ip:\n" \
"mflr %0;" \
"mtlr 0;" /* restore lr */ \
"mr %1,1;" \
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 403b8fd..a8987e7 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -373,7 +373,7 @@
/* VG_(printf)("FP %d VMX %d\n", (Int)have_fp, (Int)have_vmx); */
/* We can only support 3 cases, not 4 (vmx but no fp). So make
- fp a prerequisite for vmx. */
+ fp a prerequisite for vmx. */
if (have_vmx && !have_fp)
have_vmx = False;
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 97ba2d9..0bb7161 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2850,9 +2850,9 @@
);
#elif defined(VGP_ppc64_linux)
asm("\n"
- ".text\n"
/* PPC64 ELF ABI says '_start' points to a function descriptor.
So we must have one, and that is what goes into the .opd section. */
+ "\t.align 2\n"
"\t.global _start\n"
"\t.section \".opd\",\"aw\"\n"
"\t.align 3\n"
@@ -2886,7 +2886,6 @@
"\tbl ._start_in_C\n"
"\tnop\n"
"\ttrap\n"
- ".previous\n"
);
#else
#error "_start: needs implementation on this platform"
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index e920271..a9d060c 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -477,11 +477,18 @@
".previous\n"
#elif defined(VGP_ppc64_linux)
# define _MYSIG(name) \
- ".text\n" \
+ ".align 2\n" \
+ ".globl my_sigreturn\n" \
+ ".section \".opd\",\"aw\"\n" \
+ ".align 3\n" \
"my_sigreturn:\n" \
+ ".quad .my_sigreturn,.TOC.@tocbase,0\n" \
+ ".previous\n" \
+ ".type .my_sigreturn,@function\n" \
+ ".globl .my_sigreturn\n" \
+ ".my_sigreturn:\n" \
" li 0, " #name "\n" \
- " sc\n" \
- ".previous\n"
+ " sc\n"
#else
# error Unknown platform
#endif
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
index ae28f49..db95656 100644
--- a/coregrind/m_syscall.c
+++ b/coregrind/m_syscall.c
@@ -218,7 +218,15 @@
bottom but of [1]. */
extern void do_syscall_WRK ( ULong* argblock );
asm(
-".text\n"
+".align 2\n"
+".globl do_syscall_WRK\n"
+".section \".opd\",\"aw\"\n"
+".align 3\n"
+"do_syscall_WRK:\n"
+".quad .do_syscall_WRK,.TOC.@tocbase,0\n"
+".previous\n"
+".type .do_syscall_WRK,@function\n"
+".globl .do_syscall_WRK\n"
".do_syscall_WRK:\n"
" std 3,-16(1)\n" /* stash arg */
" ld 8, 48(3)\n" /* sc arg 6 */
@@ -236,7 +244,6 @@
" andi. 3,3,1\n"
" std 3,8(5)\n" /* argblock[1] = cr0.s0 & 1 */
" blr\n"
-".previous\n"
);
#else
# error Unknown platform
diff --git a/coregrind/m_syswrap/syscall-ppc64-linux.S b/coregrind/m_syswrap/syscall-ppc64-linux.S
index 3d8134f..0a31c24 100644
--- a/coregrind/m_syswrap/syscall-ppc64-linux.S
+++ b/coregrind/m_syswrap/syscall-ppc64-linux.S
@@ -70,14 +70,22 @@
/* from vki_arch.h */
#define VKI_SIG_SETMASK 2
+.align 2
+.globl ML_(do_syscall_for_client_WRK)
+.section ".opd","aw"
+.align 3
+ML_(do_syscall_for_client_WRK):
+.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
+.previous
+.type .ML_(do_syscall_for_client_WRK),@function
.globl .ML_(do_syscall_for_client_WRK)
.ML_(do_syscall_for_client_WRK):
/* make a stack frame */
- stdu 1,-64(1)
- std 31,56(1)
- std 30,48(1)
- std 29,40(1)
- std 28,32(1)
+ stdu 1,-80(1)
+ std 31,72(1)
+ std 30,64(1)
+ std 29,56(1)
+ std 28,48(1)
mr 31,3 /* syscall number */
mr 30,4 /* guest_state */
mr 29,6 /* postmask */
@@ -122,11 +130,11 @@
/* now safe from signals */
/* pop off stack frame */
-5: ld 28,32(1)
- ld 29,40(1)
- ld 30,48(1)
- ld 31,56(1)
- addi 1,1,64
+5: ld 28,48(1)
+ ld 29,56(1)
+ ld 30,64(1)
+ ld 31,72(1)
+ addi 1,1,80
blr
/* failure: return -ve error code */
@@ -149,8 +157,7 @@
ML_(blksys_committed): .long 4b
ML_(blksys_finished): .long 5b
-.previous
-
+
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
index 35c068d..ef75719 100644
--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
@@ -74,7 +74,7 @@
address, the second word is the TOC ptr (r2), and the third word is
the static chain value. */
asm(
-".text\n"
+" .align 2\n"
" .globl vgModuleLocal_call_on_new_stack_0_1\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
@@ -122,7 +122,6 @@
" mtcr 0\n\t" // CAB: Need this?
" bctr\n\t" // jump to dst
" trap\n" // should never get here
-".previous\n"
);
@@ -166,7 +165,15 @@
Int* parent_tid,
void/*vki_modify_ldt_t*/ * );
asm(
-".text\n"
+" .align 2\n"
+" .globl do_syscall_clone_ppc64_linux\n"
+" .section \".opd\",\"aw\"\n"
+" .align 3\n"
+"do_syscall_clone_ppc64_linux:\n"
+" .quad .do_syscall_clone_ppc64_linux,.TOC.@tocbase,0\n"
+" .previous\n"
+" .type .do_syscall_clone_ppc64_linux,@function\n"
+" .globl .do_syscall_clone_ppc64_linux\n"
".do_syscall_clone_ppc64_linux:\n"
" stdu 1,-64(1)\n"
" std 29,40(1)\n"
@@ -229,7 +236,6 @@
" ld 31,56(1)\n"
" addi 1,1,64\n"
" blr\n"
-".previous\n"
);
#undef __NR_CLONE
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index 8423fed..1da47dd 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -296,11 +296,28 @@
/* a leading page of unexecutable code */
UD2_PAGE
+.align 2
.global VG_(trampoline_stuff_start)
+.section ".opd","aw"
+.align 3
VG_(trampoline_stuff_start):
+.quad .VG_(trampoline_stuff_start),.TOC.@tocbase,0
+.previous
+.type .VG_(trampoline_stuff_start),@function
+.global .VG_(trampoline_stuff_start)
+.VG_(trampoline_stuff_start):
+
+.align 2
.global VG_(trampoline_stuff_end)
+.section ".opd","aw"
+.align 3
VG_(trampoline_stuff_end):
+.quad .VG_(trampoline_stuff_end),.TOC.@tocbase,0
+.previous
+.type .VG_(trampoline_stuff_end),@function
+.global .VG_(trampoline_stuff_end)
+.VG_(trampoline_stuff_end):
# undef UD2_16
# undef UD2_64
diff --git a/coregrind/vki_unistd-ppc64-linux.h b/coregrind/vki_unistd-ppc64-linux.h
index 17f640c..ffd5397 100644
--- a/coregrind/vki_unistd-ppc64-linux.h
+++ b/coregrind/vki_unistd-ppc64-linux.h
@@ -309,4 +309,4 @@
#define __NR_inotify_rm_watch 277
-#endif /* __VKI_UNISTD_PPC32_LINUX_H */
+#endif /* __VKI_UNISTD_PPC64_LINUX_H */
diff --git a/docs/internals/performance.txt b/docs/internals/performance.txt
index ddeeda6..ff2075c 100644
--- a/docs/internals/performance.txt
+++ b/docs/internals/performance.txt
@@ -16,7 +16,7 @@
Saved 1--3% on a few programs.
- r5345,r5346,r5352: Julian improved the dispatcher so that x86 and
AMD64 use jumps instead of call/return for calling translations.
- Also, on x86, amd64 and ppc32, --profile-flags style profiling was
+ Also, on x86, amd64, ppc32 and ppc64, --profile-flags style profiling was
removed from the despatch loop unless --profile-flags is being used.
Improved Nulgrind performance typically by 10--20%, and Memcheck
performance typically by 2--20%.