jmp_with_stack is a hack which makes no sense on archs which pass args
in registers.  Replace it (for amd64) with something more disciplined:
call_on_new_stack_0_0 and call_on_new_stack_0_1.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@3401 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/amd64-linux/syscalls.c b/coregrind/amd64-linux/syscalls.c
index b558a23..9ed1eb4 100644
--- a/coregrind/amd64-linux/syscalls.c
+++ b/coregrind/amd64-linux/syscalls.c
@@ -247,11 +247,18 @@
 {
    ULong *rsp = allocstack(tid);
 
+   VG_(printf)("m-t-r: %d\n", (int)tid);
    vg_assert(tid == VG_(master_tid));
 
-   VG_(threads)[tid].arch.vex.guest_RDI = (ULong)tid; /* set arg */
-   *--rsp = 0;			/* bogus return address */
-   jmp_with_stack((void (*)(void))VGA_(thread_wrapper), (Addr)rsp);
+   call_on_new_stack_0_1( 
+      (Addr)rsp,             /* stack */
+      0,                     /*bogus return address*/
+      VGA_(thread_wrapper),  /* fn to call */
+      (Word)tid              /* arg to give it */
+   );
+
+   /*NOTREACHED*/
+   vg_assert(0);
 }
 
 
diff --git a/coregrind/amd64/jmp_with_stack.c b/coregrind/amd64/jmp_with_stack.c
index 87ddb8f..dacf37a 100644
--- a/coregrind/amd64/jmp_with_stack.c
+++ b/coregrind/amd64/jmp_with_stack.c
@@ -53,3 +53,77 @@
    for(;;)
       asm volatile("ud2");
 } 
+
+
+
+#define ZERO_ALL_INT_REGS  /* asm text: zeroes every integer register except %rsp */ \
+   "   movq $0, %rax\n"  \
+   "   movq $0, %rbx\n"  \
+   "   movq $0, %rcx\n"  \
+   "   movq $0, %rdx\n"  \
+   "   movq $0, %rsi\n"  \
+   "   movq $0, %rdi\n"  \
+   "   movq $0, %rbp\n"  \
+   "   movq $0, %r8\n"   \
+   "   movq $0, %r9\n"   \
+   "   movq $0, %r10\n"  \
+   "   movq $0, %r11\n"  \
+   "   movq $0, %r12\n"  \
+   "   movq $0, %r13\n"  \
+   "   movq $0, %r14\n"  \
+   "   movq $0, %r15\n"
+
+/* Switch to the stack whose top is 'stack', zero all integer registers,
+   then enter f() with 'retaddr' as its return address.  Never returns
+   to the caller.  f is entered with %rsp == stack-8, so 'stack' should
+   be 16-byte aligned to satisfy the amd64 ABI.  The arguments are
+   parked on the new stack first because ZERO_ALL_INT_REGS wipes the
+   registers they arrive in.
+
+   __attribute__((noreturn))
+   void call_on_new_stack_0_0 ( Addr stack, Addr retaddr, void (*f)(void) );
+     %rdi == stack,  %rsi == retaddr,  %rdx == f  */
+asm(
+".text\n"                 /* don't rely on whichever section is current */
+".global call_on_new_stack_0_0\n"
+"call_on_new_stack_0_0:\n"
+"   movq   %rdi, %rsp\n"  /* set stack */
+"   pushq  %rsi\n"        /* retaddr to stack */
+"   pushq  %rdx\n"        /* f to stack */
+    ZERO_ALL_INT_REGS
+"   ret\n"                /* pop f into %rip; retaddr is now at (%rsp) */
+"   ud2\n"                /* should never get here */
+".previous\n"             /* restore the section the compiler was using */
+);
+
+
+
+/* Switch to the stack whose top is 'stack', zero all integer registers,
+   then enter f(arg1) with 'retaddr' as its return address.  Never
+   returns to the caller.  f is entered with %rsp == stack-8, so
+   'stack' should be 16-byte aligned to satisfy the amd64 ABI.  The
+   arguments are parked on the new stack first because
+   ZERO_ALL_INT_REGS wipes the registers they arrive in; arg1 is then
+   popped into %rdi, the first integer argument register.
+
+   __attribute__((noreturn))
+   void call_on_new_stack_0_1 ( Addr stack, Addr retaddr,
+                                void (*f)(Word), Word arg1 );
+     %rdi == stack,  %rsi == retaddr,  %rdx == f,  %rcx == arg1  */
+asm(
+".text\n"                 /* don't rely on whichever section is current */
+".global call_on_new_stack_0_1\n"
+"call_on_new_stack_0_1:\n"
+"   movq   %rdi, %rsp\n"  /* set stack */
+"   pushq  %rsi\n"        /* retaddr to stack */
+"   pushq  %rdx\n"        /* f to stack */
+"   pushq  %rcx\n"        /* arg1 to stack */
+    ZERO_ALL_INT_REGS
+"   popq   %rdi\n"        /* arg1 to correct arg reg */
+"   ret\n"                /* pop f into %rip; retaddr is now at (%rsp) */
+"   ud2\n"                /* should never get here */
+".previous\n"             /* restore the section the compiler was using */
+);
+
+
+#undef ZERO_ALL_INT_REGS
diff --git a/coregrind/core.h b/coregrind/core.h
index 0d23d27..fc4cec2 100644
--- a/coregrind/core.h
+++ b/coregrind/core.h
@@ -1686,7 +1686,7 @@
 extern void VGA_(os_state_init)(ThreadState *);
 
 // Run a thread from beginning to end.  Does not return if tid == VG_(master_tid).
-void VGA_(thread_wrapper)(ThreadId tid);
+void VGA_(thread_wrapper)(Word /*ThreadId*/ tid);  // Word so it can be the f of call_on_new_stack_0_1
 
 // Like VGA_(thread_wrapper), but it allocates a stack before calling
 // to VGA_(thread_wrapper) on that stack, as if it had been set up by
diff --git a/coregrind/linux/core_os.c b/coregrind/linux/core_os.c
index e679573..f71d331 100644
--- a/coregrind/linux/core_os.c
+++ b/coregrind/linux/core_os.c
@@ -15,10 +15,11 @@
 }
 
 /* Run a thread from beginning to end. Does not return. */
-void VGA_(thread_wrapper)(ThreadId tid)
+void VGA_(thread_wrapper)(Word /*ThreadId*/ tidW)
 {
    VgSchedReturnCode ret;
-   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ThreadId     tid = (ThreadId)tidW;
+   ThreadState* tst = VG_(get_ThreadState)(tid);
 
    vg_assert(tst->status == VgTs_Init);
 
diff --git a/coregrind/ume.h b/coregrind/ume.h
index 25509d6..e91f030 100644
--- a/coregrind/ume.h
+++ b/coregrind/ume.h
@@ -54,6 +54,27 @@
 __attribute__((noreturn))
 void jmp_with_stack(void (*eip)(void), Addr sp);
 
+
+/* Call f(), but first switch stacks, using 'stack' as the new stack,
+   and use 'retaddr' as f's return-to address.  All the integer
+   registers are cleared before entering f.  Does not return. */
+extern
+__attribute__((noreturn))
+void call_on_new_stack_0_0 ( Addr stack,
+			     Addr retaddr,
+			     void (*f)(void) );
+
+/* Call f(arg1), but first switch stacks, using 'stack' as the new
+   stack, and use 'retaddr' as f's return-to address.  The integer
+   registers are cleared before entering f.  Does not return. */
+extern
+__attribute__((noreturn))
+void call_on_new_stack_0_1 ( Addr stack,
+			     Addr retaddr,
+			     void (*f)(Word),
+                             Word arg1 );
+
+
 /*------------------------------------------------------------*/
 /*--- Loading ELF files                                    ---*/
 /*------------------------------------------------------------*/