Fix some bugs in syscall/signal handling:

If the proxyLWP for a thread got a signal just as we were sending the
syscall request to it, then we would end up running the syscall twice.
The fix is to not check the results pipe while sending the syscall request
- wait until we're in a better state for handling signals (the deadlock
the results-read was supposed to avoid cannot actually happen).

Related to that, if we're delivering a signal to a thread, and that thread
is currently waiting for a syscall to complete, make sure we collect the
syscall results before entering the signal handler (otherwise we may end
up bogusly trying to restart the syscall by moving EIP back, even though
it now points to the signal handler rather than the syscall instruction)

This change also adds an assertion to VG_(restart_syscall) to make sure
we were actually restarting a syscall and not just randomly changing EIP
(this found the problem above).

Also, make set/getitimer run in the proxyLWP context, so that they
modify/read the proxyLWP's timers rather than the schedluer LWP's timers.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2013 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 9229880..9eea7b5 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1540,6 +1540,7 @@
 extern void VG_(proxy_sigack)   ( ThreadId tid, const vki_ksigset_t *);
 extern void VG_(proxy_abort_syscall) ( ThreadId tid );
 extern void VG_(proxy_waitsig)  ( void );
+extern void VG_(proxy_wait_sys)	(ThreadId tid);
 
 extern void VG_(proxy_shutdown) ( void );	/* shut down the syscall workers */
 extern Int  VG_(proxy_resfd)    ( void );	/* FD something can select on to know 
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index 34fe6cc..07144b2 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -1053,11 +1053,10 @@
 static inline ExeContext *get_real_execontext(Addr ret)
 {
    ExeContext *ec;
-   Addr ebp;
+   Addr esp, ebp;
    Addr stacktop;
-   Addr esp = (Addr)&esp;
 
-   asm("movl %%ebp, %0" : "=r" (ebp));
+   asm("movl %%ebp, %0; movl %%esp, %1" : "=r" (ebp), "=r" (esp));
    stacktop = (Addr)&VG_(stack)[VG_STACK_SIZE_W];
    if (esp >= (Addr)&VG_(sigstack)[0] && esp < (Addr)&VG_(sigstack)[VG_STACK_SIZE_W])
       stacktop = (Addr)&VG_(sigstack)[VG_STACK_SIZE_W];
diff --git a/coregrind/vg_proxylwp.c b/coregrind/vg_proxylwp.c
index 6716b0a..9fc470a 100644
--- a/coregrind/vg_proxylwp.c
+++ b/coregrind/vg_proxylwp.c
@@ -558,7 +558,7 @@
 		  XXX how to distunguish between restartable and
 		  non-restartable syscalls?  Does it matter?
 	       */
-	       reply.syscallno = tst->m_eax;
+	       reply.syscallno = tst->syscallno;
 
 	       tst->m_eax = -VKI_ERESTARTSYS;
 	       px->state = PXS_IntReply;
@@ -1181,6 +1181,15 @@
    sys_wait_results(False, 0, PX_BAD);
 }
 
+void VG_(proxy_wait_sys)(ThreadId tid)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+
+   vg_assert(tst->status == VgTs_WaitSys);
+
+   sys_wait_results(True, tid, PX_RunSyscall);
+}
+
 /* Tell proxy about it's thread's updated signal mask */
 void VG_(proxy_setsigmask)(ThreadId tid)
 {
@@ -1266,20 +1275,33 @@
 
    vg_assert(proxy != NULL);
    vg_assert(proxy->tid == tid);
+   vg_assert(tst->status == VgTs_WaitSys);
+
+   /* Clear the results pipe before we try to write to a proxy to
+      prevent a deadlock (the proxyLWP may be trying to write a result
+      back to the scheduler LWP, and therefore not be reading its
+      input pipe, which would then block the write below).
+
+      XXX I think this can't happen - the pipe has 4k of buffering,
+      and can therefore fit many messages, but we can only have one
+      outstanding - the write below will not block forever.  Fetching
+      results here can cause all kinds of confusion, because we
+      definitely don't want the complexity of trying to deliver a
+      signal right now.
+   */
+   if (0)
+      VG_(proxy_results)();
 
    req.request = PX_RunSyscall;
 
    tst->syscallno = tst->m_eax;
    tst->m_eax = -VKI_ERESTARTSYS;
 
-   /* clear the results pipe before we try to write to a proxy to
-      prevent a deadlock */
-   VG_(proxy_results)();
    res = VG_(write)(proxy->topx, &req, sizeof(req));
 
    if (res != sizeof(req)) {
-      VG_(printf)("sys_issue: write to tid %d failed %d (not %d)\n",
-		  tid, res, sizeof(req));
+      VG_(message)(Vg_DebugMsg, "sys_issue: write to tid %d failed %d (not %d)\n",
+		   tid, res, sizeof(req));
    }
    return 0;
 }
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index 2a68ef2..76900d8 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -1389,7 +1389,6 @@
 {
    Int			sigNo = info->si_signo;
    vki_ksigset_t	handlermask;
-   enum ThreadStatus	status;
    SCSS_Per_Signal	*handler = &vg_scss.scss_per_sig[sigNo];
    ThreadState		*tst = VG_(get_ThreadState)(tid);
 
@@ -1419,6 +1418,15 @@
    if (tst->status == VgTs_WaitSys) {
       vg_assert(tst->syscallno != -1);
 
+      /* OK, the thread was waiting for a syscall to complete.  This
+	 means that the proxy has either not yet processed the
+	 RunSyscall request, or was processing it when the signal
+	 came.  Either way, it is going to give us some syscall
+	 results right now, so wait for them to appear.  This makes
+	 the thread runnable again, so we're in the right state to run
+	 the handler, and resume the syscall when we're done. */
+      VG_(proxy_wait_sys)(tid);
+
       if (0)
 	 VG_(printf)("signal %d interrupting syscall %d\n",
 		     sigNo, tst->syscallno);
@@ -1459,23 +1467,17 @@
 	 VG_(handle_SCSS_change)( False /* lazy update */ );
       }
    
-      status = tst->status;
-
-      switch(status) {
+      switch(tst->status) {
       case VgTs_Runnable:
 	 break;
 
       case VgTs_WaitSys:
-	 /* don't change status yet, because we're about to get a
-	    message telling us the syscall was interrupted */
-	 break;
-
       case VgTs_WaitJoiner:
       case VgTs_WaitJoinee:
       case VgTs_WaitMX:
       case VgTs_WaitCV:
       case VgTs_Sleeping:
-	 status = VgTs_Runnable;
+	 tst->status = VgTs_Runnable;
 	 break;
 
       case VgTs_Empty:
@@ -1483,8 +1485,6 @@
 	 break;
       }
 
-      tst->status = status;
-
       /* handler gets the union of the signal's mask and the thread's
 	 mask */
       handlermask = handler->scss_mask;
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index 566ed0d..ab489a5 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -4173,7 +4173,6 @@
    SYSBA(getpmsg,		True),
    SYSB_(putpmsg,		True),
 
-   SYSBA(getitimer,		False),
    SYSBA(syslog,		True),
    SYSB_(personality,		False),
    SYSB_(chroot,		False),
@@ -4291,7 +4290,6 @@
    SYSBA(sched_getparam,	False),	/* ??? */
    SYSB_(sched_yield,		False),	/* ??? */
    SYSB_(select,		True),
-   SYSBA(setitimer,		False),
    SYSB_(setfsgid32,		False),
    SYSB_(setgid32,		False),
    SYSB_(setgid,		False),
@@ -4332,9 +4330,12 @@
    SYSB_(rt_sigsuspend,		True),
    SYSBA(rt_sigtimedwait,	True),
    SYSBA(rt_sigqueueinfo,	False),
+
    SYSBA(sigpending,		True), /* not blocking, but must run in LWP context */
    SYSBA(rt_sigpending,		True), /* not blocking, but must run in LWP context */
    SYSB_(alarm,			True), /* not blocking, but must run in LWP context */
+   SYSBA(setitimer,		True), /* not blocking, but must run in LWP context */
+   SYSBA(getitimer,		True), /* not blocking, but must run in LWP context */
 
 #if !SIGNAL_SIMULATION
    SYSBA(sigaltstack,		False),
@@ -4524,6 +4525,22 @@
 
    tst->m_eax = tst->syscallno;
    tst->m_eip -= 2;		/* sizeof(int $0x80) */
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.
+
+      int $0x80 == CD 80 
+   */
+   {
+      UChar *p = (UChar *)tst->m_eip;
+      
+      if (p[0] != 0xcd || p[1] != 0x80)
+	 VG_(message)(Vg_DebugMsg, 
+		      "?! restarting over syscall at %p %02x %02x\n",
+		      tst->m_eip, p[0], p[1]);
+
+      vg_assert(p[0] == 0xcd && p[1] == 0x80);
+   }
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/docs/proxylwp.sxw b/docs/proxylwp.sxw
index 89c9e32..38ceeaf 100644
--- a/docs/proxylwp.sxw
+++ b/docs/proxylwp.sxw
Binary files differ
diff --git a/include/valgrind.h b/include/valgrind.h
index 047d6fc..15399df 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -194,9 +194,11 @@
 
 #ifndef NVALGRIND
 
+int VALGRIND_PRINTF(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2)));
 __attribute__((weak))
 int
-VALGRIND_PRINTF(char *format, ...)
+VALGRIND_PRINTF(const char *format, ...)
 {
    unsigned int _qzz_res;
    va_list vargs;
@@ -207,9 +209,11 @@
    return _qzz_res;
 }
 
+int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2)));
 __attribute__((weak))
 int
-VALGRIND_PRINTF_BACKTRACE(char *format, ...)
+VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
 {
    unsigned int _qzz_res;
    va_list vargs;
diff --git a/include/vg_kerneliface.h b/include/vg_kerneliface.h
index 43256ea..fcfc71d 100644
--- a/include/vg_kerneliface.h
+++ b/include/vg_kerneliface.h
@@ -365,9 +365,7 @@
 #define VKI_EMFILE	    24	    /* Too many open files */
 #define VKI_ENOSYS          38      /* Function not implemented */
 
-#define VKI_ERESTARTSYS	    512
-#define VKI_ERESTARTNOINTR  513
-#define VKI_ERESTARTNOHAND  514
+#define VKI_ERESTARTSYS	    512	    /* Restart the syscall */
 
 
 /* Gawd ... hack ... */