Update some stuff related to the syscalls change:
 - restart syscalls which return ERESTARTSYS
 - make code to restart a syscall common
 - keep a process-wide pending signal set to store as-yet
   undelivered signals (only used in 2.4 mode; TODO: siginfo, queues)
 - make execve work a bit better - it is impossible to recover from
   execve failing, so we try to check that it will succeed before
   running the syscall itself (also fixes bug with SuSE 8.2 kernel)


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1964 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index f3cae12..e8f53d3 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1530,6 +1530,7 @@
 
 extern Bool VG_(pre_syscall) ( ThreadId tid );
 extern void VG_(post_syscall)( ThreadId tid );
+extern void VG_(restart_syscall) ( ThreadId tid );
 
 extern Bool VG_(is_kerror) ( Int res );
 
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index 92addf4..34fe6cc 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -1209,7 +1209,7 @@
 {
    Int res;
    res = VG_(do_syscall)(__NR_stat, (UInt)file_name, (UInt)buf);
-   return VG_(is_kerror)(res) ? (-1) : 0;
+   return res;			/* return -ve error */
 }
 
 Int VG_(fstat) ( Int fd, struct vki_stat* buf )
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index bdbd841..2a68ef2 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -112,6 +112,12 @@
    thing with signals and LWPs, so we need to do our own. */
 Bool VG_(do_signal_routing) = False;
 
+/* Set of signals which are pending for the whole process.  This is
+   only used when we're doing signal routing, and this is a place to
+   remember pending signals which we can't keep actually pending for
+   some reason. */
+static vki_ksigset_t proc_pending; /* process-wide pending signals */
+
 /* Since we use a couple of RT signals, we need to handle allocating
    the rest for application use. */
 Int VG_(sig_rtmin) = VKI_SIGRTUSERMIN;
@@ -1419,8 +1425,7 @@
 
       if (tst->m_eax == -VKI_ERESTARTSYS) {
 	  if (handler->scss_flags & VKI_SA_RESTART) {
-	     tst->m_eax = tst->syscallno;
-	     tst->m_eip -= 2;		/* sizeof(int $0x80) */
+	     VG_(restart_syscall)(tid);
 	  } else
 	     tst->m_eax = -VKI_EINTR;
       } else {
@@ -1619,38 +1624,17 @@
    if (info->si_code <= VKI_SI_USER) {
       /* 
 	 OK, one of sync signals was sent from user-mode, so try to
-	 deliver it to someone who cares.  We've currently got the
-	 signal blocked because we're in the handler, so some other
-	 thread will pick it up if they want it.  If all the other
-	 threads have this signal blocked, it will remain pending.
-	 wait for .01sec to see if someone picks it up, then eat it if
-	 not (otherwise we will just keep spinning, since we *can't*
-	 block these signals).
+	 deliver it to someone who cares.  Just add it to the
+	 process-wide pending signal set - signal routing will deliver
+	 it to someone eventually.
 
-	 XXX This is crap.  All the proxy LWPs could easily be
-	 blocking this signal transiently (say, waiting for us to
-	 respond to a SigACK), but want it eventually.  Maybe we
-	 should just bite the bullet and scan the per-thread block
-	 sets and decide who to deliver it to.
-       */
-      static const struct vki_timespec ts = { 0, (Int)(.01 * 1000000000) };
-      static const struct vki_timespec zero = { 0, 0 };
-      vki_ksigset_t set;
-      vki_ksiginfo_t si;
-      Bool dropped = False;
-
-      VG_(kkill)(VG_(main_pid), sigNo);
-      VG_(nanosleep)(&ts, NULL);
-      VG_(ksigemptyset)(&set);
-      VG_(ksigaddset)(&set, sigNo);
-      while(VG_(ksigtimedwait)(&set, &si, &zero) == sigNo)
-	 dropped = True;
-
-      if (dropped)
-	 VG_(message)(Vg_UserMsg,
-		      "Dropped pending signal %d (%s) because all threads were blocking it,"
-		      "but we cannot block it forever.",
-		      sigNo, signame(sigNo));
+	 The only other place which touches proc_pending is
+	 VG_(route_signals), and it has signals blocked while doing
+	 so, so there's no race.
+      */
+      VG_(message)(Vg_DebugMsg, 
+		   "adding signal %d to pending set", sigNo);
+      VG_(ksigaddset)(&proc_pending, sigNo);
    } else {
       /* 
 	 A bad signal came from the kernel (indicating an instruction
@@ -1729,7 +1713,6 @@
    static const struct vki_timespec zero = { 0, 0 };
    static ThreadId start_tid = 1;	/* tid to start scanning from */
    vki_ksigset_t set;
-   vki_ksigset_t resend;
    vki_ksiginfo_t si;
    Int sigNo;
 
@@ -1739,23 +1722,29 @@
    if (!VG_(do_signal_routing))
       return;
 
-   VG_(ksigemptyset)(&resend);
-
    /* get the scheduler LWP's signal mask, and use it as the set of
-      signals we're polling for */
-   VG_(ksigprocmask)(VKI_SIG_SETMASK, NULL, &set);
+      signals we're polling for - also block all signals to prevent
+      races */
+   VG_(block_all_host_signals) ( &set );
 
-   while(VG_(ksigtimedwait)(&set, &si, &zero) > 0) {
+   /* grab any pending signals and add them to the pending signal set */
+   while(VG_(ksigtimedwait)(&set, &si, &zero) > 0)
+      VG_(ksigaddset)(&proc_pending, si.si_signo);
+
+   /* transfer signals from the process pending set to a particular
+      thread which has it unblocked */
+   for(sigNo = 0; sigNo < VKI_KNSIG; sigNo++) {
       ThreadId tid;
       ThreadId end_tid;
       Int target = -1;
       
+      if (!VG_(ksigismember)(&proc_pending, sigNo))
+	 continue;
+
       end_tid = start_tid - 1;
       if (end_tid < 0 || end_tid >= VG_N_THREADS)
 	      end_tid = VG_N_THREADS-1;
 
-      sigNo = si.si_signo;
-
       /* look for a suitable thread to deliver it to */
       for(tid = start_tid;
 	  tid != end_tid;
@@ -1773,22 +1762,18 @@
 	 }
       }
       
+      /* found one - deliver it and be done */
       if (target != -1) {
 	 if (VG_(clo_trace_signals))
 	    VG_(message)(Vg_DebugMsg, "Routing signal %d to tid %d",
 			 sigNo, tid);
-	 VG_(proxy_sendsig)(tid, sigNo);
-      } else {
-	 if (VG_(clo_trace_signals))
-	    VG_(message)(Vg_DebugMsg, "Adding signal %d to pending set",
-			 sigNo);
-	 VG_(ksigaddset)(&resend, sigNo);
+	 VG_(proxy_sendsig)(target, sigNo);
+	 VG_(ksigdelset)(&proc_pending, sigNo);
       }
    }
 
-   for(sigNo = 0; sigNo < VKI_KNSIG; sigNo++)
-      if (VG_(ksigismember)(&resend, sigNo))
-	 VG_(ktkill)(VG_(main_pid), sigNo);
+   /* restore signal mask */
+   VG_(restore_all_host_signals) (&set);
 }
 
 /* At startup, copy the process' real signal state to the SCSS.
@@ -1808,6 +1793,9 @@
    */
    VG_(block_all_host_signals)( &saved_procmask );
 
+   /* clear process-wide pending signal set */
+   VG_(ksigemptyset)(&proc_pending);
+
    /* Set the signal mask which the scheduler LWP should maintain from
       now on. */
    set_main_sigmask();
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index 43cdbd0..e0cf791 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -1223,9 +1223,35 @@
       char *const envp[]); */
    MAYBE_PRINTF("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
 		arg1, arg1, arg2, arg3);
-   /* Resistance is futile.  Nuke all other threads.  POSIX
-      mandates this. */
-   VG_(nuke_all_threads_except)( tid );
+
+   /* Erk.  If the exec fails, then the following will have made a
+      mess of things which makes it hard for us to continue.  The
+      right thing to do is piece everything together again in
+      POST(execve), but that's hard work.  Instead, we make an effort
+      to check that the execve will work before actually calling
+      exec. */
+   {
+      struct vki_stat st;
+      Int ret = VG_(stat)((Char *)arg1, &st);
+
+      if (ret < 0) {
+	 res = ret;
+	 return;
+      }
+      /* just look for any X bit set
+	 XXX do proper permissions check?
+       */
+      if ((st.st_mode & 0111) == 0) {
+	 res = -VKI_EACCES;
+	 return;
+      }
+   }
+
+   /* Resistance is futile.  Nuke all other threads.  POSIX mandates
+      this. (Really, nuke them all, since the new process will make
+      its own new thread.) */
+   VG_(nuke_all_threads_except)( VG_INVALID_THREADID );
+
    /* Make any binding for LD_PRELOAD disappear, so that child
       processes don't get traced into. */
    if (!VG_(clo_trace_children)) {
@@ -1242,15 +1268,15 @@
       VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
 	 ld_preload_str, ld_library_path_str );
    }
-}
 
-POST(execve)
-{
-   /* Should we still be alive here?  Don't think so. */
-   /* Actually, above comment is wrong.  execve can fail, just
-      like any other syscall -- typically the file to exec does
-      not exist.  Hence: */
-   vg_assert(VG_(is_kerror)(res));
+   res = VG_(do_syscall)(__NR_execve, arg1, arg2, arg3);
+
+   /* If we got here, then the execve failed.  We've already made too much of a mess
+      of ourselves to continue, so we have to abort. */
+   VG_(message)(Vg_UserMsg, "execve(%p \"%s\", %p, %p) failed, errno %d",
+		arg1, arg1, arg2, arg3, -res);
+   VG_(core_panic)("EXEC FAILED: I can't recover from execve() failing, so I'm dying.\n"
+		   "Add more stringent tests in PRE(execve), or work out how to recover.");   
 }
 
 PRE(access)
@@ -4099,6 +4125,8 @@
 
    SYSB_(modify_ldt,		False),
 
+   SYSB_(execve,		False),
+
 #if SIGNAL_SIMULATION
    SYSBA(sigaltstack,		False),
    SYSBA(rt_sigaction,		False),
@@ -4184,7 +4212,6 @@
    SYSB_(ioperm,		False),
    SYSBA(capget,		False),
    SYSB_(capset,		False),
-   SYSBA(execve,		False),
    SYSB_(access,		False),
    SYSBA(brk,			False),
    SYSB_(chdir,			False),
@@ -4467,12 +4494,33 @@
       VGP_POPCC(VgpSkinSysWrap);
    }
 
+   if (tst->m_eax == -VKI_ERESTARTSYS) {
+      /* Applications never expect to see this, so we should actually
+	 restart the syscall (it means the signal happened before the
+	 syscall made any progress, so we can safely restart it and
+	 pretend the signal happened before the syscall even
+	 started)  */
+      VG_(restart_syscall)(tid);
+   }
+
    tst->status = VgTs_Runnable;	/* runnable again */
    tst->syscallno = -1;
 
    VGP_POPCC(VgpCoreSysWrap);
 }
 
+void VG_(restart_syscall)(ThreadId tid)	/* rewind tid's saved registers so its current syscall is issued again */
+{
+   ThreadState* tst;
+   tst = VG_(get_ThreadState)(tid);
+
+   vg_assert(tst != NULL);
+   vg_assert(tst->status == VgTs_WaitSys);	/* caller must invoke this while the thread is still marked as waiting in a syscall */
+   vg_assert(tst->syscallno != -1);		/* the saved syscall number is needed for the reload below */
+
+   tst->m_eax = tst->syscallno;		/* overwrite m_eax with the saved syscall number */
+   tst->m_eip -= 2;		/* sizeof(int $0x80): step m_eip back over the trap instruction */
+}
 
 /*--------------------------------------------------------------------*/
 /*--- end                                            vg_syscalls.c ---*/
diff --git a/include/vg_kerneliface.h b/include/vg_kerneliface.h
index a12dfee..43256ea 100644
--- a/include/vg_kerneliface.h
+++ b/include/vg_kerneliface.h
@@ -358,6 +358,7 @@
 #define VKI_ENOMEM          12      /* Out of memory */
 #define VKI_EWOULDBLOCK     VKI_EAGAIN  /* Operation would block */
 #define VKI_EAGAIN          11      /* Try again */
+#define	VKI_EACCES	    13      /* Permission denied */
 #define	VKI_EFAULT          14      /* Bad address */
 #define VKI_EEXIST	    17	    /* File exists */
 #define VKI_EINVAL          22      /* Invalid argument */
diff --git a/include/vg_skin.h b/include/vg_skin.h
index 081bd6c..79ad4aa 100644
--- a/include/vg_skin.h
+++ b/include/vg_skin.h
@@ -435,6 +435,7 @@
 extern Int  VG_(unlink) ( Char* file_name );
 extern Int  VG_(stat)   ( Char* file_name, struct vki_stat* buf );
 extern Int  VG_(fstat)  ( Int   fd,        struct vki_stat* buf );
+extern Int  VG_(dup2)   ( Int oldfd, Int newfd );
 
 extern Char* VG_(getcwd) ( Char* buf, Int size );
 
@@ -514,7 +515,6 @@
 /* other, randomly useful functions */
 extern UInt VG_(read_millisecond_timer) ( void );
 
-
 /*====================================================================*/
 /*=== UCode definition                                             ===*/
 /*====================================================================*/