A complete reworking of Valgrind's handling of system calls and signals,
with the aim of making it more robust, more correct and perhaps faster.

This patch removes the need to poll blocking syscalls, by adding a proxy
LWP for each application thread.  This LWP is a kernel thread whose job
is to run all (potentially) blocking syscalls, and also to handle signals.

This allows the kernel to do more of the work of dealing with signals,
so on kernels which do this properly (2.6), Valgrind's behaviour is a
lot more POSIX-compliant.  On base 2.4 kernels, we emulate some of the
missing 2.6 functionality.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1918 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 50a2033..6582ac2 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -4,8 +4,8 @@
 add_includes = -I$(srcdir)/demangle -I$(top_srcdir)/include
 
 AM_CPPFLAGS = $(add_includes) -DVG_LIBDIR="\"$(libdir)"\"
-AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fomit-frame-pointer \
-		@PREFERRED_STACK_BOUNDARY@ -g
+AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fno-omit-frame-pointer \
+		@PREFERRED_STACK_BOUNDARY@ -g -fpic
 AM_CCASFLAGS = $(add_includes)
 
 valdir = $(libdir)/valgrind
@@ -19,7 +19,7 @@
 	valgrinq.so \
 	libpthread.so
 
-EXTRA_DIST = vg_libpthread.vs
+EXTRA_DIST = vg_libpthread.vs valgrind.vs
 
 libpthread_so_SOURCES = \
 	vg_libpthread.c \
@@ -53,6 +53,7 @@
 	vg_mylibc.c \
 	vg_needs.c \
 	vg_procselfmaps.c \
+	vg_proxylwp.c \
 	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
@@ -63,7 +64,11 @@
 	vg_translate.c \
 	vg_transtab.c \
 	vg_ldt.c
-valgrind_so_LDFLAGS = -Wl,-z,initfirst -shared
+valgrind_so_DEPENDENCIES = $(srcdir)/valgrind.vs
+valgrind_so_LDFLAGS = \
+	-Wl,-z,initfirst -shared \
+	-Wl,-version-script $(srcdir)/valgrind.vs 
+
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c
index ee194fc..0d0d2b5 100644
--- a/coregrind/arch/x86-linux/vg_libpthread.c
+++ b/coregrind/arch/x86-linux/vg_libpthread.c
@@ -74,24 +74,19 @@
 #include <stdio.h>
 
 
+# define strong_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
+# define weak_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
+
+
 /* ---------------------------------------------------------------------
    Forwardses.
    ------------------------------------------------------------------ */
 
 #define WEAK	__attribute__((weak))
 
-
-static
-int my_do_syscall1 ( int syscallno, int arg1 );
-
-static
-int my_do_syscall2 ( int syscallno, 
-                     int arg1, int arg2 );
-
-static
-int my_do_syscall3 ( int syscallno, 
-                     int arg1, int arg2, int arg3 );
-
 static
 __inline__
 int is_kerror ( int res )
@@ -141,7 +136,7 @@
 static
 void my_exit ( int arg )
 {
-   my_do_syscall1(__NR_exit, arg);
+   VG_(do_syscall)(__NR_exit, arg);
    /*NOTREACHED*/
 }
 
@@ -149,7 +144,7 @@
 static
 void my_write ( int fd, const void *buf, int count )
 {
-   my_do_syscall3(__NR_write, fd, (int)buf, count );
+   VG_(do_syscall)(__NR_write, fd, (int)buf, count );
 }
 */
 
@@ -1208,7 +1203,7 @@
    return res;
 }
 
-static __inline__
+static
 void __my_pthread_testcancel(void)
 {
    int res;
@@ -1272,10 +1267,6 @@
                return EINVAL;
    }
 
-   /* Crude check */
-   if (newmask == NULL)
-      return EFAULT;
-
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__PTHREAD_SIGMASK,
                            how, newmask, oldmask, 0);
@@ -1284,17 +1275,20 @@
    return res == 0 ? 0 : EFAULT;
 }
 
-
 int sigwait ( const sigset_t* set, int* sig )
 {
    int res;
-   ensure_valgrind("sigwait");
+   vki_ksiginfo_t si;
+   
+   __my_pthread_testcancel();
+
    /* As with pthread_sigmask we deliberately confuse sigset_t with
       vki_ksigset_t. */
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__SIGWAIT,
-                           set, sig, 0, 0);
-   return res;
+   si.si_signo = 0;
+   res = VG_(ksigtimedwait)((const vki_ksigset_t *)set, &si, NULL);
+   *sig = si.si_signo;
+
+   return 0;			/* always returns 0 */
 }
 
 
@@ -1324,40 +1318,6 @@
 }
 
 
-int pause ( void )
-{
-   unsigned int n_orig, n_now;
-   struct vki_timespec nanosleep_interval;
-   ensure_valgrind("pause");
-
-   /* This is surely a cancellation point. */
-   __my_pthread_testcancel();
-
-   VALGRIND_MAGIC_SEQUENCE(n_orig, 0xFFFFFFFF /* default */,
-                           VG_USERREQ__GET_N_SIGS_RETURNED, 
-                           0, 0, 0, 0);
-   my_assert(n_orig != 0xFFFFFFFF);
-
-   while (1) {
-      VALGRIND_MAGIC_SEQUENCE(n_now, 0xFFFFFFFF /* default */,
-                              VG_USERREQ__GET_N_SIGS_RETURNED, 
-                              0, 0, 0, 0);
-      my_assert(n_now != 0xFFFFFFFF);
-      my_assert(n_now >= n_orig);
-      if (n_now != n_orig) break;
-
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking. */
-      (void)my_do_syscall2(__NR_nanosleep, 
-                           (int)(&nanosleep_interval), (int)NULL);
-   }
-
-   *(__errno_location()) = EINTR;
-   return -1;
-}
-
 
 /* ---------------------------------------------------
    THREAD-SPECIFICs
@@ -1433,15 +1393,16 @@
    VALGRIND_MAGIC_SEQUENCE(res, 1 /* default */,
                            VG_USERREQ__PTHREAD_KEY_CREATE,
                            key, destr_function, 0, 0);
-   my_assert(res == 0);
-
-   /* POSIX sez: "Upon key creation, the value NULL shall be
-      associated with the new key in all active threads." */
-   for (i = 0; i < VG_N_THREADS; i++) {
-      specifics_ptr = get_or_allocate_specifics_ptr(i);
-      /* we get NULL if i is an invalid thread. */
-      if (specifics_ptr != NULL)
-         specifics_ptr[*key] = NULL;
+   
+   if (res == 0) {
+      /* POSIX sez: "Upon key creation, the value NULL shall be
+	 associated with the new key in all active threads." */
+      for (i = 0; i < VG_N_THREADS; i++) {
+	 specifics_ptr = get_or_allocate_specifics_ptr(i);
+	 /* we get NULL if i is an invalid thread. */
+	 if (specifics_ptr != NULL)
+	    specifics_ptr[*key] = NULL;
+      }
    }
 
    return res;
@@ -1525,7 +1486,6 @@
 {
    int res;
    int done;
-   ensure_valgrind("pthread_once");
 
 #  define TAKE_LOCK                                   \
       res = __pthread_mutex_lock(&once_masterlock);   \
@@ -1535,6 +1495,14 @@
       res = __pthread_mutex_unlock(&once_masterlock); \
       my_assert(res == 0);
 
+   void cleanup(void *v) {
+      TAKE_LOCK;
+      *once_control = P_ONCE_NOT_DONE;
+      RELEASE_LOCK;
+   }
+
+   ensure_valgrind("pthread_once");
+
    /* Grab the lock transiently, so we can safely see what state this
       once_control is in. */
 
@@ -1546,10 +1514,12 @@
  	 /* Not started.  Change state to indicate running, drop the
 	    lock and run.  */
          *once_control = P_ONCE_RUNNING;
+	 _pthread_cleanup_push(NULL, cleanup, NULL);
 	 RELEASE_LOCK;
          init_routine();
          /* re-take the lock, and set state to indicate done. */
 	 TAKE_LOCK;
+	 _pthread_cleanup_pop(NULL, False);
          *once_control = P_ONCE_COMPLETED;
 	 RELEASE_LOCK;
 	 break;
@@ -1891,6 +1861,15 @@
 #  endif
 }
 
+extern 
+int __libc_accept(int fd, struct sockaddr *addr, socklen_t *len);
+
+WEAK int __accept(int fd, struct sockaddr *addr, socklen_t *len)
+{
+   __my_pthread_testcancel();
+   return __libc_accept(fd, addr, len);
+}
+strong_alias(__accept, accept);
 
 extern
 int  __libc_connect(int  sockfd,  
@@ -1935,149 +1914,12 @@
    return __libc_read(fd, buf, count);
 }
 
-/*
- * Ugh, this is horrible but here goes:
- *
- * Open of a named pipe (fifo file) can block.  In a threaded program,
- * this means that the whole thing can block.  We therefore need to
- * make the open appear to block to the caller, but still keep polling
- * for everyone else.
- *
- * There are four cases:
- *
- * - the caller asked for O_NONBLOCK.  The easy one: we just do it.
- *
- * - the caller asked for a blocking O_RDONLY open.  We open it with
- *   O_NONBLOCK and then use poll to wait for it to become ready.
- *
- * - the caller asked for a blocking O_WRONLY open.  Unfortunately, this
- *   will fail with ENXIO when we make it non-blocking.  Doubly
- *   unfortunate is that we can only rely on these semantics if it is
- *   actually a fifo file; the hack is that if we see that it is a
- *   O_WRONLY open and we get ENXIO, then stat the path and see if it
- *   actually is a fifo.  This is racy, but it is the best we can do.
- *   If it is a fifo, then keep trying the open until it works; if not
- *   just return the error.
- *
- * - the caller asked for a blocking O_RDWR open.  Well, under Linux,
- *   this never blocks, so we just clear the non-blocking flag and
- *   return.
- *
- * This code assumes that for whatever we open, O_NONBLOCK followed by
- * a fcntl clearing O_NONBLOCK is the same as opening without
- * O_NONBLOCK.  Also assumes that stat and fstat have no side-effects.
- *
- * XXX Should probably put in special cases for some devices as well,
- * like serial ports.  Unfortunately they don't work like fifos, so
- * this logic will become even more tortured.  Wait until we really
- * need it.
- */ 
-static int _open(const char *pathname, int flags, mode_t mode,
-	         int (*openp)(const char *, int, mode_t))
-{
-   int fd;
-   struct stat st;
-   struct vki_timespec nanosleep_interval;
-   int saved_errno;
-
-   __my_pthread_testcancel();
-
-   /* Assume we can only get O_RDONLY, O_WRONLY or O_RDWR */
-   my_assert((flags & VKI_O_ACCMODE) != VKI_O_ACCMODE);
-
-   for(;;) {
-      fd = (*openp)(pathname, flags | VKI_O_NONBLOCK, mode);
-
-      /* return immediately if caller wanted nonblocking anyway */
-      if (flags & VKI_O_NONBLOCK)
-	 return fd;
-
-      saved_errno = *(__errno_location());
-
-      if (fd != -1)
-	 break;			/* open worked */
-
-      /* If we got ENXIO and we're opening WRONLY, and it turns out
-	 to really be a FIFO, then poll waiting for open to succeed */
-      if (*(__errno_location()) == ENXIO &&
-	  (flags & VKI_O_ACCMODE) == VKI_O_WRONLY &&
-	  (stat(pathname, &st) == 0 && S_ISFIFO(st.st_mode))) {
-
-	 /* OK, we're opening a FIFO for writing; sleep and spin */
-	 nanosleep_interval.tv_sec  = 0;
-	 nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-	 /* It's critical here that valgrind's nanosleep implementation
-	    is nonblocking. */
-	 (void)my_do_syscall2(__NR_nanosleep, 
-			      (int)(&nanosleep_interval), (int)NULL);
-      } else {
-	 /* it was just an error */
-	 *(__errno_location()) = saved_errno;
-	 return -1;
-      }
-   }
-
-   /* OK, we've got a nonblocking FD for a caller who wants blocking;
-      reset the flags to what they asked for */
-   fcntl(fd, VKI_F_SETFL, flags);
-
-   /* Return now if one of:
-      - we were opening O_RDWR (never blocks)
-      - we opened with O_WRONLY (polling already done)
-      - the thing we opened wasn't a FIFO after all (or fstat failed)
-   */
-   if ((flags & VKI_O_ACCMODE) != VKI_O_RDONLY ||
-       (fstat(fd, &st) == -1 || !S_ISFIFO(st.st_mode))) {
-      *(__errno_location()) = saved_errno;
-      return fd;
-   }
-
-   /* OK, drop into the poll loop looking for something to read on the fd */
-   my_assert((flags & VKI_O_ACCMODE) == VKI_O_RDONLY);
-   for(;;) {
-      struct pollfd pollfd;
-      int res;
-
-      pollfd.fd = fd;
-      pollfd.events = POLLIN;
-      pollfd.revents = 0;
-
-      res = my_do_syscall3(__NR_poll, (int)&pollfd, 1, 0);
-      
-      my_assert(res == 0 || res == 1);
-
-      if (res == 1) {
-	 /* OK, got it.
-
-	    XXX This is wrong: we're waiting for either something to
-	    read or a HUP on the file descriptor, but the semantics of
-	    fifo open are that we should unblock as soon as someone
-	    simply opens the other end, not that they write something.
-	    With luck this won't matter in practice.
-	 */
-	 my_assert(pollfd.revents & (POLLIN|POLLHUP));
-	 break;
-      }
-
-      /* Still nobody home; sleep and spin */
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-	 is nonblocking. */
-      (void)my_do_syscall2(__NR_nanosleep, 
-			   (int)(&nanosleep_interval), (int)NULL);
-   }
-
-   *(__errno_location()) = saved_errno;
-   return fd;
-}
-
 extern
 int __libc_open64(const char *pathname, int flags, mode_t mode);
 /* WEAK */
 int open64(const char *pathname, int flags, mode_t mode)
 {
-   return _open(pathname, flags, mode, __libc_open64);
+   return __libc_open64(pathname, flags, mode);
 }
 
 extern
@@ -2085,7 +1927,7 @@
 /* WEAK */
 int open(const char *pathname, int flags, mode_t mode)
 {
-   return _open(pathname, flags, mode, __libc_open);
+   return __libc_open(pathname, flags, mode);
 }
 
 extern
@@ -2098,30 +1940,6 @@
 }
 
 
-WEAK
-int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
-{
-   return VGR_(accept)(s, addr, addrlen);
-}
-
-WEAK
-int recv(int s, void *buf, size_t len, int flags)
-{
-   return VGR_(recv)(s, buf, len, flags);
-}
-
-WEAK
-int readv(int fd, const struct iovec *iov, int count)
-{
-  return VGR_(readv)(fd, iov, count);
-}
-
-WEAK
-int writev(int fd, const struct iovec *iov, int count)
-{
-  return VGR_(writev)(fd, iov, count);
-}
-
 extern
 pid_t __libc_waitpid(pid_t pid, int *status, int options);
 WEAK
@@ -2135,12 +1953,21 @@
 extern
 int __libc_nanosleep(const struct timespec *req, struct timespec *rem);
 WEAK
-int nanosleep(const struct timespec *req, struct timespec *rem)
+int __nanosleep(const struct timespec *req, struct timespec *rem)
 {
    __my_pthread_testcancel();
    return __libc_nanosleep(req, rem);
 }
 
+extern
+int __libc_pause(void);
+WEAK
+int __pause(void)
+{
+   __my_pthread_testcancel();
+   return __libc_pause();
+}
+
 
 extern
 int __libc_fsync(int fd);
@@ -2213,25 +2040,15 @@
    return __libc_pread(fd, buf, count, offset);
 }
 
-
-extern  
-void __libc_longjmp(jmp_buf env, int val) __attribute((noreturn));
-/* not weak: WEAK */
-void longjmp(jmp_buf env, int val)
+extern
+int __libc_recv(int s, void *msg, size_t len, int flags);
+WEAK
+int recv(int s, void *msg, size_t len, int flags)
 {
-   __libc_longjmp(env, val);
+   __my_pthread_testcancel();
+   return __libc_recv(s, msg, len, flags);
 }
 
-
-extern void __libc_siglongjmp (sigjmp_buf env, int val)
-                               __attribute__ ((noreturn));
-void siglongjmp(sigjmp_buf env, int val)
-{
-   kludged("siglongjmp (cleanup handlers are ignored)");
-   __libc_siglongjmp(env, val);
-}
-
-
 extern
 int __libc_send(int s, const void *msg, size_t len, int flags);
 WEAK
@@ -2270,8 +2087,6 @@
              struct sockaddr *from, socklen_t *fromlen)
 {
    __my_pthread_testcancel();
-   VGR_(wait_for_fd_to_be_readable_or_erring)(s);
-   __my_pthread_testcancel();
    return __libc_recvfrom(s, buf, len, flags, from, fromlen);
 }
 
@@ -2317,6 +2132,40 @@
    return __libc_msync(start, length, flags);
 }
 
+strong_alias(close, __close)
+strong_alias(fcntl, __fcntl)
+strong_alias(lseek, __lseek)
+strong_alias(open, __open)
+strong_alias(open64, __open64)
+strong_alias(read, __read)
+strong_alias(wait, __wait)
+strong_alias(write, __write)
+strong_alias(connect, __connect)
+strong_alias(send, __send)
+
+weak_alias (__pread64, pread64)
+weak_alias (__pwrite64, pwrite64)
+weak_alias(__nanosleep, nanosleep)
+weak_alias(__pause, pause)
+
+
+extern  
+void __libc_longjmp(jmp_buf env, int val) __attribute((noreturn));
+/* not weak: WEAK */
+void longjmp(jmp_buf env, int val)
+{
+   __libc_longjmp(env, val);
+}
+
+
+extern void __libc_siglongjmp (sigjmp_buf env, int val)
+                               __attribute__ ((noreturn));
+void siglongjmp(sigjmp_buf env, int val)
+{
+   kludged("siglongjmp (cleanup handlers are ignored)");
+   __libc_siglongjmp(env, val);
+}
+
 
 /*--- fork and its helper ---*/
 
@@ -2397,71 +2246,6 @@
 }
 
 
-static
-int my_do_syscall1 ( int syscallno, int arg1 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "d" (arg1) );
-   return __res;
-}
-
-static
-int my_do_syscall2 ( int syscallno, 
-                     int arg1, int arg2 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "d" (arg1),
-                       "c" (arg2) );
-   return __res;
-}
-
-static
-int my_do_syscall3 ( int syscallno, 
-                     int arg1, int arg2, int arg3 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%esi,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "S" (arg1),
-                       "c" (arg2),
-                       "d" (arg3) );
-   return __res;
-}
-
-static inline
-int my_do_syscall5 ( int syscallno, 
-                     int arg1, int arg2, int arg3, int arg4, int arg5 )
-{ 
-   int __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2),
-                       "d" (arg3),
-                       "S" (arg4),
-                       "D" (arg5));
-   return __res;
-}
-
-
-WEAK
-int select ( int n, 
-             fd_set *rfds, 
-             fd_set *wfds, 
-             fd_set *xfds, 
-             struct timeval *timeout )
-{
-   return VGR_(select)(n, rfds, wfds, xfds, timeout);
-}
-
 
 /* ---------------------------------------------------------------------
    Hacky implementation of semaphores.
@@ -3013,64 +2797,30 @@
 }
 
 
-/* ---------------------------------------------------------------------
-   Make SYSV IPC not block everything -- pass to vg_intercept.c.
-   ------------------------------------------------------------------ */
-
-WEAK
-int msgsnd(int msgid, const void *msgp, size_t msgsz, int msgflg)
-{
-   return VGR_(msgsnd)(msgid, msgp, msgsz, msgflg);
-}
-
-WEAK
-int msgrcv(int msqid, void* msgp, size_t msgsz, 
-           long msgtyp, int msgflg )
-{
-   return VGR_(msgrcv)(msqid, msgp, msgsz, msgtyp, msgflg );
-}
-
 
 /* ---------------------------------------------------------------------
-   The glibc sources say that returning -1 in these 3 functions
-   causes real time signals not to be used.
+   Manage the allocation and use of RT signals.  The Valgrind core
+   uses one.  glibc needs us to implement this to make RT signals
+   work; things just seem to crash if we don't.
    ------------------------------------------------------------------ */
-
 int __libc_current_sigrtmin (void)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      kludged("__libc_current_sigrtmin");
-   return -1;
+   return VG_(sig_rtmin);
 }
 
 int __libc_current_sigrtmax (void)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      kludged("__libc_current_sigrtmax");
-   return -1;
+   return VG_(sig_rtmax);
 }
 
 int __libc_allocate_rtsig (int high)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      kludged("__libc_allocate_rtsig");
-   return -1;
+   return VG_(sig_alloc_rtsig)(high);
 }
 
-
 /* ---------------------------------------------------------------------
    B'stard.
    ------------------------------------------------------------------ */
-
-# define strong_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
-
-# define weak_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
-
 strong_alias(__pthread_mutex_lock, pthread_mutex_lock)
 strong_alias(__pthread_mutex_trylock, pthread_mutex_trylock)
 strong_alias(__pthread_mutex_unlock, pthread_mutex_unlock)
@@ -3090,22 +2840,8 @@
 strong_alias(sigaction, __sigaction)
 #endif
      
-strong_alias(close, __close)
-strong_alias(fcntl, __fcntl)
-strong_alias(lseek, __lseek)
-strong_alias(open, __open)
-strong_alias(open64, __open64)
-strong_alias(read, __read)
-strong_alias(wait, __wait)
-strong_alias(write, __write)
-strong_alias(connect, __connect)
-strong_alias(send, __send)
-
-weak_alias (__pread64, pread64)
-weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
 weak_alias(__vfork, vfork)
-
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
 /*--------------------------------------------------*/
diff --git a/coregrind/arch/x86-linux/vg_syscall.S b/coregrind/arch/x86-linux/vg_syscall.S
index faa23a5..7b0fd00 100644
--- a/coregrind/arch/x86-linux/vg_syscall.S
+++ b/coregrind/arch/x86-linux/vg_syscall.S
@@ -30,138 +30,89 @@
 */
 
 #include "vg_constants.h"
-
+#include "vg_unistd.h"
 
 .globl	VG_(do_syscall)
 
-# NOTE that this routine expects the simulated machines state
-# to be in m_state_static.  Therefore it needs to be wrapped by
-# code which copies from baseBlock before the call, into
-# m_state_static, and back afterwards.
+/*
+	Perform a Linux syscall with int 0x80
 	
+	Syscall args are passed on the stack
+	Int VG_(do_syscall)(Int syscall_no, ...)
+
+	This has no effect on the virtual machine; the expectation is
+	that the syscall mechanism makes no useful changes to any
+	register except %eax, which is returned.
+ */
 VG_(do_syscall):
-	# Save all the int registers of the real machines state on the
-	# simulators stack.
-	pushal
-
-	# and save the real FPU state too
-	fwait
-
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	qq1nosse
-	fxsave	VG_(real_sse_state_saved_over_syscall)
-	andl	$0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
-	fxrstor	VG_(real_sse_state_saved_over_syscall)
-	jmp	qq1merge
-qq1nosse:		
-	fnsave	VG_(real_sse_state_saved_over_syscall)
-	frstor	VG_(real_sse_state_saved_over_syscall)
-qq1merge:
-	popfl
-	
-	# remember what the simulators stack pointer is
-	movl	%esp, VG_(esp_saved_over_syscall)
-	
-	# Now copy the simulated machines state into the real one
-	# esp still refers to the simulators stack
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	qq2nosse
-	andl	$0x0000FFBF, VG_(m_state_static)+64+24
-	fxrstor	VG_(m_state_static)+64
-	jmp	qq2merge
-qq2nosse:	
-	frstor	VG_(m_state_static)+64
-qq2merge:
-	popfl
-	
-	movl	VG_(m_state_static)+56, %eax
-	pushl	%eax
-	popfl
-#if 0
-	/* don't bother to save/restore seg regs across the kernel iface.  
-	   Once we have our hands on them, our simulation of it is 
-	   completely internal, and the kernel sees nothing.  
-	   What's more, loading new values in to %cs seems 
-	   to be impossible anyway. */
-	movw	VG_(m_state_static)+0, %cs
-	movw	VG_(m_state_static)+4, %ss
-	movw	VG_(m_state_static)+8, %ds
-	movw	VG_(m_state_static)+12, %es
-	movw	VG_(m_state_static)+16, %fs
-	movw	VG_(m_state_static)+20, %gs
-#endif
-	movl	VG_(m_state_static)+24, %eax
-	movl	VG_(m_state_static)+28, %ecx
-	movl	VG_(m_state_static)+32, %edx
-	movl	VG_(m_state_static)+36, %ebx
-	movl	VG_(m_state_static)+40, %esp
-	movl	VG_(m_state_static)+44, %ebp
-	movl	VG_(m_state_static)+48, %esi
-	movl	VG_(m_state_static)+52, %edi
-
-	# esp now refers to the simulatees stack
-	# Do the actual system call
+	push	%esi
+	push	%edi
+	push	%ebx
+	push	%ebp
+	movl	16+ 4(%esp),%eax
+	movl	16+ 8(%esp),%ebx
+	movl	16+12(%esp),%ecx
+	movl	16+16(%esp),%edx
+	movl	16+20(%esp),%esi
+	movl	16+24(%esp),%edi
+	movl	16+28(%esp),%ebp
 	int	$0x80
-
-	# restore stack as soon as possible
-	# esp refers to simulatees stack
-	movl	%esp, VG_(m_state_static)+40
-	movl	VG_(esp_saved_over_syscall), %esp
-	# esp refers to simulators stack
-
-	# ... and undo everything else.  
-	# Copy real state back to simulated state.
-#if 0
-	movw	%cs, VG_(m_state_static)+0
-	movw	%ss, VG_(m_state_static)+4
-	movw	%ds, VG_(m_state_static)+8
-	movw	%es, VG_(m_state_static)+12
-	movw	%fs, VG_(m_state_static)+16
-	movw	%gs, VG_(m_state_static)+20
-#endif
-	movl	%eax, VG_(m_state_static)+24
-	movl	%ecx, VG_(m_state_static)+28
-	movl	%edx, VG_(m_state_static)+32
-	movl	%ebx, VG_(m_state_static)+36
-	movl	%ebp, VG_(m_state_static)+44
-	movl	%esi, VG_(m_state_static)+48
-	movl	%edi, VG_(m_state_static)+52
-	pushfl
-	popl	%eax
-	movl	%eax, VG_(m_state_static)+56
-	fwait
-
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	pp2nosse
-	fxsave	VG_(m_state_static)+64
-	andl	$0x0000FFBF, VG_(m_state_static)+64+24
-	fxrstor	VG_(m_state_static)+64
-	jmp	pp2merge
-pp2nosse:
-	fnsave	VG_(m_state_static)+64
-	frstor	VG_(m_state_static)+64
-pp2merge:
-	popfl
-	
-	# Restore the state of the simulator
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	pp1nosse
-	andl	$0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
-	fxrstor	VG_(real_sse_state_saved_over_syscall)
-	jmp	pp1merge
-pp1nosse:
-	frstor	VG_(real_sse_state_saved_over_syscall)
-pp1merge:
-	popfl
-
-	popal
-
+	popl	%ebp
+	popl	%ebx
+	popl	%edi
+	popl	%esi
 	ret
 
+/*
+	Perform a clone system call.  clone is strange because it has
+	fork()-like return-twice semantics, so it needs special
+	handling here.
+
+	int VG_(clone)(int (*fn)(void *), void *child_stack, int flags, void *arg, 
+	               0                  4                  8          12
+		       pid_t *child_tid, pid_t *parent_tid)
+		       16                20
+
+ */
+.globl VG_(clone)
+VG_(clone):
+#define FSZ	(4+4+4)			/* frame size = retaddr+ebx+edi */
+	push	%ebx
+	push	%edi
+	/* set up child stack with function and arg */
+	movl	 4+FSZ(%esp), %ecx	/* child stack */
+	movl	12+FSZ(%esp), %ebx	/* fn arg */
+	movl	 0+FSZ(%esp), %eax	/* fn */
+	lea	-8(%ecx), %ecx		/* make space on stack */
+	movl	%ebx, 4(%ecx)		/*   fn arg */
+	movl	%eax, 0(%ecx)		/*   fn */
+
+	/* get other args to clone */
+	movl	 8+FSZ(%esp), %ebx	/* flags */
+	movl	20+FSZ(%esp), %edx	/* parent tid * */
+	movl	16+FSZ(%esp), %edi	/* child tid * */
+	movl	$__NR_clone, %eax
+	int	$0x80
+	testl	%eax, %eax
+	jnz	1f
+
+	/* CHILD - call thread function */
+	popl	%eax
+	call	*%eax
+
+	/* exit with result */
+	movl	%eax, %ebx
+	movl	$__NR_exit, %eax
+	int	$0x80
+
+	/* Hm, exit returned */
+	ud2
+		
+1:	/* PARENT or ERROR */
+	pop	%edi
+	pop	%ebx
+	ret
+	
 ##--------------------------------------------------------------------##
 ##--- end                                             vg_syscall.S ---##
 ##--------------------------------------------------------------------##
diff --git a/coregrind/valgrind.vs b/coregrind/valgrind.vs
new file mode 100644
index 0000000..d401357
--- /dev/null
+++ b/coregrind/valgrind.vs
@@ -0,0 +1,8 @@
+VALGRIND_2.0 {
+	global:
+		vgPlain_*;
+		vgSkin_*;
+		vgProf_*;
+	local:
+		*;		# default to hidden
+};
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 06fb5f6..f3cae12 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -45,6 +45,9 @@
    options or sizes, recompile, and still have a working system.
    ------------------------------------------------------------------ */
 
+/* For system call numbers __NR_... */
+#include "vg_unistd.h"
+
 #include "vg_constants.h"
 
 /* All stuff visible to core and skins goes in vg_skin.h.  Things
@@ -108,11 +111,14 @@
    give finer interleaving but much increased scheduling overheads. */
 #define VG_SCHEDULING_QUANTUM   50000
 
-/* Number of file descriptors that can simultaneously be waited on for
-   I/O to complete.  Perhaps this should be the same as VG_N_THREADS
-   (surely a thread can't wait on more than one fd at once?.  Who
-   knows.) */
-#define VG_N_WAITING_FDS 20
+/* Maximum FD Valgrind can use for its internal file descriptors. */
+#define VG_MAX_SAFE_FD	1024	/* usual ulimit */
+
+/* Maximum allowed application-visible file descriptor.  Valgrind's
+   internal fds hide above this (starting at VG_MAX_FD+1).  This is
+   derived from the default fd limit (1024) minus the 2 fds per thread
+   and a small number of extra fds. */
+#define VG_MAX_FD	(VG_MAX_SAFE_FD - (VG_N_THREADS*2 + 4))
 
 /* Stack size for a thread.  We try and check that they do not go
    beyond it. */
@@ -137,7 +143,6 @@
 #define VG_STACK_SIZE_W       10000
 #define VG_SIGSTACK_SIZE_W    10000
 
-
 /* ---------------------------------------------------------------------
    Basic types
    ------------------------------------------------------------------ */
@@ -166,6 +171,11 @@
       VgLogTo_Socket
    } VgLogTo;
 
+/* pid of main process */
+extern Int VG_(main_pid);
+
+/* pgrp of process (global to all threads) */
+extern Int VG_(main_pgrp);
 
 /* Should we stop collecting errors if too many appear?  default: YES */
 extern Bool  VG_(clo_error_limit);
@@ -236,6 +246,14 @@
 extern Int   VG_(clo_backtrace_size);
 /* Engage miscellaneous weird hacks needed for some progs. */
 extern Char* VG_(clo_weird_hacks);
+/* How often we should poll for signals, assuming we need to poll for
+   signals. */
+extern Int   VG_(clo_signal_polltime);
+
+/* Low latency syscalls and signals */
+extern Bool  VG_(clo_lowlat_syscalls);
+extern Bool  VG_(clo_lowlat_signals);
+
 /* Should we run __libc_freeres at exit?  Sometimes causes crashes.
    Default: YES.  Note this is subservient to VG_(needs).libc_freeres;
    if the latter says False, then the setting of VG_(clo_weird_hacks)
@@ -510,7 +528,7 @@
 #define VG_USERREQ__PTHREAD_GETSPECIFIC_PTR 0x3016
 #define VG_USERREQ__READ_MILLISECOND_TIMER  0x3017
 #define VG_USERREQ__PTHREAD_SIGMASK         0x3018
-#define VG_USERREQ__SIGWAIT                 0x3019
+#define VG_USERREQ__SIGWAIT                 0x3019 /* unused */
 #define VG_USERREQ__PTHREAD_KILL            0x301A
 #define VG_USERREQ__PTHREAD_YIELD           0x301B
 #define VG_USERREQ__PTHREAD_KEY_VALIDATE    0x301C
@@ -523,7 +541,7 @@
 
 /* Ask how many signal handler returns have happened to this
    thread. */
-#define VG_USERREQ__GET_N_SIGS_RETURNED     0x3024
+#define VG_USERREQ__GET_N_SIGS_RETURNED     0x3024 /* unused */
 
 /* Get/set entries for a thread's pthread_atfork stack. */
 #define VG_USERREQ__SET_FHSTACK_USED        0x3025
@@ -549,40 +567,6 @@
    called at program exit. */
 extern void VG_(__libc_freeres_wrapper)( void );
 
-extern void VGR_(wait_for_fd_to_be_readable_or_erring) ( int fd );
-extern void VGR_(wait_for_fd_to_be_writable_or_erring) ( int fd );
-
-extern int VGR_(recv)(int s, void *buf, unsigned int/*size_t*/ len, 
-                                        int flags);
-
-extern int VGR_(accept)(int s, /*struct sockaddr*/ void *addr, 
-                               /*socklen_t*/ void *addrlen);
-
-
-extern int VGR_(select) ( int n, 
-                          /*fd_set*/ void *rfds, 
-                          /*fd_set*/ void *wfds, 
-                          /*fd_set*/ void *xfds, 
-                          /*struct timeval*/ void *timeout );
-
-extern int VGR_(msgsnd)(int msgid, 
-                        const void *msgp, 
-                        /*size_t*/ unsigned int msgsz, 
-                        int msgflg);
-
-extern int VGR_(msgrcv)( int msqid, 
-                         void* msgp,  
-                         /*size_t*/ unsigned int msgsz, 
-                         long msgtyp, 
-                         int msgflg );
-
-extern int VGR_(readv)(int fd, 
-                       const /*struct iovec*/ void *iovV, 
-                       int count);
-
-extern int VGR_(writev)(int fd, 
-                        const /*struct iovec*/ void *iovV, 
-                        int count);
 
 /* ---------------------------------------------------------------------
    Constants pertaining to the simulated CPU state, VG_(baseBlock),
@@ -667,16 +651,15 @@
    ------------------------------------------------------------------ */
 
 typedef
-   enum { 
+   enum ThreadStatus { 
       VgTs_Empty,      /* this slot is not in use */
       VgTs_Runnable,   /* waiting to be scheduled */
       VgTs_WaitJoiner, /* waiting for someone to do join on me */
       VgTs_WaitJoinee, /* waiting for the thread I did join on */
-      VgTs_WaitFD,     /* waiting for I/O completion on a fd */
       VgTs_WaitMX,     /* waiting on a mutex */
       VgTs_WaitCV,     /* waiting on a condition variable */
-      VgTs_WaitSIG,    /* waiting due to sigwait() */
-      VgTs_Sleeping    /* sleeping for a while */
+      VgTs_WaitSys,    /* waiting for a syscall to complete */
+      VgTs_Sleeping,   /* sleeping for a while */
    }
    ThreadStatus;
 
@@ -697,6 +680,7 @@
    }
    ForkHandlerEntry;
 
+typedef struct ProxyLWP ProxyLWP;
 
 typedef
    struct _ThreadState {
@@ -744,6 +728,15 @@
    void**   joiner_thread_return;
    ThreadId joiner_jee_tid;      
 
+   /* If VgTs_WaitSys, this is the result of the pre-syscall check */
+   void *sys_pre_res;
+
+   /* If VgTs_WaitSys, this is the syscall we're currently running */
+   Int syscallno;
+
+   /* Details about this thread's proxy LWP */
+   ProxyLWP *proxy;
+
    /* Whether or not detached. */
    Bool detached;
 
@@ -771,22 +764,23 @@
 
    /* This thread's blocked-signals mask.  Semantics is that for a
       signal to be delivered to this thread, the signal must not be
-      blocked by either the process-wide signal mask nor by this
-      one.  So, if this thread is prepared to handle any signal that
-      the process as a whole is prepared to handle, this mask should
-      be made empty -- and that it is its default, starting
-      state. */
+      blocked by this signal mask.  If more than one thread accepts a
+      signal, then it will be delivered to one at random.  If all
+      threads block the signal, it will remain pending until either a
+      thread unblocks it or someone uses sigwait/sigtimedwait.
+
+      sig_mask reflects what the client told us its signal mask should
+      be, but isn't necessarily the current signal mask of the proxy
+      LWP: it may have more signals blocked because of signal
+      handling, or it may be different because of sigsuspend.
+   */
    vki_ksigset_t sig_mask;
 
-   /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
-      is the set of signals for which we are sigwait()ing. */
-   vki_ksigset_t sigs_waited_for;
-
-   /* Counts the number of times a signal handler for this thread
-      has returned.  This makes it easy to implement pause(), by
-      polling this value, of course interspersed with nanosleeps,
-      and waiting till it changes. */
-   UInt n_signals_returned;
+   /* Effective signal mask.  This is the mask which is currently
+      applying; it may be different from sig_mask while a signal
+      handler is running.
+    */
+   vki_ksigset_t eff_sig_mask;
 
    /* Stacks.  When a thread slot is freed, we don't deallocate its
       stack; we just leave it lying around for the next use of the
@@ -877,6 +871,9 @@
    False if no thread is currently running. */
 extern Bool VG_(is_running_thread)(ThreadId tid);
 
+/* Get the ThreadState for a particular thread */
+extern ThreadState *VG_(get_ThreadState)(ThreadId tid);
+
 /* Copy the specified thread's state into VG_(baseBlock) in
    preparation for running it. */
 extern void VG_(load_thread_state)( ThreadId );
@@ -894,6 +891,14 @@
 /* Nuke all threads except tid. */
 extern void VG_(nuke_all_threads_except) ( ThreadId me );
 
+/* Give a hint to the scheduler that it may be a good time to find a
+   new runnable thread.  If prefer_sched != VG_INVALID_THREADID, then
+   try to schedule that thread.
+*/
+extern void VG_(need_resched) ( ThreadId prefer_sched );
+
+/* Add a new timeout event for a thread */
+extern void VG_(add_timeout) ( ThreadId tid, UInt time );
 
 /* Return codes from the scheduler. */
 typedef
@@ -902,8 +907,10 @@
                             even if we wait for a long time */
       VgSrc_ExitSyscall, /* client called exit().  This is the normal
                             route out. */
-      VgSrc_BbsDone      /* In a debugging run, the specified number of
+      VgSrc_BbsDone,     /* In a debugging run, the specified number of
                             bbs has been completed. */
+      VgSrc_FatalSig	 /* Killed by the default action of a fatal
+			    signal */
    }
    VgSchedReturnCode;
 
@@ -979,13 +986,25 @@
    Exports of vg_signals.c
    ------------------------------------------------------------------ */
 
+extern Bool VG_(do_signal_routing); /* whether scheduler LWP has to route signals */
+
+/* RT signal allocation */
+extern Int  VG_(sig_rtmin);
+extern Int  VG_(sig_rtmax);
+extern Int  VG_(sig_alloc_rtsig) ( Int high );
+
 extern void VG_(sigstartup_actions) ( void );
 extern void VG_(sigshutdown_actions) ( void );
 
-extern Bool VG_(deliver_signals) ( void );
+extern void VG_(deliver_signal) ( ThreadId tid, const vki_ksiginfo_t *, Bool async );
 extern void VG_(unblock_host_signal) ( Int sigNo );
 extern void VG_(handle_SCSS_change) ( Bool force_update );
 
+extern Bool VG_(is_sig_ign) ( Int sigNo );
+
+/* Route pending signals from the scheduler LWP to the appropriate
+   thread LWP. */
+extern void VG_(route_signals) ( void );
 
 /* Fake system calls for signal handling. */
 extern void VG_(do__NR_sigaltstack)   ( ThreadId tid );
@@ -1014,6 +1033,10 @@
 extern void VG_(restore_all_host_signals) 
                   ( /* IN */ vki_ksigset_t* saved_mask );
 
+extern vki_ksiginfo_t VG_(unresumable_siginfo);
+
+extern void VG_(kill_self)(Int sigNo);
+
 /* ---------------------------------------------------------------------
    Exports of vg_mylibc.c
    ------------------------------------------------------------------ */
@@ -1045,9 +1068,13 @@
                         vki_fd_set* writefds, 
                         vki_fd_set* exceptfds, 
                         struct vki_timeval * timeout );
+extern Int VG_(poll)( struct vki_pollfd *, UInt nfds, Int timeout);
 extern Int VG_(nanosleep)( const struct vki_timespec *req, 
                            struct vki_timespec *rem );
 
+/* Move an fd into the Valgrind-safe range */
+Int VG_(safe_fd)(Int oldfd);
+
 extern Int VG_(write_socket)( Int sd, void *msg, Int count );
 
 /* --- Connecting over the network --- */
@@ -1388,6 +1415,10 @@
    real. */
 extern Int VG_(exitcode);
 
+/* If we're doing the default action of a fatal signal */
+extern jmp_buf VG_(fatal_signal_jmpbuf);
+extern Bool    VG_(fatal_signal_set);		/* jmp_buf is valid */
+extern Int     VG_(fatal_sigNo);		/* the fatal signal */
 
 /* --- Counters, for informational purposes only. --- */
 
@@ -1458,29 +1489,53 @@
 extern __attribute__((regparm(1))) 
        void VG_(unknown_esp_update) ( Addr new_ESP );
 
+extern Bool VG_(is_addressable)(Addr p, Int sz);
+
+/* ---------------------------------------------------------------------
+   Exports of vg_proxylwp.c
+   ------------------------------------------------------------------ */
+
+/* Issue a syscall for thread tid */
+extern Int  VG_(sys_issue)(int tid);
+
+extern void VG_(proxy_init)     ( void );
+extern void VG_(proxy_create)   ( ThreadId tid );
+extern void VG_(proxy_delete)   ( ThreadId tid, Bool force );
+extern void VG_(proxy_results)  ( void );
+extern void VG_(proxy_sendsig)  ( ThreadId tid, Int signo );
+extern void VG_(proxy_setsigmask)(ThreadId tid);
+extern void VG_(proxy_sigack)   ( ThreadId tid, const vki_ksigset_t *);
+extern void VG_(proxy_abort_syscall) ( ThreadId tid );
+extern void VG_(proxy_waitsig)  ( void );
+
+extern void VG_(proxy_shutdown) ( void );	/* shut down the syscall workers */
+extern Int  VG_(proxy_resfd)    ( void );	/* FD that can be selected on to learn
+						   when a syscall has finished */
+
+/* Sanity-check the whole proxy-LWP machinery */
+void VG_(proxy_sanity)(void);
+
+/* Send a signal from a thread's proxy to the thread.  This longjmps
+   back into the proxy's main loop, so it doesn't return. */
+__attribute__ ((__noreturn__))
+extern void VG_(proxy_handlesig)( const vki_ksiginfo_t *siginfo, 
+				  const struct vki_sigcontext *sigcontext );
+
+
 /* ---------------------------------------------------------------------
    Exports of vg_syscalls.c
    ------------------------------------------------------------------ */
 
 extern void VG_(init_dataseg_end_for_brk) ( void );
 
-extern void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid );
-
-extern void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno );
-extern void  VG_(post_known_blocking_syscall)( ThreadId tid, Int syscallno,
-                                               void* pre_res, Int res );
+extern Bool VG_(pre_syscall) ( ThreadId tid );
+extern void VG_(post_syscall)( ThreadId tid );
 
 extern Bool VG_(is_kerror) ( Int res );
 
-#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)      \
-         VG_(load_thread_state)(thread_id);              \
-         VG_(copy_baseBlock_to_m_state_static)();        \
-         VG_(do_syscall)();                              \
-         VG_(copy_m_state_static_to_baseBlock)();        \
-         VG_(save_thread_state)(thread_id);              \
-         result_lvalue = VG_(threads)[thread_id].m_eax;  \
-         VG_TRACK( post_reg_write_syscall_return, thread_id, R_EAX );
-
+/* Internal atfork handlers */
+typedef void (*vg_atfork_t)(ThreadId);
+extern void VG_(atfork)(vg_atfork_t pre, vg_atfork_t parent, vg_atfork_t child);
 
 /* ---------------------------------------------------------------------
    Exports of vg_transtab.c
@@ -1508,8 +1563,9 @@
    Exports of vg_syscall.S
    ------------------------------------------------------------------ */
 
-extern void VG_(do_syscall) ( void );
-
+extern Int VG_(do_syscall) ( UInt, ... );
+extern Int VG_(clone) ( Int (*fn)(void *), void *stack, Int flags, void *arg, 
+			Int *child_tid, Int *parent_tid);
 
 /* ---------------------------------------------------------------------
    Exports of vg_startup.S
diff --git a/coregrind/vg_intercept.c b/coregrind/vg_intercept.c
index 6497f22..1796385 100644
--- a/coregrind/vg_intercept.c
+++ b/coregrind/vg_intercept.c
@@ -71,97 +71,6 @@
 
 /* --------------------------------------------------------------- */
 
-# define strong_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
-
-#define WEAK	__attribute__((weak))
-
-/* --------------------------------------------------------------- */
-
-static __inline__
-int my_do_syscall1 ( int syscallno, int arg1 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "d" (arg1) );
-   return __res;
-}
-
-static __inline__
-int my_do_syscall2 ( int syscallno, 
-                     int arg1, int arg2 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "d" (arg1),
-                       "c" (arg2) );
-   return __res;
-}
-
-static __inline__
-int my_do_syscall3 ( int syscallno, 
-                     int arg1, int arg2, int arg3 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%esi,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "S" (arg1),
-                       "c" (arg2),
-                       "d" (arg3) );
-   return __res;
-}
-
-static __inline__
-int my_do_syscall5 ( int syscallno, 
-                     int arg1, int arg2, int arg3, int arg4, int arg5 )
-{ 
-   int __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2),
-                       "d" (arg3),
-                       "S" (arg4),
-                       "D" (arg5));
-   return __res;
-}
-
-static __inline__
-int do_syscall_select( int n, 
-                       vki_fd_set* readfds, 
-                       vki_fd_set* writefds, 
-                       vki_fd_set* exceptfds, 
-                       struct vki_timeval * timeout )
-{
-   int res;
-   int args[5];
-   args[0] = n;
-   args[1] = (int)readfds;
-   args[2] = (int)writefds;
-   args[3] = (int)exceptfds;
-   args[4] = (int)timeout;
-   res = my_do_syscall1(__NR_select, (int)(&(args[0])) );
-   return res;
-}
-
-
-static __inline__
-int do_syscall_ipc( unsigned call, 
-                    int first, int second, int third, 
-                    void *ptr)
-{
-   return my_do_syscall5(__NR_ipc, call, first, second, third, (int)ptr);
-}
-
-
-/* --------------------------------------------------------------- */
-
 /* Just start up Valgrind if it's not already going.  VG_(startup)()
    detects and ignores second and subsequent calls. */
 
@@ -211,8 +120,8 @@
 static
 void my_exit ( int arg )
 {
-   my_do_syscall1(__NR_exit, arg);
-   /*NOTREACHED*/
+   VG_(do_syscall)(__NR_exit_group, arg);
+   VG_(do_syscall)(__NR_exit, arg);
 }
 
 static
@@ -253,572 +162,6 @@
    my_assert(res == 0);
 }
 
-/* ================================ poll ================================ */
-
-/* This is the master implementation of poll().  It blocks only the
-   calling thread.  All roads lead here.
-*/
-
-#ifndef HAVE_NFDS_T
-typedef unsigned long int nfds_t;
-#endif
-
-
-int VGR_(poll) (struct pollfd *__fds, nfds_t __nfds, int __timeout)
-{
-   unsigned int        ms_now, ms_end, i;
-   int                 res;
-   struct vki_timespec nanosleep_interval;
-
-   __my_pthread_testcancel();
-   ensure_valgrind("poll");
-
-   /* Detect the current time and simultaneously find out if we are
-      running on Valgrind. */
-   VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */,
-                           VG_USERREQ__READ_MILLISECOND_TIMER,
-                           0, 0, 0, 0);
-
-
-   /* CHECK SIZES FOR struct pollfd */
-   my_assert(sizeof(struct timeval) == sizeof(struct vki_timeval));
-
-   /* dummy initialisation to keep gcc -Wall happy */
-   ms_end = 0;
-
-   /* If a zero timeout specified, this call is harmless.  Also do
-      this if not running on Valgrind. */
-   if (__timeout == 0 || ms_now == 0xFFFFFFFF) {
-      res = my_do_syscall3(__NR_poll, (int)__fds, __nfds, __timeout);
-      if (is_kerror(res)) {
-         * (__errno_location()) = -res;
-         return -1;
-      } else {
-         return res;
-      }
-   }
-
-   /* If a timeout was specified, set ms_end to be the end wallclock
-      time.  Easy considering that __timeout is in milliseconds. */
-   if (__timeout > 0) {
-      ms_end = ms_now + (unsigned int)__timeout;
-   }
-
-   /* fprintf(stderr, "MY_POLL: before loop\n"); */
-
-   /* Either timeout < 0, meaning wait indefinitely, or timeout > 0,
-      in which case t_end holds the end time. */
-
-   my_assert(__timeout != 0);
-
-   while (1) {
-
-      /* Do a return-immediately poll. */
-
-      res = my_do_syscall3(__NR_poll, (int)__fds, __nfds, 0 );
-      if (is_kerror(res)) {
-         /* Some kind of error.  Set errno and return.  */
-         * (__errno_location()) = -res;
-         return -1;
-      }
-      if (res > 0) {
-         /* One or more fds is ready.  Return now. */
-         return res;
-      }
-
-      /* Nothing interesting happened, so we go to sleep for a
-         while. */
-
-      /* fprintf(stderr, "MY_POLL: nanosleep\n"); */
-      /* nanosleep and go round again */
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking. */
-      res = my_do_syscall2(__NR_nanosleep, 
-                           (int)(&nanosleep_interval), (int)NULL);
-      if (res == -VKI_EINTR) {
-         /* The nanosleep was interrupted by a signal.  So we do the
-            same. */
-         * (__errno_location()) = EINTR;
-         return -1;
-      }
-
-      /* Sleeping finished.  If a finite timeout, check to see if it
-         has expired yet. */
-      if (__timeout > 0) {
-         VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */,
-                                 VG_USERREQ__READ_MILLISECOND_TIMER,
-                                 0, 0, 0, 0);
-         my_assert(ms_now != 0xFFFFFFFF);
-         if (ms_now >= ms_end) {
-            /* timeout; nothing interesting happened. */
-            for (i = 0; i < __nfds; i++) 
-               __fds[i].revents = 0;
-            return 0;
-         }
-      }
-   }
-}
-
-int poll(struct pollfd *__fds, nfds_t __nfds, int __timeout)
-{
-   return VGR_(poll)(__fds, __nfds, __timeout);
-}
-
-strong_alias(poll, __poll)
-
-/* ================================ msgsnd ================================ */
-
-/* Turn a blocking msgsnd() into a polling non-blocking one, so that
-   other threads make progress */
-int VGR_(msgsnd)(int msgid, 
-                 const void *msgp, 
-                 /*size_t*/ unsigned int msgsz, 
-                 int msgflg)
-{
-   struct vki_timespec nanosleep_interval;
-   int err;
-
-   ensure_valgrind("msgsnd");
-
-   nanosleep_interval.tv_sec  = 0;
-   nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-
-   if (msgflg & IPC_NOWAIT) {
-      /* If we aren't blocking anyway, just do it */
-      err = do_syscall_ipc(11, msgid, msgsz, msgflg, (void *)msgp);
-   } else {
-      /* Otherwise poll on the queue to let other things run */
-      for(;;) {
-	 err = do_syscall_ipc(11, msgid, msgsz, msgflg | IPC_NOWAIT, 
-                                  (void *)msgp);
-
-	 if (err != -EAGAIN)
-	    break;
-
-	 (void)my_do_syscall2(__NR_nanosleep, 
-			      (int)(&nanosleep_interval), (int)NULL);
-      }  
-   }
-
-   if (is_kerror(err)) {
-      *(__errno_location()) = -err;
-      return -1;
-   }
-   return 0;
-}
-
-#ifdef GLIBC_2_1
-int msgsnd(int msgid,       void *msgp, size_t msgsz, int msgflg)
-#else
-int msgsnd(int msgid, const void *msgp, size_t msgsz, int msgflg)
-#endif
-{
-   return VGR_(msgsnd)(msgid, msgp, msgsz, msgflg);
-}
-
-/* ================================ msgrcv ================================ */
-
-/* Turn a blocking msgrcv() into a polling non-blocking one, so that
-   other threads make progress */
-int VGR_(msgrcv)( int msqid, 
-                  void* msgp,  
-                  /*size_t*/ unsigned int msgsz, 
-                  long msgtyp, 
-                  int msgflg )
-{
-   struct vki_timespec nanosleep_interval;
-   int err;
-   struct ipc_kludge tmp;
-
-   ensure_valgrind("msgrcv");
-
-   nanosleep_interval.tv_sec  = 0;
-   nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-
-   tmp.msgp = msgp;
-   tmp.msgtyp = msgtyp;
-
-   if (msgflg & IPC_NOWAIT) {
-      /* If we aren't blocking anyway, just do it */
-      err = do_syscall_ipc(12, msqid, msgsz, msgflg, &tmp );
-   } else {
-      /* Otherwise poll on the queue to let other things run */
-      for(;;) {
-	 err = do_syscall_ipc(12, msqid, msgsz, msgflg | IPC_NOWAIT, &tmp );
-
-	 if (err != -ENOMSG)
-	    break;
-
-	 (void)my_do_syscall2(__NR_nanosleep, 
-			      (int)(&nanosleep_interval), (int)NULL);
-      }  
-   }
-   
-   if (is_kerror(err)) {
-      *(__errno_location()) = -err;
-      return -1;
-   }
-
-   return err;
-}
-
-int msgrcv( int msqid, void  *msgp,  size_t msgsz, long msgtyp, int msgflg )
-{
-   return VGR_(msgrcv)( msqid, msgp,  msgsz, msgtyp, msgflg );
-}
-
-/* ================================ accept ================================ */
-
-extern
-int __libc_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
-
-int VGR_(accept)(int s, /*struct sockaddr*/ void *addr, 
-                        /*socklen_t*/ void *addrlen)
-{
-   __my_pthread_testcancel();
-   VGR_(wait_for_fd_to_be_readable_or_erring)(s);
-   __my_pthread_testcancel();
-   return __libc_accept(s, addr, addrlen);
-}
-
-int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
-{
-   return VGR_(accept)(s, addr, addrlen);
-}
-
-/* ================================ recv ================================ */
-
-extern
-int __libc_recv(int s, void *buf, size_t len, int flags);
-
-int VGR_(recv)(int s, void *buf, size_t len, int flags)
-{
-   __my_pthread_testcancel();
-   VGR_(wait_for_fd_to_be_readable_or_erring)(s);
-   __my_pthread_testcancel();
-   return __libc_recv(s, buf, len, flags);
-}
-
-int recv(int s, void *buf, size_t len, int flags)
-{
-   return VGR_(recv)(s, buf, len, flags);
-}
-
-strong_alias(recv, __recv)
-
-/* ================================ select ================================ */
-
-/* This is a wrapper round select(), which makes it thread-safe,
-   meaning that only this thread will block, rather than the entire
-   process.  This wrapper in turn depends on nanosleep() not to block
-   the entire process, but I think (hope? suspect?) that POSIX
-   pthreads guarantees that to be the case.
-
-   Basic idea is: modify the timeout parameter to select so that it
-   returns immediately.  Poll like this until select returns non-zero,
-   indicating something interesting happened, or until our time is up.
-   Space out the polls with nanosleeps of say 11 milliseconds, which
-   is required to be nonblocking; this allows other threads to run.  
-
-   Assumes:
-   * (checked via my_assert) types fd_set and vki_fd_set are identical.
-   * (checked via my_assert) types timeval and vki_timeval are identical.
-   * (unchecked) libc error numbers (EINTR etc) are the negation of the
-     kernel's error numbers (VKI_EINTR etc).
-*/
-
-int VGR_(select) ( int n, 
-                   /*fd_set*/ void *rfdsV, 
-                   /*fd_set*/ void *wfdsV, 
-                   /*fd_set*/ void *xfdsV, 
-                   /*struct timeval*/ void *timeoutV )
-{
-   unsigned int ms_now, ms_end;
-   int    res;
-   fd_set rfds_copy;
-   fd_set wfds_copy;
-   fd_set xfds_copy;
-   struct vki_timeval  t_now;
-   struct vki_timeval  zero_timeout;
-   struct vki_timespec nanosleep_interval;
-
-   struct timeval* timeout = (struct timeval*)timeoutV;
-   fd_set* rfds = (fd_set*)rfdsV;
-   fd_set* wfds = (fd_set*)wfdsV;
-   fd_set* xfds = (fd_set*)xfdsV;
-
-   __my_pthread_testcancel();
-
-   /* gcc's complains about ms_end being used uninitialised -- classic
-      case it can't understand, where ms_end is both defined and used
-      only if timeout != NULL.  Hence ... */
-   ms_end = 0;
-
-   /* We assume that the kernel and libc data layouts are identical
-      for the following types.  These asserts provide a crude
-      check. */
-   my_assert(sizeof(struct timeval) == sizeof(struct vki_timeval));
-
-   /* Detect the current time and simultaneously find out if we are
-      running on Valgrind. */
-   VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */,
-                           VG_USERREQ__READ_MILLISECOND_TIMER,
-                           0, 0, 0, 0);
-
-   /* If a zero timeout specified, this call is harmless.  Also go
-      this route if we're not running on Valgrind, for whatever
-      reason. */
-   if ( (timeout && timeout->tv_sec == 0 && timeout->tv_usec == 0)
-        || (ms_now == 0xFFFFFFFF) ) {
-      res = do_syscall_select( n, (vki_fd_set*)rfds, 
-                                   (vki_fd_set*)wfds, 
-                                   (vki_fd_set*)xfds, 
-                                   (struct vki_timeval*)timeout);
-      if (is_kerror(res)) {
-         * (__errno_location()) = -res;
-         return -1;
-      } else {
-         return res;
-      }
-   }
-
-   /* If a timeout was specified, set ms_end to be the end millisecond
-      counter [wallclock] time. */
-   if (timeout) {
-      res = my_do_syscall2(__NR_gettimeofday, (int)&t_now, (int)NULL);
-      my_assert(res == 0);
-      ms_end = ms_now;
-      ms_end += (timeout->tv_usec / 1000);
-      ms_end += (timeout->tv_sec * 1000);
-      /* Stay sane ... */
-      if (ms_end < ms_now)
-         ms_end = ms_now;
-   }
-
-   /* fprintf(stderr, "MY_SELECT: before loop\n"); */
-
-   /* Either timeout == NULL, meaning wait indefinitely, or timeout !=
-      NULL, in which case ms_end holds the end time. */
-
-   while (1) {
-
-      /* First, do a return-immediately select(). */
-
-      /* These could be trashed each time round the loop, so restore
-         them each time. */
-      if (rfds) rfds_copy = *rfds;
-      if (wfds) wfds_copy = *wfds;
-      if (xfds) xfds_copy = *xfds;
-
-      zero_timeout.tv_sec = zero_timeout.tv_usec = 0;
-
-      res = do_syscall_select( n, 
-                               rfds ? (vki_fd_set*)(&rfds_copy) : NULL,
-                               wfds ? (vki_fd_set*)(&wfds_copy) : NULL,
-                               xfds ? (vki_fd_set*)(&xfds_copy) : NULL,
-                               & zero_timeout );
-      if (is_kerror(res)) {
-         /* Some kind of error (including EINTR).  Set errno and
-            return.  The sets are unspecified in this case. */
-         * (__errno_location()) = -res;
-         return -1;
-      }
-      if (res > 0) {
-         /* one or more fds is ready.  Copy out resulting sets and
-            return. */
-         if (rfds) *rfds = rfds_copy;
-         if (wfds) *wfds = wfds_copy;
-         if (xfds) *xfds = xfds_copy;
-         return res;
-      }
-
-      /* Nothing interesting happened, so we go to sleep for a
-         while. */
-
-      /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */
-      /* nanosleep and go round again */
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 11 * 1000 * 1000; /* 11 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking. */
-      res = my_do_syscall2(__NR_nanosleep, 
-                           (int)(&nanosleep_interval), (int)NULL);
-      if (res == -VKI_EINTR) {
-         /* The nanosleep was interrupted by a signal.  So we do the
-            same. */
-         * (__errno_location()) = EINTR;
-         return -1;
-      }
-
-      /* Sleeping finished.  If a finite timeout, check to see if it
-         has expired yet. */
-      if (timeout) {
-         VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */,
-                                 VG_USERREQ__READ_MILLISECOND_TIMER,
-                                 0, 0, 0, 0);
-         my_assert(ms_now != 0xFFFFFFFF);
-         if (ms_now >= ms_end) {
-            /* timeout; nothing interesting happened. */
-            if (rfds) FD_ZERO(rfds);
-            if (wfds) FD_ZERO(wfds);
-            if (xfds) FD_ZERO(xfds);
-            return 0;
-         }
-      }
-
-   }
-}
-
-int select ( int n, 
-             fd_set *rfds, 
-             fd_set *wfds, 
-             fd_set *xfds, 
-             struct timeval *timeout )
-{
-   return VGR_(select)(n, rfds, wfds, xfds, timeout);
-}
-
-strong_alias(select, __select)
-
-/* ================================ readv ================================ */
-
-int VGR_(readv)(int fd, const /*struct iovec*/ void *iovV, int count)
-{
-   int res;
-   const struct iovec* iov = (const struct iovec*)iovV;
-
-   __my_pthread_testcancel();
-   VGR_(wait_for_fd_to_be_readable_or_erring)(fd);
-   __my_pthread_testcancel();
-   res = my_do_syscall3(__NR_readv, fd, (unsigned)iov, count);
-
-   if (is_kerror(res)) {
-      *(__errno_location()) = -res;
-      return -1;
-   }
-   return res;
-}
-
-int readv (int fd, const struct iovec *iov, int count)
-{
-   return VGR_(readv)(fd, iov, count);
-}
-
-strong_alias(readv, __readv)
-
-/* ================================ writev ================================ */
-
-int VGR_(writev)(int fd, const /*struct iovec*/ void *iovV, int count)
-{
-   int res;
-   struct iovec* iov = (struct iovec*)iovV;
-
-   __my_pthread_testcancel();
-   VGR_(wait_for_fd_to_be_writable_or_erring)(fd);
-   __my_pthread_testcancel();
-   res = my_do_syscall3(__NR_writev, fd, (unsigned)iov, count);
-
-   if (is_kerror(res)) {
-      *(__errno_location()) = -res;
-      return -1;
-   }
-   return res;
-}
-
-int writev (int fd, const struct iovec *iov, int count)
-{
-   return VGR_(writev)(fd, iov, count);
-}
-
-strong_alias(writev, __writev)
-
-/* ================================ sigsuspend ============================ */
-
-/* ---------------------------------------------------------------------
-   Horrible hack to make sigsuspend() sort-of work OK.  Same trick as
-   for pause() in vg_libpthread.so.
-   ------------------------------------------------------------------ */
-
-/* Horrible because
-
-   -- uses VG_(ksigprocmask), VG_(nanosleep) and vg_assert, which are 
-      valgrind-native (not intended for client use).
-
-   -- This is here so single-threaded progs (not linking libpthread.so)
-      can see it.  But pause() should also be here.  ???
-*/
-
-/* Either libc supplies this (weak) or our libpthread.so supplies it
-   (strong) in a threaded setting. 
-*/
-extern int* __errno_location ( void );
-
-
-int sigsuspend ( /* const sigset_t * */ void* mask)
-{
-   unsigned int n_orig, n_now;
-   struct vki_timespec nanosleep_interval;
-
-   VALGRIND_MAGIC_SEQUENCE(n_orig, 0xFFFFFFFF /* default */,
-                           VG_USERREQ__GET_N_SIGS_RETURNED, 
-                           0, 0, 0, 0);
-   vg_assert(n_orig != 0xFFFFFFFF);
-
-   VG_(ksigprocmask)(VKI_SIG_SETMASK, mask, NULL);
-
-   while (1) {
-      VALGRIND_MAGIC_SEQUENCE(n_now, 0xFFFFFFFF /* default */,
-                              VG_USERREQ__GET_N_SIGS_RETURNED, 
-                              0, 0, 0, 0);
-      vg_assert(n_now != 0xFFFFFFFF);
-      vg_assert(n_now >= n_orig);
-      if (n_now != n_orig) break;
-
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 53 * 1000 * 1000; /* 53 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking. */
-      VG_(nanosleep)( &nanosleep_interval, NULL);
-   }
-
-   /* Maybe this is OK both in single and multithreaded setting. */
-   * (__errno_location()) = -VKI_EINTR; /* == EINTR; */ 
-   return -1;
-}
-
-
-/* ================================ waitpid ============================ */
-
-#undef WNOHANG
-#define WNOHANG         0x00000001
-
-extern pid_t __libc_waitpid(pid_t pid, int *status, int options);
-
-pid_t waitpid(pid_t pid, int *status, int options)
-{
-   pid_t res;
-   struct vki_timespec nanosleep_interval;
-
-   if (options & WNOHANG)
-      return __libc_waitpid(pid,status,options);
-
-   options |= WNOHANG;
-   while (1) {
-      res = __libc_waitpid(pid,status,options);
-      if (res != 0)
-         return res;
-
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 54 * 1000 * 1000; /* 54 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking. */
-      VG_(nanosleep)( &nanosleep_interval, NULL);
-   }
-}
-
-#undef WNOHANG
 
 /* ---------------------------------------------------------------------
    Hook for running __libc_freeres once the program exits.
@@ -847,63 +190,6 @@
 }
 
 
-/* ---------------------------------------------------------------------
-   Helpers for safely (nonblockingly) detecting when a file descriptor
-   is safe to use.
-   ------------------------------------------------------------------ */
-
-/* Helper function used to make accept() non-blocking.  Idea is to use
-   the above nonblocking poll() to make this thread ONLY wait for the
-   specified fd to become ready, and then return. */
-
-/* Sigh -- a hack.  We're not supposed to include this file directly;
-   should do it via /usr/include/fcntl.h, but that introduces a
-   varargs prototype for fcntl itself, which we can't mimic. */
-#define _FCNTL_H
-#include <bits/fcntl.h>
-
-extern int __libc_fcntl(int fd, int cmd, long arg);
-
-void VGR_(wait_for_fd_to_be_readable_or_erring) ( int fd )
-{
-   struct pollfd pfd;
-   int           res;
-
-   /* fprintf(stderr, "wait_for_fd_to_be_readable_or_erring %d\n", fd); */
-
-   /* First check to see if the fd is nonblocking, and/or invalid.  In
-      either case return immediately. */
-   res = __libc_fcntl(fd, F_GETFL, 0);
-   if (res == -1) return; /* fd is invalid somehow */
-   if (res & O_NONBLOCK) return; /* fd is nonblocking */
-
-   /* Ok, we'd better wait with poll. */
-   pfd.fd = fd;
-   pfd.events = POLLIN | POLLPRI | POLLERR | POLLHUP | POLLNVAL;
-   /* ... but not POLLOUT, you may notice. */
-   pfd.revents = 0;
-   (void)poll(&pfd, 1, -1 /* forever */);
-}
-
-void VGR_(wait_for_fd_to_be_writable_or_erring) ( int fd )
-{
-   struct pollfd pfd;
-   int           res;
-
-   /* fprintf(stderr, "wait_for_fd_to_be_readable_or_erring %d\n", fd); */
-
-   /* First check to see if the fd is nonblocking, and/or invalid.  In
-      either case return immediately. */
-   res = __libc_fcntl(fd, F_GETFL, 0);
-   if (res == -1) return; /* fd is invalid somehow */
-   if (res & O_NONBLOCK) return; /* fd is nonblocking */
-
-   /* Ok, we'd better wait with poll. */
-   pfd.fd = fd;
-   pfd.events = POLLOUT | POLLERR | POLLHUP | POLLNVAL;
-   pfd.revents = 0;
-   (void)poll(&pfd, 1, -1 /* forever */);
-}
 
 /*--------------------------------------------------------------------*/
 /*--- end                                           vg_intercept.c ---*/
diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c
index ee194fc..0d0d2b5 100644
--- a/coregrind/vg_libpthread.c
+++ b/coregrind/vg_libpthread.c
@@ -74,24 +74,19 @@
 #include <stdio.h>
 
 
+# define strong_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
+# define weak_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
+
+
 /* ---------------------------------------------------------------------
    Forwardses.
    ------------------------------------------------------------------ */
 
 #define WEAK	__attribute__((weak))
 
-
-static
-int my_do_syscall1 ( int syscallno, int arg1 );
-
-static
-int my_do_syscall2 ( int syscallno, 
-                     int arg1, int arg2 );
-
-static
-int my_do_syscall3 ( int syscallno, 
-                     int arg1, int arg2, int arg3 );
-
 static
 __inline__
 int is_kerror ( int res )
@@ -141,7 +136,7 @@
 static
 void my_exit ( int arg )
 {
-   my_do_syscall1(__NR_exit, arg);
+   VG_(do_syscall)(__NR_exit, arg);
    /*NOTREACHED*/
 }
 
@@ -149,7 +144,7 @@
 static
 void my_write ( int fd, const void *buf, int count )
 {
-   my_do_syscall3(__NR_write, fd, (int)buf, count );
+   VG_(do_syscall)(__NR_write, fd, (int)buf, count );
 }
 */
 
@@ -1208,7 +1203,7 @@
    return res;
 }
 
-static __inline__
+static
 void __my_pthread_testcancel(void)
 {
    int res;
@@ -1272,10 +1267,6 @@
                return EINVAL;
    }
 
-   /* Crude check */
-   if (newmask == NULL)
-      return EFAULT;
-
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__PTHREAD_SIGMASK,
                            how, newmask, oldmask, 0);
@@ -1284,17 +1275,20 @@
    return res == 0 ? 0 : EFAULT;
 }
 
-
 int sigwait ( const sigset_t* set, int* sig )
 {
    int res;
-   ensure_valgrind("sigwait");
+   vki_ksiginfo_t si;
+   
+   __my_pthread_testcancel();
+
    /* As with pthread_sigmask we deliberately confuse sigset_t with
       vki_ksigset_t. */
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__SIGWAIT,
-                           set, sig, 0, 0);
-   return res;
+   si.si_signo = 0;
+   res = VG_(ksigtimedwait)((const vki_ksigset_t *)set, &si, NULL);
+   *sig = si.si_signo;
+
+   return 0;			/* always returns 0 */
 }
 
 
@@ -1324,40 +1318,6 @@
 }
 
 
-int pause ( void )
-{
-   unsigned int n_orig, n_now;
-   struct vki_timespec nanosleep_interval;
-   ensure_valgrind("pause");
-
-   /* This is surely a cancellation point. */
-   __my_pthread_testcancel();
-
-   VALGRIND_MAGIC_SEQUENCE(n_orig, 0xFFFFFFFF /* default */,
-                           VG_USERREQ__GET_N_SIGS_RETURNED, 
-                           0, 0, 0, 0);
-   my_assert(n_orig != 0xFFFFFFFF);
-
-   while (1) {
-      VALGRIND_MAGIC_SEQUENCE(n_now, 0xFFFFFFFF /* default */,
-                              VG_USERREQ__GET_N_SIGS_RETURNED, 
-                              0, 0, 0, 0);
-      my_assert(n_now != 0xFFFFFFFF);
-      my_assert(n_now >= n_orig);
-      if (n_now != n_orig) break;
-
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking. */
-      (void)my_do_syscall2(__NR_nanosleep, 
-                           (int)(&nanosleep_interval), (int)NULL);
-   }
-
-   *(__errno_location()) = EINTR;
-   return -1;
-}
-
 
 /* ---------------------------------------------------
    THREAD-SPECIFICs
@@ -1433,15 +1393,16 @@
    VALGRIND_MAGIC_SEQUENCE(res, 1 /* default */,
                            VG_USERREQ__PTHREAD_KEY_CREATE,
                            key, destr_function, 0, 0);
-   my_assert(res == 0);
-
-   /* POSIX sez: "Upon key creation, the value NULL shall be
-      associated with the new key in all active threads." */
-   for (i = 0; i < VG_N_THREADS; i++) {
-      specifics_ptr = get_or_allocate_specifics_ptr(i);
-      /* we get NULL if i is an invalid thread. */
-      if (specifics_ptr != NULL)
-         specifics_ptr[*key] = NULL;
+   
+   if (res == 0) {
+      /* POSIX sez: "Upon key creation, the value NULL shall be
+	 associated with the new key in all active threads." */
+      for (i = 0; i < VG_N_THREADS; i++) {
+	 specifics_ptr = get_or_allocate_specifics_ptr(i);
+	 /* we get NULL if i is an invalid thread. */
+	 if (specifics_ptr != NULL)
+	    specifics_ptr[*key] = NULL;
+      }
    }
 
    return res;
@@ -1525,7 +1486,6 @@
 {
    int res;
    int done;
-   ensure_valgrind("pthread_once");
 
 #  define TAKE_LOCK                                   \
       res = __pthread_mutex_lock(&once_masterlock);   \
@@ -1535,6 +1495,14 @@
       res = __pthread_mutex_unlock(&once_masterlock); \
       my_assert(res == 0);
 
+   void cleanup(void *v) {
+      TAKE_LOCK;
+      *once_control = P_ONCE_NOT_DONE;
+      RELEASE_LOCK;
+   }
+
+   ensure_valgrind("pthread_once");
+
    /* Grab the lock transiently, so we can safely see what state this
       once_control is in. */
 
@@ -1546,10 +1514,12 @@
  	 /* Not started.  Change state to indicate running, drop the
 	    lock and run.  */
          *once_control = P_ONCE_RUNNING;
+	 _pthread_cleanup_push(NULL, cleanup, NULL);
 	 RELEASE_LOCK;
          init_routine();
          /* re-take the lock, and set state to indicate done. */
 	 TAKE_LOCK;
+	 _pthread_cleanup_pop(NULL, False);
          *once_control = P_ONCE_COMPLETED;
 	 RELEASE_LOCK;
 	 break;
@@ -1891,6 +1861,15 @@
 #  endif
 }
 
+extern 
+int __libc_accept(int fd, struct sockaddr *addr, socklen_t *len);
+
+WEAK int __accept(int fd, struct sockaddr *addr, socklen_t *len)
+{
+   __my_pthread_testcancel();
+   return __libc_accept(fd, addr, len);
+}
+strong_alias(__accept, accept);
 
 extern
 int  __libc_connect(int  sockfd,  
@@ -1935,149 +1914,12 @@
    return __libc_read(fd, buf, count);
 }
 
-/*
- * Ugh, this is horrible but here goes:
- *
- * Open of a named pipe (fifo file) can block.  In a threaded program,
- * this means that the whole thing can block.  We therefore need to
- * make the open appear to block to the caller, but still keep polling
- * for everyone else.
- *
- * There are four cases:
- *
- * - the caller asked for O_NONBLOCK.  The easy one: we just do it.
- *
- * - the caller asked for a blocking O_RDONLY open.  We open it with
- *   O_NONBLOCK and then use poll to wait for it to become ready.
- *
- * - the caller asked for a blocking O_WRONLY open.  Unfortunately, this
- *   will fail with ENXIO when we make it non-blocking.  Doubly
- *   unfortunate is that we can only rely on these semantics if it is
- *   actually a fifo file; the hack is that if we see that it is a
- *   O_WRONLY open and we get ENXIO, then stat the path and see if it
- *   actually is a fifo.  This is racy, but it is the best we can do.
- *   If it is a fifo, then keep trying the open until it works; if not
- *   just return the error.
- *
- * - the caller asked for a blocking O_RDWR open.  Well, under Linux,
- *   this never blocks, so we just clear the non-blocking flag and
- *   return.
- *
- * This code assumes that for whatever we open, O_NONBLOCK followed by
- * a fcntl clearing O_NONBLOCK is the same as opening without
- * O_NONBLOCK.  Also assumes that stat and fstat have no side-effects.
- *
- * XXX Should probably put in special cases for some devices as well,
- * like serial ports.  Unfortunately they don't work like fifos, so
- * this logic will become even more tortured.  Wait until we really
- * need it.
- */ 
-static int _open(const char *pathname, int flags, mode_t mode,
-	         int (*openp)(const char *, int, mode_t))
-{
-   int fd;
-   struct stat st;
-   struct vki_timespec nanosleep_interval;
-   int saved_errno;
-
-   __my_pthread_testcancel();
-
-   /* Assume we can only get O_RDONLY, O_WRONLY or O_RDWR */
-   my_assert((flags & VKI_O_ACCMODE) != VKI_O_ACCMODE);
-
-   for(;;) {
-      fd = (*openp)(pathname, flags | VKI_O_NONBLOCK, mode);
-
-      /* return immediately if caller wanted nonblocking anyway */
-      if (flags & VKI_O_NONBLOCK)
-	 return fd;
-
-      saved_errno = *(__errno_location());
-
-      if (fd != -1)
-	 break;			/* open worked */
-
-      /* If we got ENXIO and we're opening WRONLY, and it turns out
-	 to really be a FIFO, then poll waiting for open to succeed */
-      if (*(__errno_location()) == ENXIO &&
-	  (flags & VKI_O_ACCMODE) == VKI_O_WRONLY &&
-	  (stat(pathname, &st) == 0 && S_ISFIFO(st.st_mode))) {
-
-	 /* OK, we're opening a FIFO for writing; sleep and spin */
-	 nanosleep_interval.tv_sec  = 0;
-	 nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-	 /* It's critical here that valgrind's nanosleep implementation
-	    is nonblocking. */
-	 (void)my_do_syscall2(__NR_nanosleep, 
-			      (int)(&nanosleep_interval), (int)NULL);
-      } else {
-	 /* it was just an error */
-	 *(__errno_location()) = saved_errno;
-	 return -1;
-      }
-   }
-
-   /* OK, we've got a nonblocking FD for a caller who wants blocking;
-      reset the flags to what they asked for */
-   fcntl(fd, VKI_F_SETFL, flags);
-
-   /* Return now if one of:
-      - we were opening O_RDWR (never blocks)
-      - we opened with O_WRONLY (polling already done)
-      - the thing we opened wasn't a FIFO after all (or fstat failed)
-   */
-   if ((flags & VKI_O_ACCMODE) != VKI_O_RDONLY ||
-       (fstat(fd, &st) == -1 || !S_ISFIFO(st.st_mode))) {
-      *(__errno_location()) = saved_errno;
-      return fd;
-   }
-
-   /* OK, drop into the poll loop looking for something to read on the fd */
-   my_assert((flags & VKI_O_ACCMODE) == VKI_O_RDONLY);
-   for(;;) {
-      struct pollfd pollfd;
-      int res;
-
-      pollfd.fd = fd;
-      pollfd.events = POLLIN;
-      pollfd.revents = 0;
-
-      res = my_do_syscall3(__NR_poll, (int)&pollfd, 1, 0);
-      
-      my_assert(res == 0 || res == 1);
-
-      if (res == 1) {
-	 /* OK, got it.
-
-	    XXX This is wrong: we're waiting for either something to
-	    read or a HUP on the file descriptor, but the semantics of
-	    fifo open are that we should unblock as soon as someone
-	    simply opens the other end, not that they write something.
-	    With luck this won't matter in practice.
-	 */
-	 my_assert(pollfd.revents & (POLLIN|POLLHUP));
-	 break;
-      }
-
-      /* Still nobody home; sleep and spin */
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-	 is nonblocking. */
-      (void)my_do_syscall2(__NR_nanosleep, 
-			   (int)(&nanosleep_interval), (int)NULL);
-   }
-
-   *(__errno_location()) = saved_errno;
-   return fd;
-}
-
 extern
 int __libc_open64(const char *pathname, int flags, mode_t mode);
 /* WEAK */
 int open64(const char *pathname, int flags, mode_t mode)
 {
-   return _open(pathname, flags, mode, __libc_open64);
+   return __libc_open64(pathname, flags, mode);
 }
 
 extern
@@ -2085,7 +1927,7 @@
 /* WEAK */
 int open(const char *pathname, int flags, mode_t mode)
 {
-   return _open(pathname, flags, mode, __libc_open);
+   return __libc_open(pathname, flags, mode);
 }
 
 extern
@@ -2098,30 +1940,6 @@
 }
 
 
-WEAK
-int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
-{
-   return VGR_(accept)(s, addr, addrlen);
-}
-
-WEAK
-int recv(int s, void *buf, size_t len, int flags)
-{
-   return VGR_(recv)(s, buf, len, flags);
-}
-
-WEAK
-int readv(int fd, const struct iovec *iov, int count)
-{
-  return VGR_(readv)(fd, iov, count);
-}
-
-WEAK
-int writev(int fd, const struct iovec *iov, int count)
-{
-  return VGR_(writev)(fd, iov, count);
-}
-
 extern
 pid_t __libc_waitpid(pid_t pid, int *status, int options);
 WEAK
@@ -2135,12 +1953,21 @@
 extern
 int __libc_nanosleep(const struct timespec *req, struct timespec *rem);
 WEAK
-int nanosleep(const struct timespec *req, struct timespec *rem)
+int __nanosleep(const struct timespec *req, struct timespec *rem)
 {
    __my_pthread_testcancel();
    return __libc_nanosleep(req, rem);
 }
 
+extern
+int __libc_pause(void);
+WEAK
+int __pause(void)
+{
+   __my_pthread_testcancel();
+   return __libc_pause();
+}
+
 
 extern
 int __libc_fsync(int fd);
@@ -2213,25 +2040,15 @@
    return __libc_pread(fd, buf, count, offset);
 }
 
-
-extern  
-void __libc_longjmp(jmp_buf env, int val) __attribute((noreturn));
-/* not weak: WEAK */
-void longjmp(jmp_buf env, int val)
+extern
+int __libc_recv(int s, void *msg, size_t len, int flags);
+WEAK
+int recv(int s, void *msg, size_t len, int flags)
 {
-   __libc_longjmp(env, val);
+   __my_pthread_testcancel();
+   return __libc_recv(s, msg, len, flags);
 }
 
-
-extern void __libc_siglongjmp (sigjmp_buf env, int val)
-                               __attribute__ ((noreturn));
-void siglongjmp(sigjmp_buf env, int val)
-{
-   kludged("siglongjmp (cleanup handlers are ignored)");
-   __libc_siglongjmp(env, val);
-}
-
-
 extern
 int __libc_send(int s, const void *msg, size_t len, int flags);
 WEAK
@@ -2270,8 +2087,6 @@
              struct sockaddr *from, socklen_t *fromlen)
 {
    __my_pthread_testcancel();
-   VGR_(wait_for_fd_to_be_readable_or_erring)(s);
-   __my_pthread_testcancel();
    return __libc_recvfrom(s, buf, len, flags, from, fromlen);
 }
 
@@ -2317,6 +2132,40 @@
    return __libc_msync(start, length, flags);
 }
 
+strong_alias(close, __close)
+strong_alias(fcntl, __fcntl)
+strong_alias(lseek, __lseek)
+strong_alias(open, __open)
+strong_alias(open64, __open64)
+strong_alias(read, __read)
+strong_alias(wait, __wait)
+strong_alias(write, __write)
+strong_alias(connect, __connect)
+strong_alias(send, __send)
+
+weak_alias (__pread64, pread64)
+weak_alias (__pwrite64, pwrite64)
+weak_alias(__nanosleep, nanosleep)
+weak_alias(__pause, pause)
+
+
+extern  
+void __libc_longjmp(jmp_buf env, int val) __attribute((noreturn));
+/* not weak: WEAK */
+void longjmp(jmp_buf env, int val)
+{
+   __libc_longjmp(env, val);
+}
+
+
+extern void __libc_siglongjmp (sigjmp_buf env, int val)
+                               __attribute__ ((noreturn));
+void siglongjmp(sigjmp_buf env, int val)
+{
+   kludged("siglongjmp (cleanup handlers are ignored)");
+   __libc_siglongjmp(env, val);
+}
+
 
 /*--- fork and its helper ---*/
 
@@ -2397,71 +2246,6 @@
 }
 
 
-static
-int my_do_syscall1 ( int syscallno, int arg1 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "d" (arg1) );
-   return __res;
-}
-
-static
-int my_do_syscall2 ( int syscallno, 
-                     int arg1, int arg2 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "d" (arg1),
-                       "c" (arg2) );
-   return __res;
-}
-
-static
-int my_do_syscall3 ( int syscallno, 
-                     int arg1, int arg2, int arg3 )
-{ 
-   int __res;
-   __asm__ volatile ("pushl %%ebx; movl %%esi,%%ebx ; int $0x80 ; popl %%ebx"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "S" (arg1),
-                       "c" (arg2),
-                       "d" (arg3) );
-   return __res;
-}
-
-static inline
-int my_do_syscall5 ( int syscallno, 
-                     int arg1, int arg2, int arg3, int arg4, int arg5 )
-{ 
-   int __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2),
-                       "d" (arg3),
-                       "S" (arg4),
-                       "D" (arg5));
-   return __res;
-}
-
-
-WEAK
-int select ( int n, 
-             fd_set *rfds, 
-             fd_set *wfds, 
-             fd_set *xfds, 
-             struct timeval *timeout )
-{
-   return VGR_(select)(n, rfds, wfds, xfds, timeout);
-}
-
 
 /* ---------------------------------------------------------------------
    Hacky implementation of semaphores.
@@ -3013,64 +2797,30 @@
 }
 
 
-/* ---------------------------------------------------------------------
-   Make SYSV IPC not block everything -- pass to vg_intercept.c.
-   ------------------------------------------------------------------ */
-
-WEAK
-int msgsnd(int msgid, const void *msgp, size_t msgsz, int msgflg)
-{
-   return VGR_(msgsnd)(msgid, msgp, msgsz, msgflg);
-}
-
-WEAK
-int msgrcv(int msqid, void* msgp, size_t msgsz, 
-           long msgtyp, int msgflg )
-{
-   return VGR_(msgrcv)(msqid, msgp, msgsz, msgtyp, msgflg );
-}
-
 
 /* ---------------------------------------------------------------------
-   The glibc sources say that returning -1 in these 3 functions
-   causes real time signals not to be used.
+   Manage the allocation and use of RT signals.  The Valgrind core
+   uses one.  glibc needs us to implement this to make RT signals
+   work; things just seem to crash if we don't.
    ------------------------------------------------------------------ */
-
 int __libc_current_sigrtmin (void)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      kludged("__libc_current_sigrtmin");
-   return -1;
+   return VG_(sig_rtmin);
 }
 
 int __libc_current_sigrtmax (void)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      kludged("__libc_current_sigrtmax");
-   return -1;
+   return VG_(sig_rtmax);
 }
 
 int __libc_allocate_rtsig (int high)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      kludged("__libc_allocate_rtsig");
-   return -1;
+   return VG_(sig_alloc_rtsig)(high);
 }
 
-
 /* ---------------------------------------------------------------------
    B'stard.
    ------------------------------------------------------------------ */
-
-# define strong_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
-
-# define weak_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
-
 strong_alias(__pthread_mutex_lock, pthread_mutex_lock)
 strong_alias(__pthread_mutex_trylock, pthread_mutex_trylock)
 strong_alias(__pthread_mutex_unlock, pthread_mutex_unlock)
@@ -3090,22 +2840,8 @@
 strong_alias(sigaction, __sigaction)
 #endif
      
-strong_alias(close, __close)
-strong_alias(fcntl, __fcntl)
-strong_alias(lseek, __lseek)
-strong_alias(open, __open)
-strong_alias(open64, __open64)
-strong_alias(read, __read)
-strong_alias(wait, __wait)
-strong_alias(write, __write)
-strong_alias(connect, __connect)
-strong_alias(send, __send)
-
-weak_alias (__pread64, pread64)
-weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
 weak_alias(__vfork, vfork)
-
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
 /*--------------------------------------------------*/
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index a9fc417..8680af0 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -120,10 +120,20 @@
 /* This is the actual defn of baseblock. */
 UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
 
+/* PID of the main thread */
+Int VG_(main_pid);
+
+/* PGRP of process */
+Int VG_(main_pgrp);
 
 /* Words. */
 static Int baB_off = 0;
 
+/* jmp_buf for fatal signals */
+Int	VG_(fatal_sigNo) = -1;
+Bool	VG_(fatal_signal_set) = False;
+jmp_buf VG_(fatal_signal_jmpbuf);
+
 /* Returns the offset, in words. */
 static Int alloc_BaB ( Int words )
 {
@@ -549,6 +559,17 @@
 Bool   VG_(clo_run_libc_freeres) = True;
 Bool   VG_(clo_chain_bb)       = True;
 
+static Bool   VG_(clo_wait_for_gdb)   = False;
+
+/* If we're doing signal routing, poll for signals every 50mS by
+   default. */
+Int    VG_(clo_signal_polltime) = 50;
+
+/* These flags reduce thread wakeup latency on syscall completion and
+   signal delivery, respectively.  The downside is possible unfairness. */
+Bool   VG_(clo_lowlat_syscalls) = False; /* low-latency syscalls */
+Bool   VG_(clo_lowlat_signals)  = False; /* low-latency signals */
+
 /* This Bool is needed by wrappers in vg_clientmalloc.c to decide how
    to behave.  Initially we say False. */
 Bool VG_(running_on_simd_CPU) = False;
@@ -633,6 +654,13 @@
 "                              suppressions file <filename>\n"
 "    --weird-hacks=hack1,hack2,...  [no hacks selected]\n"
 "         recognised hacks are: ioctl-VTIME truncate-writes lax-ioctls\n"
+"    --signal-polltime=<time>  time, in mS, we should poll for signals.\n"
+"                              Only applies for older kernels which need\n"
+"                              signal routing [50]\n"
+"    --lowlat-signals=no|yes   improve wake-up latency when a thread receives\n"
+"			       a signal [no]\n"
+"    --lowlat-syscalls=no|yes  improve wake-up latency when a thread's\n"
+"			       syscall completes [no]\n"
 "\n"
 "  %s skin user options:\n";
 
@@ -656,6 +684,7 @@
 "    --dump-error=<number>     show translation for basic block\n"
 "                              associated with <number>'th\n"
 "                              error context [0=don't show any]\n"
+"    --wait-for-gdb=yes|no     pause on startup to wait for gdb attach\n"
 "\n"
 "  %s skin debugging options:\n";
 
@@ -1056,12 +1085,30 @@
       else if (VG_CLO_STREQN(14, argv[i], "--weird-hacks="))
          VG_(clo_weird_hacks) = &argv[i][14];
 
+      else if (VG_CLO_STREQN(17, argv[i], "--signal-polltime="))
+	 VG_(clo_signal_polltime) = VG_(atoll)(&argv[i][17]);
+
+      else if (VG_CLO_STREQ(argv[i], "--lowlat-signals=yes"))
+	 VG_(clo_lowlat_signals) = True;
+      else if (VG_CLO_STREQ(argv[i], "--lowlat-signals=no"))
+	 VG_(clo_lowlat_signals) = False;
+
+      else if (VG_CLO_STREQ(argv[i], "--lowlat-syscalls=yes"))
+	 VG_(clo_lowlat_syscalls) = True;
+      else if (VG_CLO_STREQ(argv[i], "--lowlat-syscalls=no"))
+	 VG_(clo_lowlat_syscalls) = False;
+
       else if (VG_CLO_STREQN(13, argv[i], "--stop-after="))
          VG_(clo_stop_after) = VG_(atoll)(&argv[i][13]);
 
       else if (VG_CLO_STREQN(13, argv[i], "--dump-error="))
          VG_(clo_dump_error) = (Int)VG_(atoll)(&argv[i][13]);
 
+      else if (VG_CLO_STREQ(argv[i], "--wait-for-gdb=yes"))
+	 VG_(clo_wait_for_gdb) = True;
+      else if (VG_CLO_STREQ(argv[i], "--wait-for-gdb=no"))
+	 VG_(clo_wait_for_gdb) = False;
+
       else if (VG_CLO_STREQN(14, argv[i], "--num-callers=")) {
          /* Make sure it's sane. */
 	 VG_(clo_backtrace_size) = (Int)VG_(atoll)(&argv[i][14]);
@@ -1165,6 +1212,15 @@
 
    }
 
+   /* Move logfile_fd into the safe range, so it doesn't conflict with any app fds */
+   eventually_logfile_fd = VG_(fcntl)(VG_(clo_logfile_fd), VKI_F_DUPFD, VG_MAX_FD+1);
+   if (eventually_logfile_fd < 0)
+      VG_(message)(Vg_UserMsg, "valgrind: failed to move logfile fd into safe range");
+   else {
+      VG_(clo_logfile_fd) = eventually_logfile_fd;
+      VG_(fcntl)(VG_(clo_logfile_fd), VKI_F_SETFD, VKI_FD_CLOEXEC);
+   }
+
    /* Ok, the logging sink is running now.  Print a suitable preamble.
       If logging to file or a socket, write details of parent PID and
       command line args, to help people trying to interpret the
@@ -1413,6 +1469,15 @@
    Main!
    ------------------------------------------------------------------ */
 
+/* Initialize the PID and PGRP of scheduler LWP; this is also called
+   in any new children after fork. */
+static void newpid(ThreadId unused)
+{
+   /* PID of scheduler LWP */
+   VG_(main_pid) = VG_(getpid)();
+   VG_(main_pgrp) = VG_(getpgrp)();
+}
+
 /* Where we jump to once Valgrind has got control, and the real
    machine's state has been copied to the m_state_static. */
 
@@ -1420,7 +1485,6 @@
 {
    Int               i;
    VgSchedReturnCode src;
-   ThreadState*      tst;
 
    if (0) {
       if (VG_(have_ssestate))
@@ -1449,6 +1513,9 @@
       VG_(exit)(1);
    }
 
+   VG_(atfork)(NULL, NULL, newpid);
+   newpid(VG_INVALID_THREADID);
+
    /* Set up our stack sanity-check words. */
    for (i = 0; i < 10; i++) {
       VG_(stack)[i] = (UInt)(&VG_(stack)[i])                   ^ 0xA4B3C2D1;
@@ -1467,15 +1534,6 @@
    */
    VG_(read_procselfmaps)();
 
-   /* Hook to delay things long enough so we can get the pid and
-      attach GDB in another shell. */
-   if (0) {
-      Int p, q;
-      VG_(printf)("pid=%d\n", VG_(getpid)());
-      for (p = 0; p < 50000; p++)
-         for (q = 0; q < 50000; q++) ;
-   }
-
    /* Setup stuff that depends on the skin.  Must be before:
       - vg_init_baseBlock(): to register helpers
       - process_cmd_line_options(): to register skin name and description,
@@ -1488,6 +1546,14 @@
    /* Process Valgrind's command-line opts (from env var VG_ARGS). */
    process_cmd_line_options();
 
+   /* Hook to delay things long enough so we can get the pid and
+      attach GDB in another shell. */
+   if (VG_(clo_wait_for_gdb)) {
+      VG_(printf)("pid=%d\n", VG_(getpid)());
+      /* do "jump *$eip" to skip this in gdb */
+      VG_(do_syscall)(__NR_pause);
+   }
+
    /* Do post command-line processing initialisation.  Must be before:
       - vg_init_baseBlock(): to register any more helpers
    */
@@ -1502,6 +1568,9 @@
    */
    VG_(scheduler_init)();
 
+   /* Set up the ProxyLWP machinery */
+   VG_(proxy_init)();
+
    /* Initialise the signal handling subsystem, temporarily parking
       the saved blocking-mask in saved_sigmask. */
    VG_(sigstartup_actions)();
@@ -1549,7 +1618,13 @@
    /* Run! */
    VG_(running_on_simd_CPU) = True;
    VGP_PUSHCC(VgpSched);
-   src = VG_(scheduler)();
+
+   if (__builtin_setjmp(&VG_(fatal_signal_jmpbuf)) == 0) {
+      VG_(fatal_signal_set) = True;
+      src = VG_(scheduler)();
+   } else
+      src = VgSrc_FatalSig;
+
    VGP_POPCC(VgpSched);
    VG_(running_on_simd_CPU) = False;
 
@@ -1604,14 +1679,21 @@
       );
    }
 
+   /* We're exiting, so nuke all the threads and clean up the proxy LWPs */
+   vg_assert(src == VgSrc_FatalSig ||
+	     VG_(threads)[VG_(last_run_tid)].status == VgTs_Runnable ||
+	     VG_(threads)[VG_(last_run_tid)].status == VgTs_WaitJoiner);
+   VG_(nuke_all_threads_except)(VG_INVALID_THREADID);
+
    /* Decide how to exit.  This depends on what the scheduler
       returned. */
+  
    switch (src) {
       case VgSrc_ExitSyscall: /* the normal way out */
          vg_assert(VG_(last_run_tid) > 0 
                    && VG_(last_run_tid) < VG_N_THREADS);
-         tst = & VG_(threads)[VG_(last_run_tid)];
-         vg_assert(tst->status == VgTs_Runnable);
+	 VG_(proxy_shutdown)();
+
          /* The thread's %EBX at the time it did __NR_exit() will hold
             the arg to __NR_exit(), so we just do __NR_exit() with
             that arg. */
@@ -1622,6 +1704,7 @@
 
       case VgSrc_Deadlock:
          /* Just exit now.  No point in continuing. */
+	 VG_(proxy_shutdown)();
          VG_(exit)(0);
          VG_(core_panic)("entered the afterlife in vg_main() -- Deadlock");
          break;
@@ -1636,6 +1719,8 @@
          VG_(load_thread_state)(1 /* root thread */ );
          VG_(copy_baseBlock_to_m_state_static)();
 
+	 VG_(proxy_shutdown)();
+
          /* This pushes a return address on the simulator's stack,
             which is abandoned.  We call vg_sigshutdown_actions() at
             the end of vg_switch_to_real_CPU(), so as to ensure that
@@ -1643,6 +1728,13 @@
             the real signal mechanism is restored.  */
          VG_(switch_to_real_CPU)();
 
+      case VgSrc_FatalSig:
+	 /* We were killed by a fatal signal, so replicate the effect */
+	 vg_assert(VG_(fatal_sigNo) != -1);
+	 VG_(kill_self)(VG_(fatal_sigNo));
+	 VG_(core_panic)("vg_main(): signal was supposed to be fatal");
+	 break;
+
       default:
          VG_(core_panic)("vg_main(): unexpected scheduler return code");
    }
@@ -1901,6 +1993,8 @@
       VGP_PUSHCC(VgpCoreExpensiveSanity);
       VG_(sanity_slow_count)++;
 
+      VG_(proxy_sanity)();
+
 #     if 0
       { void zzzmemscan(void); zzzmemscan(); }
 #     endif
diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c
index 353fd79..549348e 100644
--- a/coregrind/vg_memory.c
+++ b/coregrind/vg_memory.c
@@ -317,6 +317,47 @@
    }
 }
 
+static jmp_buf segv_jmpbuf;
+
+static void segv_handler(Int seg)
+{
+   __builtin_longjmp(segv_jmpbuf, 1);
+   VG_(core_panic)("longjmp failed");
+}
+
+/* 
+   Test if a piece of memory is addressable by setting up a temporary
+   SIGSEGV handler, then try to touch the memory.  No signal = good,
+   signal = bad.
+ */
+Bool VG_(is_addressable)(Addr p, Int size)
+{
+   volatile Char * volatile cp = (volatile Char *)p;
+   volatile Bool ret;
+   vki_ksigaction sa, origsa;
+   vki_ksigset_t mask;
+
+   vg_assert(size > 0);
+
+   sa.ksa_handler = segv_handler;
+   sa.ksa_flags = 0;
+   VG_(ksigfillset)(&sa.ksa_mask);
+   VG_(ksigaction)(VKI_SIGSEGV, &sa, &origsa);
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, NULL, &mask);
+
+   if (__builtin_setjmp(&segv_jmpbuf) == 0) {
+      while(size--)
+	 *cp++;
+      ret = True;
+    } else
+      ret = False;
+
+   VG_(ksigaction)(VKI_SIGSEGV, &origsa, NULL);
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
+
+   return ret;
+}
+
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index d0dcb57..811818f 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -32,101 +32,6 @@
 
 #include "vg_include.h"
 
-
-
-/* ---------------------------------------------------------------------
-   Really Actually DO system calls.
-   ------------------------------------------------------------------ */
-
-/* Ripped off from /usr/include/asm/unistd.h. */
-
-static
-UInt vg_do_syscall0 ( UInt syscallno )
-{ 
-   UInt __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno) );
-   return __res;
-}
-
-
-static
-UInt vg_do_syscall1 ( UInt syscallno, UInt arg1 )
-{ 
-   UInt __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1) );
-   return __res;
-}
-
-
-static
-UInt vg_do_syscall2 ( UInt syscallno, 
-                      UInt arg1, UInt arg2 )
-{ 
-   UInt __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2) );
-   return __res;
-}
-
-
-static
-UInt vg_do_syscall3 ( UInt syscallno, 
-                      UInt arg1, UInt arg2, UInt arg3 )
-{ 
-   UInt __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2),
-                       "d" (arg3) );
-   return __res;
-}
-
-
-static
-UInt vg_do_syscall4 ( UInt syscallno, 
-                      UInt arg1, UInt arg2, UInt arg3, UInt arg4 )
-{ 
-   UInt __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2),
-                       "d" (arg3),
-                       "S" (arg4) );
-   return __res;
-}
-
-
-#if 0
-static
-UInt vg_do_syscall5 ( UInt syscallno, 
-                      UInt arg1, UInt arg2, UInt arg3, UInt arg4, 
-                      UInt arg5 )
-{ 
-   UInt __res;
-   __asm__ volatile ("int $0x80"
-                     : "=a" (__res)
-                     : "0" (syscallno),
-                       "b" (arg1),
-                       "c" (arg2),
-                       "d" (arg3),
-                       "S" (arg4),
-                       "D" (arg5) );
-   return __res;
-}
-#endif
-
 /* ---------------------------------------------------------------------
    Wrappers around system calls, and other stuff, to do with signals.
    ------------------------------------------------------------------ */
@@ -236,9 +141,9 @@
                        vki_ksigset_t* oldset)
 {
    Int res 
-      = vg_do_syscall4(__NR_rt_sigprocmask, 
-                       how, (UInt)set, (UInt)oldset, 
-                       VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD);
+      = VG_(do_syscall)(__NR_rt_sigprocmask, 
+			how, (UInt)set, (UInt)oldset, 
+			VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD);
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
@@ -248,9 +153,9 @@
                       vki_ksigaction* oldact)
 {
    Int res
-     = vg_do_syscall4(__NR_rt_sigaction,
-                      signum, (UInt)act, (UInt)oldact, 
-                      VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD);
+     = VG_(do_syscall)(__NR_rt_sigaction,
+		       signum, (UInt)act, (UInt)oldact, 
+		       VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD);
    /* VG_(printf)("res = %d\n",res); */
    return VG_(is_kerror)(res) ? -1 : 0;
 }
@@ -259,10 +164,17 @@
 Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss )
 {
    Int res
-     = vg_do_syscall2(__NR_sigaltstack, (UInt)ss, (UInt)oss);
+     = VG_(do_syscall)(__NR_sigaltstack, (UInt)ss, (UInt)oss);
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
+Int VG_(ksigtimedwait)( const vki_ksigset_t *set, vki_ksiginfo_t *info, 
+			const struct vki_timespec *timeout )
+{
+   Int res = VG_(do_syscall)(__NR_rt_sigtimedwait, set, info, timeout, sizeof(*set));
+
+   return VG_(is_kerror)(res) ? -1 : res;
+}
  
 Int VG_(ksignal)(Int signum, void (*sighandler)(Int))
 {
@@ -273,26 +185,60 @@
    sa.ksa_restorer = NULL;
    res = VG_(ksigemptyset)( &sa.ksa_mask );
    vg_assert(res == 0);
-   res = vg_do_syscall4(__NR_rt_sigaction,
-                        signum, (UInt)(&sa), (UInt)NULL,
-                        VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD);
+   res = VG_(do_syscall)(__NR_rt_sigaction,
+			 signum, (UInt)(&sa), (UInt)NULL,
+			 VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD);
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
 
 Int VG_(kkill)( Int pid, Int signo )
 {
-   Int res = vg_do_syscall2(__NR_kill, pid, signo);
+   Int res = VG_(do_syscall)(__NR_kill, pid, signo);
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
 
+Int VG_(ktkill)( Int tid, Int signo )
+{
+   Int ret = -VKI_ENOSYS;
+
+#ifdef __NR_tkill
+   ret = VG_(do_syscall)(__NR_tkill, tid, signo);
+#endif /* __NR_tkill */
+
+   if (ret == -VKI_ENOSYS)
+      ret = VG_(do_syscall)(__NR_kill, tid, signo);
+
+   return VG_(is_kerror)(ret) ? -1 : 0;
+}
+
 Int VG_(ksigpending) ( vki_ksigset_t* set )
 {
-   Int res = vg_do_syscall1(__NR_sigpending, (UInt)set);
+   Int res = VG_(do_syscall)(__NR_sigpending, (UInt)set);
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
+Int VG_(waitpid)(Int pid, Int *status, Int options)
+{
+   Int ret = VG_(do_syscall)(__NR_wait4, pid, status, options, NULL);
+
+   return VG_(is_kerror)(ret) ? -1 : ret;
+}
+
+Int VG_(gettid)(void)
+{
+   Int ret;
+
+   ret = VG_(do_syscall)(__NR_gettid);
+
+   if (ret == -VKI_ENOSYS)
+      ret = VG_(do_syscall)(__NR_getpid);
+
+   return ret;
+}
+
+
 
 /* ---------------------------------------------------------------------
    mmap/munmap, exit, fcntl
@@ -310,29 +256,31 @@
    args[3] = flags;
    args[4] = fd;
    args[5] = offset;
-   res = vg_do_syscall1(__NR_mmap, (UInt)(&(args[0])) );
+   res = VG_(do_syscall)(__NR_mmap, (UInt)(&(args[0])) );
    return VG_(is_kerror)(res) ? ((void*)(-1)) : (void*)res;
 }
 
 /* Returns -1 on failure. */
 Int VG_(munmap)( void* start, Int length )
 {
-   Int res = vg_do_syscall2(__NR_munmap, (UInt)start, (UInt)length );
+   Int res = VG_(do_syscall)(__NR_munmap, (UInt)start, (UInt)length );
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
 void VG_(exit)( Int status )
 {
-   (void)vg_do_syscall1(__NR_exit, (UInt)status );
+   (void)VG_(do_syscall)(__NR_exit_group, (UInt)status );
+   (void)VG_(do_syscall)(__NR_exit, (UInt)status );
    /* Why are we still alive here? */
    /*NOTREACHED*/
+   *(volatile Int *)0 = 'x';
    vg_assert(2+2 == 5);
 }
 
 /* Returns -1 on error. */
 Int VG_(fcntl) ( Int fd, Int cmd, Int arg )
 {
-   Int res = vg_do_syscall3(__NR_fcntl, fd, cmd, arg);
+   Int res = VG_(do_syscall)(__NR_fcntl, fd, cmd, arg);
    return VG_(is_kerror)(res) ? -1 : res;
 }
 
@@ -350,16 +298,23 @@
    args[2] = (UInt)writefds;
    args[3] = (UInt)exceptfds;
    args[4] = (UInt)timeout;
-   res = vg_do_syscall1(__NR_select, (UInt)(&(args[0])) );
+   res = VG_(do_syscall)(__NR_select, (UInt)(&(args[0])) );
    return VG_(is_kerror)(res) ? -1 : res;
 }
 
+Int VG_(poll)( struct vki_pollfd *ufds, UInt nfds, Int timeout)
+{
+   Int res = VG_(do_syscall)(__NR_poll, ufds, nfds, timeout);
+
+   return res;
+}
+
 /* Returns -1 on error, 0 if ok, 1 if interrupted. */
 Int VG_(nanosleep)( const struct vki_timespec *req, 
                     struct vki_timespec *rem )
 {
    Int res;
-   res = vg_do_syscall2(__NR_nanosleep, (UInt)req, (UInt)rem);
+   res = VG_(do_syscall)(__NR_nanosleep, (UInt)req, (UInt)rem);
    if (res == -VKI_EINVAL) return -1;
    if (res == -VKI_EINTR)  return 1;
    return 0;
@@ -368,7 +323,7 @@
 void* VG_(brk) ( void* end_data_segment )
 {
    Int res;
-   res = vg_do_syscall1(__NR_brk, (UInt)end_data_segment);
+   res = VG_(do_syscall)(__NR_brk, (UInt)end_data_segment);
    return (void*)(  VG_(is_kerror)(res) ? -1 : res  );
 }
 
@@ -1089,9 +1044,35 @@
    Assertery.
    ------------------------------------------------------------------ */
 
+/* Fake up an ExeContext which is of our actual real CPU state, so we
+   can print a stack trace.  This isn't terribly useful in the case
+   where we were killed by a signal, since we just get a backtrace
+   into the signal handler.  Also, it could be somewhat risky if we
+   actually got the panic/exception within the execontext/stack
+   dump/symtab code.  But it's better than nothing. */
+static inline ExeContext *get_real_execontext(Addr ret)
+{
+   ExeContext *ec;
+   Addr ebp;
+   Addr stacktop;
+   Addr esp = (Addr)&esp;
+
+   asm("movl %%ebp, %0" : "=r" (ebp));
+   stacktop = (Addr)&VG_(stack)[VG_STACK_SIZE_W];
+   if (esp >= (Addr)&VG_(sigstack)[0] && esp < (Addr)&VG_(sigstack)[VG_STACK_SIZE_W])
+      stacktop = (Addr)&VG_(sigstack)[VG_STACK_SIZE_W];
+      
+   ec = VG_(get_ExeContext2)(ret, ebp, esp, stacktop);
+
+   return ec;
+}
+
 __attribute__ ((noreturn))
 static void report_and_quit ( const Char* report )
 {
+   ExeContext *ec = get_real_execontext((Addr)__builtin_return_address(0));
+   VG_(pp_ExeContext)(ec);
+   
    VG_(pp_sched_status)();
    VG_(printf)("\n");
    VG_(printf)("Note: see also the FAQ.txt in the source distribution.\n");
@@ -1152,6 +1133,30 @@
    Primitive support for reading files.
    ------------------------------------------------------------------ */
 
+static inline Bool fd_exists(Int fd)
+{
+   struct vki_stat st;
+
+   return VG_(fstat)(fd, &st) == 0;
+}
+
+/* Move an fd into the Valgrind-safe range */
+Int VG_(safe_fd)(Int oldfd)
+{
+   Int newfd;
+
+   newfd = VG_(fcntl)(oldfd, VKI_F_DUPFD, VG_MAX_FD+1);
+   if (newfd != -1)
+      VG_(close)(oldfd);
+
+   VG_(fcntl)(newfd, VKI_F_SETFD, VKI_FD_CLOEXEC);
+
+   vg_assert(newfd > VG_MAX_FD);
+   return newfd;
+}
+
+
+
 /* Returns -1 on failure. */
 Int VG_(open) ( const Char* pathname, Int flags, Int mode )
 {  
@@ -1164,15 +1169,21 @@
    /* fd = open( pathname, O_RDONLY ); */
    /* ... so we go direct to the horse's mouth, which seems to work
       ok: */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, flags, mode);
+   fd = VG_(do_syscall)(__NR_open, (UInt)pathname, flags, mode);
    /* VG_(printf)("result = %d\n", fd); */
    if (VG_(is_kerror)(fd)) fd = -1;
    return fd;
 }
 
+Int VG_(pipe) ( Int fd[2] )
+{
+   Int ret = VG_(do_syscall)(__NR_pipe, fd);
+   return VG_(is_kerror)(ret) ? -1 : 0;
+}
+
 void VG_(close) ( Int fd )
 {
-   vg_do_syscall1(__NR_close, fd);
+   VG_(do_syscall)(__NR_close, fd);
 }
 
 
@@ -1180,38 +1191,52 @@
 {
    Int res;
    /* res = read( fd, buf, count ); */
-   res = vg_do_syscall3(__NR_read, fd, (UInt)buf, count);
-   if (VG_(is_kerror)(res)) res = -1;
+   res = VG_(do_syscall)(__NR_read, fd, (UInt)buf, count);
+   /* return -ERRNO on error */
    return res;
 }
 
-Int VG_(write) ( Int fd, void* buf, Int count)
+Int VG_(write) ( Int fd, const void* buf, Int count)
 {
    Int res;
    /* res = write( fd, buf, count ); */
-   res = vg_do_syscall3(__NR_write, fd, (UInt)buf, count);
-   if (VG_(is_kerror)(res)) res = -1;
+   res = VG_(do_syscall)(__NR_write, fd, (UInt)buf, count);
+   /* return -ERRNO on error */
    return res;
 }
 
 Int VG_(stat) ( Char* file_name, struct vki_stat* buf )
 {
    Int res;
-   res = vg_do_syscall2(__NR_stat, (UInt)file_name, (UInt)buf);
+   res = VG_(do_syscall)(__NR_stat, (UInt)file_name, (UInt)buf);
    return VG_(is_kerror)(res) ? (-1) : 0;
 }
 
+Int VG_(fstat) ( Int fd, struct vki_stat* buf )
+{
+   Int res;
+   res = VG_(do_syscall)(__NR_fstat, (UInt)fd, (UInt)buf);
+   return VG_(is_kerror)(res) ? (-1) : 0;
+}
+
+Int VG_(dup2) ( Int oldfd, Int newfd )
+{
+   Int res;
+   res = VG_(do_syscall)(__NR_dup2, (UInt)oldfd, (UInt)newfd);
+   return VG_(is_kerror)(res) ? (-1) : res;
+}
+
 Int VG_(rename) ( Char* old_name, Char* new_name )
 {
    Int res;
-   res = vg_do_syscall2(__NR_rename, (UInt)old_name, (UInt)new_name);
+   res = VG_(do_syscall)(__NR_rename, (UInt)old_name, (UInt)new_name);
    return VG_(is_kerror)(res) ? (-1) : 0;
 }
 
 Int VG_(unlink) ( Char* file_name )
 {
    Int res;
-   res = vg_do_syscall1(__NR_unlink, (UInt)file_name);
+   res = VG_(do_syscall)(__NR_unlink, (UInt)file_name);
    return VG_(is_kerror)(res) ? (-1) : 0;
 }
 
@@ -1221,7 +1246,7 @@
 {
    Int res;
    vg_assert(buf != NULL);
-   res = vg_do_syscall2(__NR_getcwd, (UInt)buf, (UInt)size);
+   res = VG_(do_syscall)(__NR_getcwd, (UInt)buf, (UInt)size);
    return VG_(is_kerror)(res) ? ((Char*)NULL) : (Char*)res;
 }
 
@@ -1270,17 +1295,29 @@
 {
    Int res;
    /* res = getpid(); */
-   res = vg_do_syscall0(__NR_getpid);
+   res = VG_(do_syscall)(__NR_getpid);
+   return res;
+}
+
+Int VG_(getpgrp) ( void )
+{
+   Int res;
+   /* res = getpgrp(); */
+   res = VG_(do_syscall)(__NR_getpgrp);
    return res;
 }
 
 Int VG_(getppid) ( void )
 {
    Int res;
-   res = vg_do_syscall0(__NR_getppid);
+   res = VG_(do_syscall)(__NR_getppid);
    return res;
 }
 
+Int VG_(setpgid) ( Int pid, Int pgrp )
+{
+   return VG_(do_syscall)(__NR_setpgid, pid, pgrp);
+}
 
 /* Return -1 if error, else 0.  NOTE does not indicate return code of
    child! */
@@ -1290,7 +1327,7 @@
    void* environ[1] = { NULL };
    if (cmd == NULL)
       return 1;
-   pid = vg_do_syscall0(__NR_fork);
+   pid = VG_(do_syscall)(__NR_fork);
    if (VG_(is_kerror)(pid))
       return -1;
    if (pid == 0) {
@@ -1300,13 +1337,13 @@
       argv[1] = "-c";
       argv[2] = cmd;
       argv[3] = 0;
-      (void)vg_do_syscall3(__NR_execve, 
-                           (UInt)"/bin/sh", (UInt)argv, (UInt)&environ);
+      (void)VG_(do_syscall)(__NR_execve, 
+			    (UInt)"/bin/sh", (UInt)argv, (UInt)&environ);
       /* If we're still alive here, execve failed. */
       return -1;
    } else {
       /* parent */
-      res = vg_do_syscall3(__NR_waitpid, pid, (UInt)NULL, 0);
+      res = VG_(do_syscall)(__NR_waitpid, pid, (UInt)NULL, 0);
       if (VG_(is_kerror)(res)) {
          return -1;
       } else {
@@ -1357,8 +1394,8 @@
    vg_assert(rdtsc_calibration_state == 0);
    rdtsc_calibration_state = 1;
    rdtsc_cal_start_raw = do_rdtsc_insn();
-   res = vg_do_syscall2(__NR_gettimeofday, (UInt)&rdtsc_cal_start_timeval, 
-                                           (UInt)NULL);
+   res = VG_(do_syscall)(__NR_gettimeofday, (UInt)&rdtsc_cal_start_timeval, 
+			 (UInt)NULL);
    vg_assert(!VG_(is_kerror)(res));
 }
 
@@ -1396,8 +1433,8 @@
 
    /* Now read both timers, and do the Math. */
    rdtsc_cal_end_raw = do_rdtsc_insn();
-   res = vg_do_syscall2(__NR_gettimeofday, (UInt)&rdtsc_cal_end_timeval, 
-                                           (UInt)NULL);
+   res = VG_(do_syscall)(__NR_gettimeofday, (UInt)&rdtsc_cal_end_timeval, 
+			 (UInt)NULL);
 
    vg_assert(rdtsc_cal_end_raw > rdtsc_cal_start_raw);
    cal_clock_ticks = rdtsc_cal_end_raw - rdtsc_cal_start_raw;
@@ -1749,7 +1786,7 @@
    args[0] = domain;
    args[1] = type;
    args[2] = protocol;
-   res = vg_do_syscall2(__NR_socketcall, SYS_SOCKET, (UInt)&args);
+   res = VG_(do_syscall)(__NR_socketcall, SYS_SOCKET, (UInt)&args);
    if (VG_(is_kerror)(res)) 
       res = -1;
    return res;
@@ -1764,7 +1801,7 @@
    args[0] = sockfd;
    args[1] = (UInt)serv_addr;
    args[2] = addrlen;
-   res = vg_do_syscall2(__NR_socketcall, SYS_CONNECT, (UInt)&args);
+   res = VG_(do_syscall)(__NR_socketcall, SYS_CONNECT, (UInt)&args);
    if (VG_(is_kerror)(res)) 
       res = -1;
    return res;
@@ -1785,7 +1822,7 @@
    args[1] = (UInt)msg;
    args[2] = count;
    args[3] = flags;
-   res = vg_do_syscall2(__NR_socketcall, SYS_SEND, (UInt)&args);
+   res = VG_(do_syscall)(__NR_socketcall, SYS_SEND, (UInt)&args);
    if (VG_(is_kerror)(res)) 
       res = -1;
    return res;
diff --git a/coregrind/vg_proxylwp.c b/coregrind/vg_proxylwp.c
new file mode 100644
index 0000000..e7fa5cb
--- /dev/null
+++ b/coregrind/vg_proxylwp.c
@@ -0,0 +1,1336 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Proxy LWP machinery.                           vg_proxylwp.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2003 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+#include "vg_include.h"
+
+/* We need our own copy of VG_(do_syscall)() to handle a special
+   race-condition.  If we've got signals unblocked, and we take a
+   signal in the gap either just before or after the syscall, we may
+   end up not running the syscall at all, or running it more than
+   once.
+
+   The solution is to make the signal handler derive the proxy's
+   precise state by looking to see which eip it is executing at
+   exception time.
+
+   Ranges:
+
+   sys_before ... sys_restarted:
+	Setting up register arguments and running state.  If
+	interrupted, then the syscall should be considered to return
+	ERESTARTSYS.
+
+   sys_restarted:
+	If interrupted and eip==sys_restarted, then either the syscall
+	was about to start running, or it has run, was interrupted and
+	the kernel wants to restart it.  eax still contains the
+	syscall number.  If interrupted, then the syscall return value
+	should be ERESTARTSYS.
+
+   sys_after:
+	If interrupted and eip==sys_after, the syscall either just
+	finished, or it was interrupted and the kernel doesn't want to
+	restart it.  Either way, eax equals the correct return value
+	(either the actual return value, or EINTR).
+
+   sys_after ... sys_done:
+	System call is complete, but the state hasn't been updated,
+	nor has the result been written back.  eax contains the return
+	value.
+*/
+
+enum PXState
+{
+   PXS_BAD = -1,
+   PXS_WaitReq,		/* waiting for a request */
+   PXS_RunSyscall,	/* running a syscall */
+   PXS_IntReply,	/* request interrupted - need to send reply */
+   PXS_SysDone,		/* small window between syscall
+			   complete and results written out */
+   PXS_SigACK,		/* waiting for a signal ACK */
+};
+
+enum RequestType {
+   PX_BAD = -1,
+   PX_SetSigmask,		/* sched->proxy; proxy->sched */
+   PX_RunSyscall,		/* sched->proxy; proxy->sched */
+   PX_Signal,			/* proxy->sched */
+   PX_SigACK,			/* sched->proxy */
+   PX_Ping,			/* use for sanity-checking */
+   PX_Exiting,			/* reply sent by proxy for exit sync */
+};
+
+extern void do_thread_syscall(Int sys, 
+			      Int arg1, Int arg2, Int arg3, Int arg4, Int arg5, Int arg6,
+			      Int *result, enum PXState *statep, enum PXState poststate);
+
+asm(
+".text\n"
+"	.type do_thread_syscall,@function\n"
+
+"do_thread_syscall:\n"
+"	push	%esi\n"
+"	push	%edi\n"
+"	push	%ebx\n"
+"	push	%ebp\n"
+".sys_before:\n"
+"	movl	16+ 4(%esp),%eax\n" /* syscall */
+"	movl	16+ 8(%esp),%ebx\n" /* arg1 */
+"	movl	16+12(%esp),%ecx\n" /* arg2 */
+"	movl	16+16(%esp),%edx\n" /* arg3 */
+"	movl	16+20(%esp),%esi\n" /* arg4 */
+"	movl	16+24(%esp),%edi\n" /* arg5 */
+"	movl	16+28(%esp),%ebp\n" /* arg6 */
+".sys_restarted:\n"
+"	int	$0x80\n"
+".sys_after:\n"
+"	movl	16+32(%esp),%ebx\n"	/* ebx = Int *res */
+"	movl	%eax, (%ebx)\n"		/* write the syscall retval */
+
+"	movl	16+36(%esp),%ebx\n"	/* ebx = enum PXState * */
+"	testl	%ebx, %ebx\n"
+"	jz	1f\n"
+
+"	movl	16+40(%esp),%ecx\n"	/* write the post state (must be after retval write) */
+"	movl	%ecx,(%ebx)\n"
+
+".sys_done:\n"				/* OK, all clear from here */
+"1:	popl	%ebp\n"
+"	popl	%ebx\n"
+"	popl	%edi\n"
+"	popl	%esi\n"
+"	ret\n"
+"	.size do_thread_syscall,.-do_thread_syscall\n"
+".previous\n"
+
+".section .rodata\n"
+"sys_before:	.long	.sys_before\n"
+"sys_restarted:	.long	.sys_restarted\n"
+"sys_after:	.long	.sys_after\n"
+"sys_done:	.long	.sys_done\n"
+".previous\n"
+);
+extern const Addr sys_before, sys_restarted, sys_after, sys_done;
+
+/* Run a syscall for a particular thread, getting the arguments from
+   the thread's registers, and returning the result in the thread's
+   eax.
+
+   Assumes that the only thread state which matters is the contents of
+   %eax-%ebp and the return value in %eax.
+ */
+static void thread_syscall(Int syscallno, ThreadState *tst, 
+			   enum PXState *state , enum PXState poststate)
+{
+   do_thread_syscall(syscallno,   /* syscall no. */
+		     tst->m_ebx,  /* arg 1 */
+		     tst->m_ecx,  /* arg 2 */
+		     tst->m_edx,  /* arg 3 */
+		     tst->m_esi,  /* arg 4 */
+		     tst->m_edi,  /* arg 5 */
+		     tst->m_ebp,  /* arg 6 */
+		     &tst->m_eax, /* result */
+		     state,	  /* state to update */
+		     poststate);  /* state when syscall has finished */
+}
+
+#define VG_PROXY_MAGIC	0xef83b192
+struct ProxyLWP {
+   UInt			magic;		/* magic number */
+   ThreadId		tid;		/* scheduler's tid */
+   ThreadState		*tst;		/* thread state */
+   Int			lwp;		/* kernel's ID for LWP */
+   Int			exitcode;	/* ProxyLWP exit code */
+
+   Int			topx, frommain;	/* pipe fds */
+   vki_ksiginfo_t	siginfo;	/* received signal */
+   Bool			terminating;	/* in the middle of exiting */
+
+   /* State of proxy */
+   enum PXState		state;
+
+   jmp_buf		jumpbuf;
+};
+
+static void sys_wait_results(Bool block, ThreadId tid, enum RequestType reqtype);
+
+struct PX_Request {
+   enum RequestType	request;
+
+   vki_ksigset_t	sigmask;	/* sigmask applied by SigACK */
+};
+
+/* All replies are multiplexed over a single pipe, so we need to distinguish them */
+struct PX_Reply {
+   ThreadId		tid;		/* tid this reply pertains to */
+   enum RequestType	req;		/* what this relates to */
+
+   union {
+      Int		syscallno;	/* system call completed */
+      vki_ksiginfo_t	siginfo;	/* signal */
+   };
+};
+
+/* results pipe */
+static Int result_send = -1, result_recv = -1;
+
+/* reentrant printf for proxy use */
+#if 0
+static void px_printf(const Char *fmt, ...)
+{
+   Char buf[1024];
+   Char *cp = buf;
+   va_list vargs;
+
+   void addbuf(Char c) { *cp++ = c; }
+
+   cp += VG_(sprintf)(buf, "[%d, %d]: ", VG_(getpid)(), VG_(gettid)());
+
+   va_start(vargs,fmt);
+   VG_(vprintf)(addbuf, fmt, vargs);
+   va_end(vargs);
+   VG_(send_bytes_to_logging_sink)(buf, cp-buf);
+}
+#else
+static void px_printf(const Char *fmt, ...)
+{
+}
+#endif
+
+static const Char *pxs_name(enum PXState s)
+{
+   switch(s) {
+#define S(x)	case PXS_##x: return #x
+      S(BAD);
+      S(WaitReq);
+      S(RunSyscall);
+      S(IntReply);
+      S(SysDone);
+      S(SigACK);
+#undef S
+   default: return "???";
+   }
+}
+
+static const Char *px_name(enum RequestType r)
+{
+   switch(r) {
+#define S(x)	case PX_##x: return #x
+      S(BAD);
+      S(SetSigmask);
+      S(RunSyscall);
+      S(Signal);
+      S(SigACK);
+      S(Ping);
+      S(Exiting);
+#undef S
+   default: return "???";
+   }
+}
+
+#define PROXYLWP_OFFSET	(VKI_BYTES_PER_PAGE - sizeof(ProxyLWP))
+#define ROUNDDN(p)	((UChar *)((UInt)(p) & ~(VKI_BYTES_PER_PAGE-1)))
+
+/* 
+   Allocate a page for the ProxyLWP and its stack.
+
+   This uses the trick for finding the LWP's private data by knowing
+   that the stack is a single page, and that the ProxyLWP structure is
+   at the end of it.  Therefore, given any %esp in the stack, you can
+   find the ProxyLWP structure (see LWP_TSD()).
+ */
+static ProxyLWP *LWP_alloc(void)
+{
+   UChar *p = VG_(get_memory_from_mmap)(VKI_BYTES_PER_PAGE, "alloc_LWP");
+   ProxyLWP *ret;
+   vg_assert(p == ROUNDDN(p)); /* px must be page aligned */
+
+   ret = (ProxyLWP *)(p + PROXYLWP_OFFSET);
+
+   ret->magic = VG_PROXY_MAGIC;
+
+   return ret;
+}
+
+/* Free a thread structure */
+static void LWP_free(ProxyLWP *px)
+{
+   UChar *p = ROUNDDN(px);
+   
+   vg_assert(px->magic == VG_PROXY_MAGIC);
+   px->magic = 0;
+   vg_assert((p + PROXYLWP_OFFSET) == (UChar *)px);
+
+   VG_(munmap)(p, VKI_BYTES_PER_PAGE);
+}
+
+/* Get a particular ProxyLWP's LWP structure from its esp (relies on
+   stacks being page aligned, with the ProxyLWP structure at the
+   end). */
+static inline ProxyLWP *LWP_TSD(void *esp)
+{
+   UChar *p = ROUNDDN(esp);
+   ProxyLWP *ret;
+
+   ret = (ProxyLWP *)(p + PROXYLWP_OFFSET);
+   vg_assert(ret->magic == VG_PROXY_MAGIC);
+
+   return ret;
+}
+
+/* Get top of stack */
+static inline void *LWP_stack(ProxyLWP *px)
+{
+   vg_assert(px->magic == VG_PROXY_MAGIC);
+
+   return (void *)(((void **)px) - 1);
+}
+
+static void proxy_fork_cleanup(ThreadId tid);
+
+/* Init the proxy mechanism */
+void VG_(proxy_init)(void)
+{
+   Int p[2];
+   Int res;
+
+   /* this will ignore any duplicate registrations */
+   VG_(atfork)(NULL, NULL, proxy_fork_cleanup);
+
+   vg_assert(result_recv == -1);
+   vg_assert(result_send == -1);
+
+   res = VG_(pipe)(p);
+   vg_assert(res == 0);
+
+   result_recv = VG_(safe_fd)(p[0]);
+   result_send = VG_(safe_fd)(p[1]);
+   
+   /* Make reading end non-blocking */
+   VG_(fcntl)(result_recv, VKI_F_SETFL, VKI_O_NONBLOCK);
+}
+
+/* After fork, the forking thread is in a strange state of having a
+   couple of pipes still linked to the parent. */
+static void proxy_fork_cleanup(ThreadId tid)
+{
+   ThreadId t;
+
+   VG_(close)(result_recv);
+   VG_(close)(result_send);
+
+   result_recv = result_send = -1;
+
+   VG_(proxy_init)();
+
+   for(t = 1; t < VG_N_THREADS; t++) {
+      ThreadState *tst = VG_(get_ThreadState)(t);
+      ProxyLWP *proxy = tst->proxy;
+
+      if (tst->status == VgTs_Empty) {
+	 vg_assert(proxy == NULL);
+	 continue;
+      }
+
+      vg_assert(proxy != NULL);
+
+      /* We need to do a manual teardown, since the proxy this structure
+	 describes is our parent's */
+      VG_(close)(proxy->topx);
+      VG_(close)(proxy->frommain);
+   
+      LWP_free(proxy);
+      tst->proxy = NULL;
+   }
+
+   /* Create a proxy for calling thread
+      
+      We need to temporarily set the state back to Runnable for
+      proxy_create's benefit.
+    */
+
+   {
+      ThreadState *tst = VG_(get_ThreadState)(tid);
+
+      vg_assert(tst->proxy == NULL);
+      vg_assert(tst->status == VgTs_WaitSys);
+      tst->status = VgTs_Runnable;
+      VG_(proxy_create)(tid);
+      VG_(proxy_setsigmask)(tid);
+      tst->status = VgTs_WaitSys;
+   }
+}
+
+Int VG_(proxy_resfd)(void)
+{
+   return result_recv;
+}
+
+void VG_(proxy_shutdown)(void)
+{
+   VG_(close)(result_recv);
+   VG_(close)(result_send);
+
+   result_recv = result_send = -1;
+}
+
+/* This is called from within a proxy LWP signal handler.  This
+   function records the siginfo, then longjmps back into the proxy
+   main state machine loop.  The presumption is that the signal
+   handler is being run with all signals blocked; the longjmp is
+   there to make sure they stay masked until the application thread is
+   ready to run its signal handler. */
+void VG_(proxy_handlesig)(const vki_ksiginfo_t *siginfo, 
+			  const struct vki_sigcontext *sigcontext)
+{
+   UChar local;
+   ProxyLWP *px = LWP_TSD(&local);
+   Addr eip = sigcontext->eip;
+   Int eax = sigcontext->eax;
+
+   vg_assert(siginfo->si_signo != 0);
+   if (px->siginfo.si_signo != 0) {
+      px_printf("proxy_handlesig: tid %d already has %d pending, new sig %d\n",
+		px->lwp, px->siginfo.si_signo, siginfo->si_signo);
+   }
+   vg_assert(px->siginfo.si_signo == 0);
+
+   px->siginfo = *siginfo;
+
+   px_printf("proxy got signal %d\n", siginfo->si_signo);
+
+   /* First look to see if the EIP is within our interesting ranges
+      near a syscall to work out what should happen. */
+   if (sys_before <= eip && eip <= sys_restarted) {
+      /* We are before the syscall actually ran, or it did run and
+	 wants to be restarted.  Either way, set the return code to
+	 indicate a restart.  This is not really any different from
+	 anywhere else, except that we can make some assertions about
+	 the proxy and machine state here. */
+      vg_assert(px->state == PXS_RunSyscall);
+      vg_assert(px->tst->m_eax == -VKI_ERESTARTSYS);
+   } else if (sys_after <= eip && eip <= sys_done) {
+      /* We're after the syscall.  Either it was interrupted by the
+	 signal, or the syscall completed normally.  In either case
+	 eax contains the correct syscall return value, and the new
+	 state is effectively PXS_SysDone. */
+      vg_assert(px->state == PXS_RunSyscall || px->state == PXS_SysDone);
+      px->state = PXS_SysDone;
+      px->tst->m_eax = eax;
+   }
+   px_printf("  signalled in state %s\n", pxs_name(px->state));
+
+   __builtin_longjmp(px->jumpbuf, 1);
+}
+
+static Bool send_reply(const struct PX_Reply *reply)
+{
+   const Int size = sizeof(struct PX_Reply);
+
+   return VG_(write)(result_send, reply, size) == size;
+}
+
+static Bool recv_reply(struct PX_Reply *reply)
+{
+   const Int size = sizeof(struct PX_Reply);
+
+   return VG_(read)(result_recv, reply, size) == size;
+}
+
+/* Proxy LWP thread.  This is run as a separate cloned() thread, so it
+   MUST NOT touch any core Valgrind data structures directly: the only
+   exception is while we're running a PX_RunSyscall command, we may
+   look at and update the thread's register state.  It interacts with
+   the rest of Valgrind by receiving messages through its pipe and
+   sending results through result_send. */
+static Int proxylwp(void *v)
+{
+   ProxyLWP *px = (ProxyLWP *)v;
+   Int frommain = px->frommain;
+   ThreadState *tst = px->tst;
+   vki_ksigset_t allsig;
+   vki_ksigset_t appsigmask;	/* signal mask the client has asked for */
+   Int ret = 0;
+   static const vki_kstack_t ss = { .ss_flags = VKI_SS_DISABLE };
+
+   /* Block everything until we're told otherwise (LWP should have
+      been started with all signals blocked anyway) */
+   VG_(ksigfillset)(&allsig);
+   VG_(ksigdelset)(&allsig, VKI_SIGVGKILL);	/* but allow SIGVGKILL to interrupt */
+
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
+
+   appsigmask = allsig;
+
+   /* no signal stack for us */
+   VG_(ksigaltstack)(&ss, NULL);
+
+   for(;;) {
+      struct PX_Reply reply, sigreply;
+      struct PX_Request req;
+      Int res;
+
+      if (__builtin_setjmp(px->jumpbuf)) {
+	 /* We were hit by a signal.  This is the signal-driven part
+	    of the state machine. 
+
+	    This code prepares a reply which is suitable for whatever
+	    was interrupted by this signal.  If "no reply" is the
+	    right response, then it sets reply.req = PX_BAD.
+
+	    NOTE: the ST:N notation represents the correspondence
+	    between states where we can be interrupted in the main
+	    state machine loop, and where those states are handled
+	    here.
+	 */
+
+	 if (px->siginfo.si_signo != VKI_SIGVGKILL) {
+	    /* First, send the signal info */
+	    sigreply.tid = px->tid;
+	    sigreply.req = PX_Signal;
+	    sigreply.siginfo = px->siginfo;
+
+	    if (!send_reply(&sigreply)) {
+	       ret = 44;		/* incomplete or failed write */
+	       goto out;
+	    }
+	 } else {
+	    /* We got VKI_SIGVGKILL, which means we just skip all the
+	       below and get back to the state machine - probably to
+	       exit. */
+	    px->state = PXS_WaitReq;
+	    px->siginfo.si_signo = 0;
+	    goto state_machine;
+	 }
+
+	 px->siginfo.si_signo = 0;
+
+	 /* Now work out what our new state is, and what to do on the way. */
+	 switch(px->state) {
+	 case PXS_WaitReq:
+	    /* We were interrupted while waiting for a request.  See
+	       if we had actually read the request, and do the
+	       appropriate thing if so. */
+	    reply.req = req.request;
+	    reply.tid = px->tid;
+
+	    switch(req.request) {
+	    case PX_BAD:
+	       /* ST:1 */
+	       /* nothing read; just wait for SigACK */
+	       px->state = PXS_SigACK;
+	       break;
+
+	    case PX_RunSyscall:
+	       /* ST:2 */
+	       /* They asked for a syscall, but we were signalled
+		  before even getting started.  Claim the syscall was
+		  interrupted.
+
+		  XXX how to distinguish between restartable and
+		  non-restartable syscalls?  Does it matter?
+	       */
+	       reply.syscallno = tst->m_eax;
+
+	       tst->m_eax = -VKI_ERESTARTSYS;
+	       px->state = PXS_IntReply;
+	       break;
+
+	    case PX_SetSigmask:
+	       /* ST:2 */
+	       /* ST:3 */
+	       /* They asked for a signal mask update. Ignore it,
+		  because they're going to give us a new mask when
+		  they send a SigACK, and we want all signals blocked
+		  in the meantime.  However, we set the state to
+		  PXS_IntReply to make sure the reply from the
+		  PX_SetSigmask is sent. */
+	       vg_assert(reply.req == PX_SetSigmask);
+	       px->state = PXS_IntReply;
+	       break;
+
+	    case PX_Ping:
+	       /* ST:2 */
+	       /* We read a Ping request, so we need to send a Ping
+		  reply. */
+	       vg_assert(reply.req == PX_Ping);
+	       px->state = PXS_IntReply;
+	       break;
+
+	    case PX_Exiting:
+	    case PX_Signal:
+	       ret = 10;	/* completely bogus - noone should send us a signal */
+	       goto out;
+
+	    case PX_SigACK:
+	       ret = 11;	/* Also bogus.  No way we should get a
+				   signal while waiting for a
+				   SigACK. */
+	       goto out;
+	    }
+	    break;
+
+	 case PXS_RunSyscall:
+	    /* ST:4 */
+	    /* We were actually running the syscall when interrupted.
+	       reply should already be set up, including return in eax. */
+	    vg_assert(reply.req == PX_RunSyscall);
+	    vg_assert(reply.syscallno == tst->syscallno);
+	    vg_assert(tst->status == VgTs_WaitSys);
+	    px->state = PXS_IntReply;
+	    break;
+
+	 case PXS_SysDone:
+	    /* The syscall is done; we just need to send the results
+	       back. */
+	    vg_assert(reply.req == PX_RunSyscall);
+	    vg_assert(reply.syscallno == tst->syscallno);
+	    px->state = PXS_IntReply;
+	    break;
+
+	 case PXS_IntReply:
+	 case PXS_SigACK:
+	    ret = 13;		/* Bogus.  Same as ret=11 above. */
+	    goto out;
+
+	 case PXS_BAD:
+	    ret = 33;
+	    goto out;
+	 }
+
+	 /* End of signal handling states.  If the scheduler LWP is
+	    currently running application code, tell it to drop back
+	    into the scheduler loop ASAP to handle the signal. */
+	 if (VG_(clo_lowlat_signals))
+	    VG_(need_resched)(px->tid);
+      }
+
+     state_machine:
+      px_printf("proxylwp main: state %s\n", pxs_name(px->state));
+
+      switch(px->state) {
+      case PXS_WaitReq:
+      case PXS_SigACK:
+	 req.request = PX_BAD;	/* init request so we know if the read() read anything */
+
+	 if (px->state == PXS_WaitReq) {
+	    /* allow signals when waiting for a normal request */
+	    VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
+	 }
+
+	 /* ST:1 */
+
+	 res = VG_(read)(frommain, &req, sizeof(req));
+
+	 /* ST:2 */
+
+	 /* process message with signals blocked */
+	 VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
+
+	 if (res == 0)
+	    goto out;		/* EOF - we're quitting */
+	 
+	 if (res < 0) {
+	    px_printf("read(frommain) failed %d\n", res);
+	    ret = 1;		/* error */
+	    goto out;
+	 }
+	 if (res != sizeof(req)) {
+	    ret = 2;		/* error - partial read */
+	    goto out;
+	 }
+
+	 px_printf("read req: %s\n", px_name(req.request));
+
+	 reply.tid = px->tid;
+	 reply.req = req.request;
+
+	 switch(req.request) {
+	 case PX_Ping:
+	    /* do nothing; just send reply */
+	    break;
+
+	 case PX_SigACK:
+	    /* The thread ACKed the signal, and sent the mask they
+	       want while running the handler. */
+	    vg_assert(px->state == PXS_SigACK);
+	    appsigmask = req.sigmask;
+	    VG_(ksigdelset)(&appsigmask, VKI_SIGVGKILL);  /* but allow SIGVGKILL to interrupt */
+	    px->state = PXS_WaitReq;
+	    reply.req = PX_BAD;	/* don't reply */
+	    break;
+	    
+	 case PX_SetSigmask:
+	    appsigmask = req.sigmask;
+	    VG_(ksigdelset)(&appsigmask, VKI_SIGVGKILL);   /* but allow SIGVGKILL to interrupt */
+
+	    vg_assert(px->state == PXS_WaitReq || 
+		      px->state == PXS_SigACK);
+
+	    if (px->state != PXS_SigACK) {
+	       /* If we're not waiting for a PX_SigACK, set the apps mask
+		  to get at least one of the pending signals, which will
+		  be delivered synchronously, so that some progress is
+		  made before the we tell the client the mask has been
+		  set..  Then reset the mask back to all blocked. */
+	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
+	       /* ST:3 */
+	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
+	    } else {
+	       /* Waiting for SigACK.  We want all signals blocked,
+		  and when the SigACK arrives, it will give us the
+		  thread's signal mask for its handler. */
+	    }
+	    break;
+
+	 case PX_RunSyscall:
+	    /* Run a syscall for our thread; results will be poked
+	       back into tst */
+	    reply.syscallno = tst->m_eax;
+
+	    vg_assert(px->state == PXS_WaitReq || 
+		      px->state == PXS_SigACK);
+	    if (px->state == PXS_SigACK) {
+	       /* If we're in the middle of signal handling, make the
+		  client's syscalls fail with ERESTARTSYS until its signal
+		  handler runs - there should be at most one, if it was
+		  on the way to us as we got the signal.  
+	       */
+	       px_printf("RunSyscall in SigACK: rejecting syscall %d with ERESTARTSYS\n",
+			 reply.syscallno);
+	       tst->m_eax = -VKI_ERESTARTSYS;
+	    } else {
+	       Int syscallno = tst->m_eax;
+	       
+	       px->state = PXS_RunSyscall;
+	       /* If we're interrupted before we get to the syscall
+		  itself, we want the syscall restarted. */
+	       tst->m_eax = -VKI_ERESTARTSYS;
+
+	       /* set our process group ID to match parent */
+	       if (VG_(getpgrp)() != VG_(main_pgrp))
+		  VG_(setpgid)(0, VG_(main_pgrp));
+
+	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
+
+	       /* ST:4 */
+	       
+	       thread_syscall(syscallno, tst, &px->state, PXS_SysDone);
+
+	       /* ST:5 */
+
+	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
+	       /* whew - made it here without being interrupted */
+	       px->state = PXS_WaitReq;
+
+	       if (VG_(clo_lowlat_syscalls))
+		  VG_(need_resched)(px->tid);
+	    }
+	    break;
+	    
+	 case PX_BAD:
+	 case PX_Signal:
+	 case PX_Exiting:
+	    /* we never expect to see these */
+	    ret = 3;
+	    goto out;
+	 }
+	 break;
+
+      case PXS_IntReply:
+	 /* This state only exists so that we fall out and write the
+	    interrupted syscall reply before moving to SigACK */
+	 px->state = PXS_SigACK;
+	 break;
+
+      case PXS_RunSyscall:
+      case PXS_SysDone:
+      case PXS_BAD:
+      default:
+	 /* Never expect to see these states here */
+	 ret = 5;
+	 goto out;
+      }
+
+      /* If we have something sensible to say, say it */
+      if (reply.req != PX_BAD) {
+	 px_printf("sending reply %s\n", px_name(reply.req));
+
+	 if (!send_reply(&reply)) {
+	    ret = 4;		/* error - didn't write full message */
+	    goto out;
+	 }
+	 reply.req = PX_BAD;
+      }
+   }
+      
+  out:
+   px_printf("proxy exiting with ret=%d\n", ret);
+
+   {
+      struct PX_Reply reply;
+      reply.req = PX_Exiting;
+      reply.tid = px->tid;
+      px_printf("exit: sending %s\n", px_name(reply.req));
+      
+      send_reply(&reply);
+   }
+
+   px->frommain = -1;
+   VG_(close)(frommain);
+
+   px->exitcode = ret;
+   return ret;
+}
+
+/* Send signal SIG to the proxy LWP which services thread TID.
+
+   If TID is the current thread and the signal will actually be
+   delivered (not ignored, not blocked by the thread's effective
+   mask), we also wait for the proxy to report back, making a
+   self-directed signal behave synchronously.  A NULL proxy (thread
+   already torn down) is silently ignored. */
+void VG_(proxy_sendsig)(ThreadId tid, Int sig)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy = tst->proxy;
+   Int lwp;
+
+   if (proxy == NULL)
+      return;
+
+   lwp = proxy->lwp;		/* proxy->lwp may change async */
+
+   if (lwp != 0) {
+      /* SIGKILL and SIGSTOP always apply to all threads (need to
+	 route for route_signals case?) */
+      if (sig == VKI_SIGKILL || sig == VKI_SIGSTOP)
+	 VG_(kkill)(VG_(main_pid), sig);
+      else
+	 VG_(ktkill)(lwp, sig);
+   }
+
+   /* If a thread is sending a signal to itself and the signal isn't
+      blocked (ie, it will be delivered), wait until the signal
+      message gets sent back, thus making the signal synchronous. */
+   if (sig != 0 && 
+       !VG_(is_sig_ign)(sig) &&
+       tid == VG_(get_current_or_recent_tid)() && 
+       !VG_(ksigismember)(&tst->eff_sig_mask, sig)) {
+      /* If the LWP is actually blocked in a sigtimedwait, then it
+	 will eat the signal rather than make it pending and deliver
+	 it by the normal mechanism.  In this case, just wait for the
+	 syscall to finish. */
+      if (tst->status == VgTs_WaitSys && tst->syscallno == __NR_rt_sigtimedwait)
+	 sys_wait_results(True, tid, PX_RunSyscall);
+      else
+	 sys_wait_results(True, tid, PX_Signal);
+   }
+}
+
+/* If a thread is blocked in a syscall, this function will interrupt
+   the proxy LWP's syscall by hitting it with a VKI_SIGVGINT signal.
+   This signal will not be reported to the client application.
+
+   No-op unless the thread is actually in VgTs_WaitSys; on return the
+   thread has been made runnable again (asserted below). */
+void VG_(proxy_abort_syscall)(ThreadId tid)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy = tst->proxy;
+   Int lwp;
+
+   if (tst->status != VgTs_WaitSys)
+      return;
+
+   vg_assert(proxy != NULL);
+
+   lwp = proxy->lwp;
+   
+   if (lwp != 0)
+      VG_(ktkill)(lwp, VKI_SIGVGINT);
+
+   /* Block until the interrupted syscall's reply has been consumed. */
+   sys_wait_results(True, tid, PX_RunSyscall);
+
+   vg_assert(tst->status == VgTs_Runnable);
+}
+
+/* Thin wrapper around the futex(2) syscall; returns the raw syscall
+   result (-ve errno on failure). */
+static Int do_futex(void *addr, Int op, Int val, struct vki_timespec *time, void *addr2)
+{
+   return VG_(do_syscall)(__NR_futex, addr, op, val, time, addr2);
+}
+
+/* futex(2) operation codes (mirroring <linux/futex.h>) */
+#define VKI_FUTEX_WAIT		0
+#define VKI_FUTEX_WAKE		1
+#define VKI_FUTEX_FD		2
+#define VKI_FUTEX_REQUEUE	3
+
+/* Does the running kernel support futex?  Probed once in proxy_clone(). */
+static Int have_futex = -1;	/* -1 -> unknown */
+
+/*
+  Create a proxy LWP using whatever varient of clone makes the most
+   sense for the current kernel.  We use futexes for termination
+   notification wherever possible.  Returns 0 on success, or a -ve
+   error code on failure.
+*/
+static Int proxy_clone(ProxyLWP *proxy)
+{
+   Int ret;
+
+   if (have_futex == -1)
+      have_futex = do_futex(NULL, VKI_FUTEX_WAKE, 0, NULL, NULL) != -VKI_ENOSYS;
+
+   if (have_futex) {
+      ret = VG_(clone)(proxylwp, 
+		       LWP_stack(proxy),
+		       VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM |
+		       VKI_CLONE_SIGHAND | VKI_CLONE_THREAD | 
+		       VKI_CLONE_PARENT_SETTID |
+		       VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED, 
+		       proxy, &proxy->lwp, &proxy->lwp);
+   } else {
+      VG_(do_signal_routing) = True; /* XXX True, it seems kernels
+					which have futex also have
+					sensible signal handling, but
+					it would be nice to test it
+					directly. */
+
+      ret = VG_(clone)(proxylwp, 
+		       LWP_stack(proxy),
+		       VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM |
+		       VKI_CLONE_SIGHAND | VKI_CLONE_THREAD, 
+		       proxy, NULL, NULL);
+      proxy->lwp = ret;
+   }
+
+   return (ret < 0) ? ret : 0;
+}
+
+/* Wait on a proxy LWP.  Returns True if the LWP has exited, in which
+   case its exit status is written to *status (when status != NULL).
+   With block == False it returns False immediately if the LWP is
+   still alive.
+
+   NOTE: status may legitimately be NULL (VG_(proxy_sigack) passes
+   NULL); previously only the blocking futex path guarded against
+   that, so the non-blocking path and the waitpid path dereferenced a
+   NULL pointer once the proxy had exited.  All paths now guard. */
+static Bool proxy_wait(ProxyLWP *proxy, Bool block, Int *status)
+{
+   Bool ret = False;
+
+   if (have_futex == -1)
+      return False;
+
+   if (have_futex) {
+      if (block) {
+	 Int lwp = proxy->lwp;
+
+	 /* CLONE_CHILD_CLEARTID zeroes proxy->lwp and wakes this
+	    futex when the LWP exits. */
+	 while(proxy->lwp != 0)
+	    do_futex(&proxy->lwp, VKI_FUTEX_WAIT, lwp, NULL, NULL);
+
+	 if (status)
+	    *status = proxy->exitcode;
+	 ret = True;
+      } else {
+	 if (proxy->lwp == 0) {
+	    if (status)
+	       *status = proxy->exitcode;
+	    ret = True;
+	 }
+      }
+   } else {
+      Int flags = VKI__WCLONE;
+      Int res;
+      Int exitstatus;
+
+      if (!block)
+	 flags |= VKI_WNOHANG;
+      res = VG_(waitpid)(proxy->lwp, &exitstatus, flags);
+      if (res == proxy->lwp) {
+	 vg_assert(exitstatus == proxy->exitcode);
+	 if (status)
+	    *status = exitstatus;
+	 ret = True;
+      }
+   }
+
+   return ret;
+}
+
+/* Create a proxy for a new thread.
+
+   Sets up the request pipe (topx: scheduler -> proxy; frommain: the
+   proxy's read end) and clones the proxy LWP with ALL host signals
+   blocked; the proxy sets up its own mask once running.  Panics if
+   the clone fails. */
+void VG_(proxy_create)(ThreadId tid)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy;
+   Int p[2];
+   vki_ksigset_t mask;
+   Int ret;
+
+   vg_assert(tst->proxy == NULL);
+   vg_assert(tst->status == VgTs_Runnable);
+
+   proxy = LWP_alloc();
+
+   VG_(pipe)(p);
+
+   proxy->tid = tid;
+   proxy->tst = tst;
+   proxy->siginfo.si_signo = 0;	/* no signal pending */
+   proxy->frommain = VG_(safe_fd)(p[0]);
+   proxy->topx = VG_(safe_fd)(p[1]);
+   proxy->state = PXS_WaitReq;	/* start by waiting for requests */
+   proxy->terminating = False;
+
+   /* Make sure proxy LWP starts with all signals blocked (not even
+      SEGV, BUS, ILL or FPE) */
+   VG_(block_all_host_signals)(&mask);
+
+   ret = proxy_clone(proxy);
+   if (ret < 0) {
+	   VG_(printf)("Error %d trying to create proxy LWP for tid %d\n",
+		       ret, tid);
+	   VG_(core_panic)("Can't start proxy LWPs");
+   }
+
+   VG_(restore_all_host_signals)(&mask);
+
+   tst->proxy = proxy;
+}
+
+/* Clean up proxy after thread dies.
+
+   If force is True the proxy is killed with SIGVGKILL (the thread
+   must still be live); otherwise the thread must already be
+   VgTs_Empty.  In either case we drain the results pipe up to the
+   PX_Exiting message before reaping the LWP, to avoid deadlocking
+   against a proxy blocked writing its reply.  Idempotent: a proxy
+   already marked terminating is left alone. */
+void VG_(proxy_delete)(ThreadId tid, Bool force)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy = tst->proxy;
+   Bool res;
+   Int status;
+   Int lwp;
+
+   if (proxy == NULL)
+      return;			/* nothing to do */
+
+   lwp = proxy->lwp;
+
+#if 0
+   MAYBE_PRINTF("VG_(proxy_delete)(tid=%d (lwp=%d), force=%s; tst->status=%d\n",
+		tid, lwp, force ? "true" : "false", tst->status);
+#endif
+   vg_assert(proxy->tid == tid);
+   if (proxy->terminating)
+      return;		/* already going away */
+
+   proxy->terminating = True;
+
+   /* Closing the request pipe gives the proxy EOF, telling it to quit. */
+   VG_(close)(proxy->topx);
+   proxy->topx = -1;
+
+   /* proxy thread will close proxy->frommain itself */
+
+   if (force && lwp != 0) {
+      /* wouldn't need to force it if it were already dead */
+      vg_assert(tst->status != VgTs_Empty);
+      //VG_(printf)("kill %d with SIGVGKILL\n", lwp);
+      VG_(ktkill)(lwp, VKI_SIGVGKILL);
+   } else
+      vg_assert(tst->status == VgTs_Empty); /* just killed */
+
+   status = -1;
+   res = False;
+
+   /* We need to wait for the PX_Exiting message before doing the
+      proxy_wait, because if we don't read the results pipe, the proxy
+      may be blocked writing to it, causing a deadlock with us as we
+      wait for it to exit. */
+   sys_wait_results(True, tid, PX_Exiting);
+   res = proxy_wait(proxy, True, &status);
+
+   if (!res || status != 0)
+      VG_(printf)("proxy %d for tid %d exited status %d, res %d\n",
+		  lwp, tid, status, res);
+
+   LWP_free(proxy);
+   tst->proxy = NULL;
+}
+
+/* Read back the results of any completed syscalls.
+
+   At this point, there should be only one pending syscall per thread.
+   Those threads should be in VgTs_WaitSys state.  Each syscall return
+   may have multiple signals associated with it, so we read those and
+   set up some pending signals in our signal simulation.  When we
+   finally get the message saying the syscall is complete, we mark the
+   thread as runnable and return.
+
+   If block is set to True, then this call will block until anything
+   happens (ie, some progress was made).
+
+   If reqtype != PX_BAD, then this will block until some reply for
+   that request type appears (assuming you're expecting that kind of
+   reply, otherwise it will block forever).  If tid != 0, then it will
+   wait for a reply for that particular tid.
+ */
+static void sys_wait_results(Bool block, ThreadId tid, enum RequestType reqtype)
+{
+   /* When no specific reply is awaited, one pass is enough. */
+   Bool found_reply = (reqtype == PX_BAD);
+   struct PX_Reply res;
+   
+   /* Only the scheduler LWP may drain the results pipe. */
+   vg_assert(VG_(gettid)() == VG_(main_pid));
+
+   do {
+      if (reqtype != PX_BAD || block) {
+	 /* wait for activity on recv_res */
+	 struct vki_pollfd pollfd;
+	 Int ret;
+
+	 /* result_recv could be -1 if we're asking for results before any
+	    syscalls are issued - which is OK - but we can't block on
+	    it. */
+	 vg_assert(result_recv != -1);
+
+	 pollfd.fd = result_recv;
+	 pollfd.events = VKI_POLLIN;
+
+	 do {
+	    ret = VG_(poll)(&pollfd, 1, -1);
+	 } while(ret == -VKI_EINTR);
+ 
+	 if (ret <= 0) {
+	    VG_(printf)("sys_wait_results: poll failed fd=%d errno=%d\n",
+			pollfd.fd, ret);
+	    return;
+	 }
+      }
+
+      /* Drain every reply currently available without blocking. */
+      while(recv_reply(&res)) {
+	 ThreadState *tst;
+
+	 if (reqtype != PX_BAD &&
+	     res.req == reqtype &&
+	     (tid == 0 || tid == res.tid))
+	    found_reply = True;
+
+	 tst = VG_(get_ThreadState)(res.tid);
+
+	 switch(res.req) {
+	 case PX_SetSigmask:
+	    /* Don't need to do anything */
+	    if (VG_(clo_trace_signals) || VG_(clo_trace_syscalls))
+	       VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_SetSigmask for TID %d",
+			    res.tid);
+	    break;
+
+	 case PX_RunSyscall:
+	    if (VG_(clo_trace_syscalls))
+	       VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_RunSyscall for TID %d: syscall %d result %d",
+			    res.tid, tst->syscallno, tst->m_eax);
+
+	    if (tst->status != VgTs_WaitSys)
+	       VG_(printf)("tid %d in status %d\n",
+			   tst->tid, tst->status);
+	 
+	    vg_assert(res.syscallno == tst->syscallno);
+	    vg_assert(tst->status == VgTs_WaitSys);
+
+	    /* Syscall complete: post-process and make thread runnable. */
+	    VG_(post_syscall)(res.tid);
+	    break;
+
+	 case PX_Signal:
+	    if (VG_(clo_trace_signals) || VG_(clo_trace_syscalls))
+	       VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_Signal for TID %d, signal %d",
+			    res.tid, res.siginfo.si_signo);
+
+	    vg_assert(res.siginfo.si_signo != 0);
+	    /* Don't deliver to a thread whose proxy is being torn down. */
+	    if (VG_(threads)[res.tid].proxy && 
+		!VG_(threads)[res.tid].proxy->terminating)
+	       VG_(deliver_signal)(res.tid, &res.siginfo, True);
+	    break;
+
+	 case PX_Ping:
+	    /* Got a ping response. Great. */
+	    break;
+
+	 case PX_Exiting:
+	    /* They're exiting.  Hooray! */
+	    break;
+
+	 case PX_BAD:
+	 case PX_SigACK:
+	 default:
+	    VG_(core_panic)("sys_wait_results: got PX_BAD/PX_SigACK!\n");
+	 }
+      }
+   } while(!found_reply);
+}
+
+/* External version: non-blocking drain of any pending proxy replies. */
+void VG_(proxy_results)(void)
+{
+   sys_wait_results(False, 0, PX_BAD);
+}
+
+/* Tell proxy about its thread's updated signal mask, and wait for the
+   proxy to acknowledge it before returning. */
+void VG_(proxy_setsigmask)(ThreadId tid)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy = tst->proxy;
+   Int res;
+   struct PX_Request req;
+
+   vg_assert(proxy != NULL);
+   vg_assert(proxy->tid == tid);
+
+   req.request = PX_SetSigmask;
+   req.sigmask = tst->sig_mask;
+
+   tst->eff_sig_mask = tst->sig_mask;
+
+   /* clear the results pipe before we try to write to a proxy to
+      prevent a deadlock */
+   VG_(proxy_results)();
+   res = VG_(write)(proxy->topx, &req, sizeof(req));
+   vg_assert(res == sizeof(req));
+
+   /* wait for proxy to ack mask update; mask changes don't really
+      have to be synchronous, but they do have to be fully ordered
+      with respect to each other (ie, if thread A then thread B
+      updates their signal masks, A's update must be done before B's
+      is).  */
+   sys_wait_results(True, tid, PX_SetSigmask);
+}
+
+/* Acknowledge a signal delivered to thread TID, handing the proxy the
+   signal mask to use while the handler runs.  A proxy that has
+   already exited is left alone. */
+void VG_(proxy_sigack)(ThreadId tid, const vki_ksigset_t *mask)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy = tst->proxy;
+   Int res;
+   struct PX_Request req;
+
+   vg_assert(proxy != NULL);
+   vg_assert(proxy->tid == tid);
+   
+   /* Nothing to ack if the proxy is already dead. */
+   if (proxy_wait(proxy, False, NULL))
+      return;
+
+   req.request = PX_SigACK;
+   req.sigmask = *mask;
+
+   tst->eff_sig_mask = *mask;
+
+#if 0
+   /* Clear the results pipe before we try to write to a proxy to
+      prevent a deadlock.
+
+      XXX this breaks things.  This is called as a result of a
+      PX_Signal message, and is called from within sys_wait_results.
+      If that sys_wait_results was blocking of a particular message,
+      it will never wake up if we eat those messages by calling
+      sys_wait_results ourselves from here.  Maybe make
+      sys_wait_results non-recursive?
+   */
+   VG_(proxy_results)();
+#endif
+
+   res = VG_(write)(proxy->topx, &req, sizeof(req));
+   vg_assert(res == sizeof(req));
+}
+
+/* Wait for a signal to be delivered to any thread.  On pre-futex
+   (2.4) kernels this falls back to userspace signal routing. */
+void VG_(proxy_waitsig)(void)
+{
+   if (VG_(do_signal_routing))
+      VG_(route_signals)();
+   else
+      sys_wait_results(True, VG_INVALID_THREADID /* any */, PX_Signal);
+}
+
+/* Issue a syscall to the thread's ProxyLWP.  The syscall number and
+   arguments are taken from the thread's register state; the result
+   comes back later via sys_wait_results().  Always returns 0. */
+Int VG_(sys_issue)(int tid)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   ProxyLWP *proxy = tst->proxy;
+   Int res;
+   struct PX_Request req;
+
+   vg_assert(proxy != NULL);
+   vg_assert(proxy->tid == tid);
+
+   req.request = PX_RunSyscall;
+   
+   /* clear the results pipe before we try to write to a proxy to
+      prevent a deadlock */
+   VG_(proxy_results)();
+   res = VG_(write)(proxy->topx, &req, sizeof(req));
+
+   if (res != sizeof(req)) {
+      /* NOTE(review): %d with sizeof(req) (size_t) relies on the
+	 32-bit x86 ABI; harmless here but not portable. */
+      VG_(printf)("sys_issue: write to tid %d failed %d (not %d)\n",
+		  tid, res, sizeof(req));
+   }
+   return 0;
+}
+
+/* Relatively expensive sanity tests for the syscall machinery.
+
+   For every live thread: check it has a proxy, that the proxy's tid
+   back-pointer is correct, that the proxy LWP hasn't died, and
+   (unless it is busy in a syscall) that it responds to a PX_Ping.
+   Asserts if anything is inconsistent. */
+void VG_(proxy_sanity)(void)
+{
+   Int tid;
+   Bool sane = True;
+   static const struct PX_Request req = { .request = PX_Ping };
+
+   for(tid = 0; tid < VG_N_THREADS; tid++) {
+      ThreadState *tst = &VG_(threads)[tid];
+      ProxyLWP *px;
+      Int status;
+      Int ret;
+
+      if (tst->status == VgTs_Empty)
+	 continue;
+
+      if (tst->proxy == NULL) {
+	 /* fixed: the format string used %d but no argument was
+	    passed (undefined behavior), and the newline was missing */
+	 VG_(printf)("TID %d: NULL proxy\n", tid);
+	 sane = False;
+	 continue;
+      }
+
+      px = tst->proxy;
+
+      if (px->tid != tid) {
+	 VG_(printf)("TID %d: proxy LWP %d doesn't have right tid (%d)\n",
+		     tid, px->lwp, px->tid);
+	 sane = False;
+      }
+
+      if (proxy_wait(px, False, &status)) {
+	 VG_(printf)("TID %d: proxy LWP %d exited with status %d\n",
+		     tid, px->lwp, status);
+	 sane = False;
+	 continue;
+      }
+
+      /* No point checking if proxy is busy in a syscall, but all
+	 other times it should respond promptly. */
+      if (tst->status != VgTs_WaitSys) {
+	 ret = VG_(write)(px->topx, &req, sizeof(req));
+	 if (ret != sizeof(req)) {
+	    VG_(printf)("TID %d: failed to write PX_Ping to lwp %d: %d\n",
+			tid, px->lwp, ret);
+	    sane = False;
+	 }
+	 sys_wait_results(True, tid, PX_Ping);
+	 /* Can't make an assertion here, fortunately; this will
+	    either come back or it won't. */
+      }
+   }
+
+   vg_assert(sane);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Proxy LWP machinery.                           vg_proxylwp.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index cc81d96..9c029a7 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -47,34 +47,21 @@
      ThreadStatus.retval
   Currently unsure, and so am not doing so.
 
-- Signals interrupting read/write and nanosleep: SA_RESTART settings.
-  Read/write correctly return with EINTR when SA_RESTART isn't
-  specified and they are interrupted by a signal.  nanosleep just
-  pretends signals don't exist -- should be fixed.
-
 - So, what's the deal with signals and mutexes?  If a thread is
   blocked on a mutex, or for a condition variable for that matter, can
   signals still be delivered to it?  This has serious consequences --
   deadlocks, etc.
 
-- Signals still not really right.  Each thread should have its
-  own pending-set, but there is just one process-wide pending set.
-
   TODO for valgrind-1.0:
 
 - Update assertion checking in scheduler_sanity().
 
   TODO sometime:
 
-- poll() in the vg_libpthread.c -- should it handle the nanosleep
-  being interrupted by a signal?  Ditto accept?
-
 - Mutex scrubbing - clearup_after_thread_exit: look for threads
   blocked on mutexes held by the exiting thread, and release them
   appropriately. (??)
 
-- pthread_atfork
-
 */
 
 
@@ -112,36 +99,13 @@
 Bool    VG_(scheduler_jmpbuf_valid) = False;
 /* ... and if so, here's the signal which caused it to do so. */
 Int     VG_(longjmpd_on_signal);
+/* If the current thread gets a synchronous unresumable signal, then
+   its details are placed here by the signal handler, to be passed to
+   the application's signal handler later on. */
+vki_ksiginfo_t VG_(unresumable_siginfo);
 
-
-/* Machinery to keep track of which threads are waiting on which
-   fds. */
-typedef
-   struct {
-      /* The thread which made the request. */
-      ThreadId tid;
-
-      /* The next two fields describe the request. */
-      /* File descriptor waited for.  -1 means this slot is not in use */
-      Int      fd;
-      /* The syscall number the fd is used in. */
-      UInt     syscall_no;
-
-      /* False => still waiting for select to tell us the fd is ready
-         to go.  True => the fd is ready, but the results have not yet
-         been delivered back to the calling thread.  Once the latter
-         happens, this entire record is marked as no longer in use, by
-         making the fd field be -1.  */
-      Bool     ready; 
-
-      /* The result from SK_(pre_blocking_syscall)();  is passed to
-       * SK_(post_blocking_syscall)(). */
-      void*    pre_result;
-   }
-   VgWaitedOnFd;
-
-static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
-
+/* If != VG_INVALID_THREADID, this is the preferred tid to schedule */
+static ThreadId prefer_sched = VG_INVALID_THREADID;
 
 /* Keeping track of keys. */
 typedef
@@ -239,7 +203,6 @@
       VG_(printf)("\nThread %d: status = ", i);
       switch (VG_(threads)[i].status) {
          case VgTs_Runnable:   VG_(printf)("Runnable"); break;
-         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
          case VgTs_WaitJoinee: VG_(printf)("WaitJoinee(%d)", 
                                            VG_(threads)[i].joiner_jee_tid);
                                break;
@@ -247,7 +210,7 @@
          case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
          case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
          case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
-         case VgTs_WaitSIG:    VG_(printf)("WaitSIG"); break;
+         case VgTs_WaitSys:    VG_(printf)("WaitSys"); break;
          default: VG_(printf)("???"); break;
       }
       VG_(printf)(", associated_mx = %p, associated_cv = %p\n", 
@@ -262,30 +225,6 @@
    VG_(printf)("\n");
 }
 
-static
-void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no, void* pre_res )
-{
-   Int i;
-
-   vg_assert(fd != -1); /* avoid total chaos */
-
-   for (i = 0;  i < VG_N_WAITING_FDS; i++)
-      if (vg_waiting_fds[i].fd == -1)
-         break;
-
-   if (i == VG_N_WAITING_FDS)
-      VG_(core_panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
-   /*
-   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n", 
-               tid, fd, i);
-   */
-   vg_waiting_fds[i].fd         = fd;
-   vg_waiting_fds[i].tid        = tid;
-   vg_waiting_fds[i].ready      = False;
-   vg_waiting_fds[i].syscall_no = syscall_no;
-   vg_waiting_fds[i].pre_result = pre_res;
-}
-
 
 
 static
@@ -363,6 +302,12 @@
    /*NOTREACHED*/
 }
 
+ThreadState *VG_(get_ThreadState)(ThreadId tid)
+{
+   vg_assert(tid >= 0 && tid < VG_N_THREADS);
+   return &VG_(threads)[tid];
+}
+
 Bool VG_(is_running_thread)(ThreadId tid)
 {
    ThreadId curr = VG_(get_current_tid)();
@@ -561,6 +506,10 @@
 
    VGP_PUSHCC(VgpRun);
    VG_(load_thread_state) ( tid );
+
+   /* there should be no undealt-with signals */
+   vg_assert(VG_(unresumable_siginfo).si_signo == 0);
+
    if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
       /* try this ... */
       VG_(scheduler_jmpbuf_valid) = True;
@@ -601,13 +550,18 @@
    VG_(threads)[tid].cancel_ty   = True; /* PTHREAD_CANCEL_DEFERRED */
    VG_(threads)[tid].cancel_pend = NULL; /* not pending */
    VG_(threads)[tid].custack_used = 0;
-   VG_(threads)[tid].n_signals_returned = 0;
    VG_(ksigemptyset)(&VG_(threads)[tid].sig_mask);
-   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
+   VG_(ksigfillset)(&VG_(threads)[tid].eff_sig_mask);
    VG_(threads)[tid].specifics_ptr = NULL;
+
+   VG_(threads)[tid].syscallno		  = -1;
+   VG_(threads)[tid].sys_pre_res	  = NULL;
+
+   VG_(threads)[tid].proxy		  = NULL;
 }
 
 
+
 /* Initialise the scheduler.  Create a single "main" thread ready to
    run, with special ThreadId of one.  This is called at startup; the
    caller takes care to park the client's state is parked in
@@ -625,9 +579,6 @@
       VG_(threads)[i].stack_highest_word   = (Addr)NULL;
    }
 
-   for (i = 0; i < VG_N_WAITING_FDS; i++)
-      vg_waiting_fds[i].fd = -1; /* not in use */
-
    for (i = 0; i < VG_N_THREAD_KEYS; i++) {
       vg_thread_keys[i].inuse      = False;
       vg_thread_keys[i].destructor = NULL;
@@ -656,148 +607,32 @@
 
    /* Not running client code right now. */
    VG_(scheduler_jmpbuf_valid) = False;
+
+   /* Proxy for main thread */
+   VG_(proxy_create)(tid_main);
 }
 
 
-/* What if fd isn't a valid fd? */
-static
-void set_fd_nonblocking ( Int fd )
-{
-   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
-   vg_assert(!VG_(is_kerror)(res));
-   res |= VKI_O_NONBLOCK;
-   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
-   vg_assert(!VG_(is_kerror)(res));
-}
-
-static
-void set_fd_blocking ( Int fd )
-{
-   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
-   vg_assert(!VG_(is_kerror)(res));
-   res &= ~VKI_O_NONBLOCK;
-   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
-   vg_assert(!VG_(is_kerror)(res));
-}
-
-static
-Bool fd_is_blockful ( Int fd )
-{
-   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
-   vg_assert(!VG_(is_kerror)(res));
-   return (res & VKI_O_NONBLOCK) ? False : True;
-}
-
-static
-Bool fd_is_valid ( Int fd )
-{
-   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
-   return VG_(is_kerror)(res) ? False : True;
-}
 
 
 
 /* vthread tid is returning from a signal handler; modify its
    stack/regs accordingly. */
 
-/* [Helper fn for handle_signal_return] tid, assumed to be in WaitFD
-   for read or write, has been interrupted by a signal.  Find and
-   clear the relevant vg_waiting_fd[] entry.  Most of the code in this
-   procedure is total paranoia, if you look closely. */
-
-/* 4 Apr 2003: monty@mysql.com sent a fix, which adds the comparisons
-   against -1, and the following explaination.
-
-   Valgrind uses fd = -1 internally to tell that a file descriptor is
-   not in use, as the following code shows (at end of
-   cleanup_waiting_fd_table()).
-
-   vg_assert(waiters == 1);
-   for (i = 0; i < VG_N_WAITING_FDS; i++)
-     if (vg_waiting_fds[i].tid == tid && vg_waiting_fds[i].fd != -1)
-         break;
-   vg_assert(i < VG_N_WAITING_FDS);
-   vg_assert(vg_waiting_fds[i].fd != -1);
-   vg_waiting_fds[i].fd = -1;    -- not in use
-                     ^^^^^^^
-
-   The bug is that valrind is setting fd = -1 for a not used file
-   descriptor but vg_waiting_fds[i].tid is not reset.
-
-   What happens is that on a later call to cleanup_waiting_fd_table()
-   the function will find old files that was waited on before by the
-   same thread, even if they are marked as 'not in use' by the above
-   code.
-
-   I first tried to fix the bug by setting vg_waiting_fds[i].tid to 0
-   at the end of the above function but this didn't fix the bug.
-   (Maybe there is other places in the code where tid is not properly
-   reset).  After adding the test for 'fd == -1' to the loops in
-   cleanup_waiting_fd_table() all problems disappeared.
-*/
-
-static
-void cleanup_waiting_fd_table ( ThreadId tid )
-{
-   Int  i, waiters;
-
-   vg_assert(VG_(is_valid_tid)(tid));
-   vg_assert(VG_(threads)[tid].status == VgTs_WaitFD);
-   vg_assert(VG_(threads)[tid].m_eax == __NR_read 
-             || VG_(threads)[tid].m_eax == __NR_write);
-
-   /* Excessively paranoidly ... find the fd this op was waiting
-      for, and mark it as not being waited on. */
-   waiters = 0;
-   for (i = 0; i < VG_N_WAITING_FDS; i++) {
-     if (vg_waiting_fds[i].tid == tid && vg_waiting_fds[i].fd != -1) {
-         waiters++;
-         vg_assert(vg_waiting_fds[i].syscall_no == VG_(threads)[tid].m_eax);
-      }
-   }
-   vg_assert(waiters == 1);
-   for (i = 0; i < VG_N_WAITING_FDS; i++)
-     if (vg_waiting_fds[i].tid == tid && vg_waiting_fds[i].fd != -1)
-         break;
-   vg_assert(i < VG_N_WAITING_FDS);
-   vg_assert(vg_waiting_fds[i].fd != -1);
-   vg_waiting_fds[i].fd = -1; /* not in use */
-}
-
-
 static
 void handle_signal_return ( ThreadId tid )
 {
-   Char msg_buf[100];
    Bool restart_blocked_syscalls;
    struct vki_timespec * rem;
 
    vg_assert(VG_(is_valid_tid)(tid));
 
-   /* Increment signal-returned counter.  Used only to implement pause(). */
-   VG_(threads)[tid].n_signals_returned++;
-
    restart_blocked_syscalls = VG_(signal_returns)(tid);
 
    if (restart_blocked_syscalls)
       /* Easy; we don't have to do anything. */
       return;
 
-   if (VG_(threads)[tid].status == VgTs_WaitFD
-       && (VG_(threads)[tid].m_eax == __NR_read 
-           || VG_(threads)[tid].m_eax == __NR_write)) {
-      /* read() or write() interrupted.  Force a return with EINTR. */
-      cleanup_waiting_fd_table(tid);
-      SET_SYSCALL_RETVAL(tid, -VKI_EINTR);
-      VG_(threads)[tid].status = VgTs_Runnable;
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf, 
-            "read() / write() interrupted by signal; return EINTR" );
-         print_sched_event(tid, msg_buf);
-      }
-      return;
-   }
-
    if (VG_(threads)[tid].status == VgTs_Sleeping
        && VG_(threads)[tid].m_eax == __NR_nanosleep) {
       /* We interrupted a nanosleep().  The right thing to do is to
@@ -814,10 +649,6 @@
       return;
    }
 
-   if (VG_(threads)[tid].status == VgTs_WaitFD) {
-      VG_(core_panic)("handle_signal_return: unknown interrupted syscall");
-   }
-
    /* All other cases?  Just return. */
 }
 
@@ -825,11 +656,7 @@
 static
 void sched_do_syscall ( ThreadId tid )
 {
-   UInt  saved_meax, saved_sheax;
-   Int   res, syscall_no;
-   UInt  fd;
-   void* pre_res;
-   Bool  orig_fd_blockness;
+   Int   syscall_no;
    Char  msg_buf[100];
 
    vg_assert(VG_(is_valid_tid)(tid));
@@ -837,10 +664,21 @@
 
    syscall_no = VG_(threads)[tid].m_eax; /* syscall number */
 
-   if (syscall_no == __NR_nanosleep) {
+   /* Special-case nanosleep because we can.  But should we?
+
+      XXX not doing so for now, because it doesn't seem to work
+      properly, and we can use the syscall nanosleep just as easily.
+    */
+   if (0 && syscall_no == __NR_nanosleep) {
       UInt t_now, t_awaken;
       struct vki_timespec* req;
       req = (struct vki_timespec*)VG_(threads)[tid].m_ebx; /* arg1 */
+
+      if (req->tv_sec < 0 || req->tv_nsec < 0 || req->tv_nsec >= 1000000000) {
+	 SET_SYSCALL_RETVAL(tid, -VKI_EINVAL);
+	 return;
+      }
+
       t_now = VG_(read_millisecond_timer)();     
       t_awaken 
          = t_now
@@ -853,369 +691,161 @@
                                t_now, t_awaken-t_now);
 	 print_sched_event(tid, msg_buf);
       }
+      VG_(add_timeout)(tid, t_awaken);
       /* Force the scheduler to run something else for a while. */
       return;
    }
 
-   if (syscall_no != __NR_read && syscall_no != __NR_write) {
-      /* We think it's non-blocking.  Just do it in the normal way. */
-      VG_(perform_assumed_nonblocking_syscall)(tid);
-      /* The thread is still runnable. */
-      return;
-   }
-
-   /* Set the fd to nonblocking, and do the syscall, which will return
-      immediately, in order to lodge a request with the Linux kernel.
-      We later poll for I/O completion using select().  */
-
-   fd = VG_(threads)[tid].m_ebx /* arg1 */;
-
-   /* Deal with error case immediately. */
-   if (!fd_is_valid(fd)) {
-      if (VG_(needs).core_errors)
-         VG_(message)(Vg_UserMsg, 
-            "Warning: invalid file descriptor %d in syscall %s",
-            fd, syscall_no == __NR_read ? "read()" : "write()" );
-      pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
-      KERNEL_DO_SYSCALL(tid, res);
-      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
-      /* We're still runnable. */
+   /* If pre_syscall returns true, then we're done immediately */
+   if (VG_(pre_syscall)(tid)) {
+      VG_(post_syscall)(tid);
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
-      return;
-   }
-
-   /* From here onwards we know that fd is valid. */
-
-   orig_fd_blockness = fd_is_blockful(fd);
-   set_fd_nonblocking(fd);
-   vg_assert(!fd_is_blockful(fd));
-   pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
-
-   /* This trashes the thread's %eax; we have to preserve it. */
-   saved_meax  = VG_(threads)[tid].m_eax;
-   saved_sheax = VG_(threads)[tid].sh_eax;
-   KERNEL_DO_SYSCALL(tid,res);
-
-   /* Restore original blockfulness of the fd. */
-   if (orig_fd_blockness)
-      set_fd_blocking(fd);
-   else
-      set_fd_nonblocking(fd);
-
-   if (res != -VKI_EWOULDBLOCK || !orig_fd_blockness) {
-      /* Finish off in the normal way.  Don't restore %EAX, since that
-         now (correctly) holds the result of the call.  We get here if either:
-         1.  The call didn't block, or
-         2.  The fd was already in nonblocking mode before we started to
-             mess with it.  In this case, we're not expecting to handle 
-             the I/O completion -- the client is.  So don't file a 
-             completion-wait entry. 
-      */
-      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
-      /* We're still runnable. */
-      vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
-
    } else {
-
-      vg_assert(res == -VKI_EWOULDBLOCK && orig_fd_blockness);
-
-      /* It would have blocked.  First, restore %EAX to what it was
-         before our speculative call. */
-      saved_meax  = VG_(threads)[tid].m_eax  = saved_meax;
-      saved_sheax = VG_(threads)[tid].sh_eax = saved_sheax;
-      
-      /* Put this fd in a table of fds on which we are waiting for
-         completion. The arguments for select() later are constructed
-         from this table.  */
-      add_waiting_fd(tid, fd, saved_meax /* which holds the syscall # */,
-                     pre_res);
-      /* Deschedule thread until an I/O completion happens. */
-      VG_(threads)[tid].status = VgTs_WaitFD;
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
-	 print_sched_event(tid, msg_buf);
-      }
+      vg_assert(VG_(threads)[tid].status == VgTs_WaitSys);
    }
 }
 
 
-/* Find out which of the fds in vg_waiting_fds are now ready to go, by
-   making enquiries with select(), and mark them as ready.  We have to
-   wait for the requesting threads to fall into the the WaitFD state
-   before we can actually finally deliver the results, so this
-   procedure doesn't do that; complete_blocked_syscalls() does it.
 
-   It might seem odd that a thread which has done a blocking syscall
-   is not in WaitFD state; the way this can happen is if it initially
-   becomes WaitFD, but then a signal is delivered to it, so it becomes
-   Runnable for a while.  In this case we have to wait for the
-   sighandler to return, whereupon the WaitFD state is resumed, and
-   only at that point can the I/O result be delivered to it.  However,
-   this point may be long after the fd is actually ready.  
+struct timeout {
+   UInt		time;		/* time we should awaken */
+   ThreadId	tid;		/* thread which cares about this timeout */
+   struct timeout *next;
+};
 
-   So, poll_for_ready_fds() merely detects fds which are ready.
-   complete_blocked_syscalls() does the second half of the trick,
-   possibly much later: it delivers the results from ready fds to
-   threads in WaitFD state. 
-*/
-static
-void poll_for_ready_fds ( void )
+static struct timeout *timeouts;
+
+void VG_(add_timeout)(ThreadId tid, UInt time)
 {
-   vki_ksigset_t      saved_procmask;
-   vki_fd_set         readfds;
-   vki_fd_set         writefds;
-   vki_fd_set         exceptfds;
-   struct vki_timeval timeout;
-   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
-   ThreadId           tid;
-   Bool               rd_ok, wr_ok, ex_ok;
-   Char               msg_buf[100];
+   struct timeout *t = VG_(arena_malloc)(VG_AR_CORE, sizeof(*t));
+   struct timeout **prev, *tp;
 
-   struct vki_timespec* rem;
-   UInt                 t_now;
+   t->time = time;
+   t->tid = tid;
 
-   /* Awaken any sleeping threads whose sleep has expired. */
-   for (tid = 1; tid < VG_N_THREADS; tid++)
-      if (VG_(threads)[tid].status == VgTs_Sleeping)
-         break;
+   if (VG_(clo_trace_sched)) {
+      Char msg_buf[100];
+      VG_(sprintf)(msg_buf, "add_timeout: now=%u adding timeout at %u",
+		   VG_(read_millisecond_timer)(), time);
+      print_sched_event(tid, msg_buf);
+   }
 
-   /* Avoid pointless calls to VG_(read_millisecond_timer). */
-   if (tid < VG_N_THREADS) {
-      t_now = VG_(read_millisecond_timer)();
-      for (tid = 1; tid < VG_N_THREADS; tid++) {
-         if (VG_(threads)[tid].status != VgTs_Sleeping)
-            continue;
-         if (t_now >= VG_(threads)[tid].awaken_at) {
-            /* Resume this thread.  Set to zero the remaining-time
-               (second) arg of nanosleep, since it's used up all its
-               time. */
-            vg_assert(VG_(threads)[tid].m_eax == __NR_nanosleep);
-            rem = (struct vki_timespec *)VG_(threads)[tid].m_ecx; /* arg2 */
-            if (rem != NULL) {
-	       rem->tv_sec = 0;
-               rem->tv_nsec = 0;
-            }
-            /* Make the syscall return 0 (success). */
-            SET_SYSCALL_RETVAL(tid, 0);
-            
-	    /* Reschedule this thread. */
-            VG_(threads)[tid].status = VgTs_Runnable;
-            if (VG_(clo_trace_sched)) {
-               VG_(sprintf)(msg_buf, "at %d: nanosleep done", 
-                                     t_now);
-               print_sched_event(tid, msg_buf);
-            }
-         }
+   for(tp = timeouts, prev = &timeouts; 
+       tp != NULL && tp->time < time; 
+       prev = &tp->next, tp = tp->next)
+      ;
+   t->next = tp;
+   *prev = t;
+}
+
+/* Sleep for a while, but be willing to be woken. */
+static
+void idle ( void )
+{
+   struct vki_pollfd pollfd[1];
+   Int delta = -1;
+   Int fd = VG_(proxy_resfd)();
+
+   pollfd[0].fd = fd;
+   pollfd[0].events = VKI_POLLIN;
+
+   /* Look through the nearest timeouts, looking for the next future
+      one (there may be stale past timeouts).  They'll all be mopped
+      up below when the poll() finishes. */
+   if (timeouts != NULL) {
+      struct timeout *tp;
+      Bool wicked = False;
+      UInt now = VG_(read_millisecond_timer)();
+
+      for(tp = timeouts; tp != NULL && tp->time < now; tp = tp->next) {
+	 /* If a thread is still sleeping in the past, make it runnable */
+	 ThreadState *tst = VG_(get_ThreadState)(tp->tid);
+	 if (tst->status == VgTs_Sleeping)
+	    tst->status = VgTs_Runnable;
+	 wicked = True;		/* no sleep for the wicked */
       }
-   }
 
-   /* And look for threads waiting on file descriptors which are now
-      ready for I/O.*/
-   timeout.tv_sec = 0;
-   timeout.tv_usec = 0;
-
-   VKI_FD_ZERO(&readfds);
-   VKI_FD_ZERO(&writefds);
-   VKI_FD_ZERO(&exceptfds);
-   fd_max = -1;
-   for (i = 0; i < VG_N_WAITING_FDS; i++) {
-      if (vg_waiting_fds[i].fd == -1 /* not in use */) 
-         continue;
-      if (vg_waiting_fds[i].ready /* already ready? */) 
-         continue;
-      fd = vg_waiting_fds[i].fd;
-      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
-      vg_assert(fd >= 0);
-      if (fd > fd_max) 
-         fd_max = fd;
-      tid = vg_waiting_fds[i].tid;
-      vg_assert(VG_(is_valid_tid)(tid));
-      syscall_no = vg_waiting_fds[i].syscall_no;
-      switch (syscall_no) {
-         case __NR_read:
-            /* In order to catch timeout events on fds which are
-               readable and which have been ioctl(TCSETA)'d with a
-               VTIMEout, we appear to need to ask if the fd is
-               writable, for some reason.  Ask me not why.  Since this
-               is strange and potentially troublesome we only do it if
-               the user asks specially. */
-            if (VG_(strstr)(VG_(clo_weird_hacks), "ioctl-VTIME") != NULL)
-               VKI_FD_SET(fd, &writefds);
-            VKI_FD_SET(fd, &readfds); break;
-         case __NR_write: 
-            VKI_FD_SET(fd, &writefds); break;
-         default: 
-            VG_(core_panic)("poll_for_ready_fds: unexpected syscall");
-            /*NOTREACHED*/
-            break;
+      if (tp != NULL) {
+	 delta = tp->time - now;
+	 vg_assert(delta >= 0);
       }
+      if (wicked)
+	 delta = 0;
    }
 
-   /* Short cut: if no fds are waiting, give up now. */
-   if (fd_max == -1)
-      return;
+   /* gotta wake up for something! */
+   vg_assert(fd != -1 || delta != -1);
 
-   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
-      getting interrupted. */
-   VG_(block_all_host_signals)( &saved_procmask );
-
-   n_ready = VG_(select)
-                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
-   if (VG_(is_kerror)(n_ready)) {
-      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
-      VG_(core_panic)("poll_for_ready_fds: select failed?!");
-      /*NOTREACHED*/
-   }
+   /* If we need to do signal routing, then poll for pending signals
+      every VG_(clo_signal_polltime) mS */
+   if (VG_(do_signal_routing) && (delta > VG_(clo_signal_polltime) || delta == -1))
+      delta = VG_(clo_signal_polltime);
    
-   /* UNBLOCK ALL SIGNALS */
-   VG_(restore_all_host_signals)( &saved_procmask );
-
-   /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
-
-   if (n_ready == 0)
-      return;   
-
-   /* Inspect all the fds we know about, and handle any completions that
-      have happened. */
-   /*
-   VG_(printf)("\n\n");
-   for (fd = 0; fd < 100; fd++)
-     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
-       VG_(printf)("X"); } else { VG_(printf)("."); };
-   VG_(printf)("\n\nfd_max = %d\n", fd_max);
-   */
-
-   for (fd = 0; fd <= fd_max; fd++) {
-      rd_ok = VKI_FD_ISSET(fd, &readfds);
-      wr_ok = VKI_FD_ISSET(fd, &writefds);
-      ex_ok = VKI_FD_ISSET(fd, &exceptfds);
-
-      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
-      if (n_ok == 0) 
-         continue;
-      if (n_ok > 1) {
-         VG_(printf)("offending fd = %d\n", fd);
-         VG_(core_panic)("poll_for_ready_fds: multiple events on fd");
-      }
-
-      /* An I/O event completed for fd.  Find the thread which
-         requested this. */
-      for (i = 0; i < VG_N_WAITING_FDS; i++) {
-         if (vg_waiting_fds[i].fd == -1 /* not in use */) 
-            continue;
-         if (vg_waiting_fds[i].fd == fd) 
-            break;
-      }
-
-      /* And a bit more paranoia ... */
-      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
-
-      /* Mark the fd as ready. */      
-      vg_assert(! vg_waiting_fds[i].ready);
-      vg_waiting_fds[i].ready = True;
+   if (VG_(clo_trace_sched)) {
+      Char msg_buf[100];
+      VG_(sprintf)(msg_buf, "idle: waiting for %dms and fd %d",
+		   delta, fd);
+      print_sched_event(0, msg_buf);
    }
-}
 
+   VG_(poll)(pollfd, fd != -1 ? 1 : 0, delta);
 
-/* See comment attached to poll_for_ready_fds() for explaination. */
-static
-void complete_blocked_syscalls ( void )
-{
-   Int      fd, i, res;
-   UInt     syscall_no;
-   void*    pre_res;
-   ThreadId tid;
-   Char     msg_buf[100];
+   /* See if there's anything on the timeout list which needs
+      waking, and mop up anything in the past. */
+   {
+      UInt now = VG_(read_millisecond_timer)();
+      struct timeout *tp;
 
-   /* Inspect all the outstanding fds we know about. */
+      tp = timeouts;
 
-   for (i = 0; i < VG_N_WAITING_FDS; i++) {
-      if (vg_waiting_fds[i].fd == -1 /* not in use */) 
-         continue;
-      if (! vg_waiting_fds[i].ready)
-         continue;
+      while(tp && tp->time <= now) {
+	 struct timeout *dead;
+	 ThreadState *tst;
+	 
+	 tst = VG_(get_ThreadState)(tp->tid);
+	
+	 if (VG_(clo_trace_sched)) {
+	    Char msg_buf[100];
+	    VG_(sprintf)(msg_buf, "idle: now=%u removing timeout at %u",
+			 now, tp->time);
+	    print_sched_event(tp->tid, msg_buf);
+	 }
 
-      fd  = vg_waiting_fds[i].fd;
-      tid = vg_waiting_fds[i].tid;
-      vg_assert(VG_(is_valid_tid)(tid));
+	 /* If awaken_at != tp->time then it means the timeout is
+	    stale and we should just ignore it. */
+	 if(tst->awaken_at == tp->time) {
+	    switch(tst->status) {
+	    case VgTs_Sleeping:
+	       tst->awaken_at = 0xFFFFFFFF;
+	       tst->status = VgTs_Runnable;
+	       break;
 
-      /* The thread actually has to be waiting for the I/O event it
-         requested before we can deliver the result! */
-      if (VG_(threads)[tid].status != VgTs_WaitFD)
-         continue;
+	    case VgTs_WaitCV:
+	       do_pthread_cond_timedwait_TIMEOUT(tst->tid);
+	       break;
 
-      /* Ok, actually do it!  We can safely use %EAX as the syscall
-         number, because the speculative call made by
-         sched_do_syscall() doesn't change %EAX in the case where the
-         call would have blocked. */
-      syscall_no = vg_waiting_fds[i].syscall_no;
-      vg_assert(syscall_no == VG_(threads)[tid].m_eax);
+	    default:
+	       /* This is a bit odd but OK; if a thread had a timeout
+		  but woke for some other reason (signal, condvar
+		  wakeup), then it will still be on the list. */
+	       if (0)
+		  VG_(printf)("idle(): unexpected status tp->tid=%d tst->status = %d\n", 
+			      tp->tid, tst->status);
+	       break;
+	    }
+	 }
 
-      pre_res = vg_waiting_fds[i].pre_result;
+	 dead = tp;
+	 tp = tp->next;
 
-      /* In a rare case pertaining to writing into a pipe, write()
-         will block when asked to write > 4096 bytes even though the
-         kernel claims, when asked via select(), that blocking will
-         not occur for a write on that fd.  This can cause deadlocks.
-         An easy answer is to limit the size of the write to 4096
-         anyway and hope that the client program's logic can handle
-         the short write.  That shoulds dubious to me, so we don't do
-         it by default. */
-      if (syscall_no == __NR_write 
-          && VG_(threads)[tid].m_edx /* arg3, count */ > 4096
-          && VG_(strstr)(VG_(clo_weird_hacks), "truncate-writes") != NULL) {
-         /* VG_(printf)("truncate write from %d to 4096\n", 
-            VG_(threads)[tid].m_edx ); */
-         VG_(threads)[tid].m_edx = 4096;
+	 VG_(arena_free)(VG_AR_CORE, dead);
       }
 
-      KERNEL_DO_SYSCALL(tid,res);
-      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
-
-      /* Reschedule. */
-      VG_(threads)[tid].status = VgTs_Runnable;
-      /* Mark slot as no longer in use. */
-      vg_waiting_fds[i].fd = -1;
-      /* pp_sched_status(); */
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
-	 print_sched_event(tid, msg_buf);
-      }
+      timeouts = tp;
    }
 }
 
 
-static
-void check_for_pthread_cond_timedwait ( void )
-{
-   Int  i;
-   UInt now;
-   for (i = 1; i < VG_N_THREADS; i++) {
-      if (VG_(threads)[i].status != VgTs_WaitCV)
-         continue;
-      if (VG_(threads)[i].awaken_at == 0xFFFFFFFF /* no timeout */)
-         continue;
-      now = VG_(read_millisecond_timer)();
-      if (now >= VG_(threads)[i].awaken_at) {
-         do_pthread_cond_timedwait_TIMEOUT(i);
-      }
-   }
-}
-
-
-static
-void nanosleep_for_a_while ( void )
-{
-   Int res;
-   struct vki_timespec req;
-   struct vki_timespec rem;
-   req.tv_sec = 0;
-   req.tv_nsec = 10 * 1000 * 1000;
-   res = VG_(nanosleep)( &req, &rem );   
-   vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
-}
-
-
 /* ---------------------------------------------------------------------
    The scheduler proper.
    ------------------------------------------------------------------ */
@@ -1231,8 +861,8 @@
    UInt     trc;
    UInt     dispatch_ctr_SAVED;
    Int      done_this_time, n_in_bounded_wait;
+   Int	    n_exists, n_waiting_for_reaper;
    Addr     trans_addr;
-   Bool     sigs_delivered;
 
    /* Start with the root thread.  tid in general indicates the
       currently runnable/just-finished-running thread. */
@@ -1265,42 +895,34 @@
          /* For stats purposes only. */
          VG_(num_scheduling_events_MAJOR) ++;
 
-         /* See if any I/O operations which we were waiting for have
-            completed, and, if so, make runnable the relevant waiting
-            threads. */
-         poll_for_ready_fds();
-         complete_blocked_syscalls();
-         check_for_pthread_cond_timedwait();
+	 /* Route signals to their proper places */
+	 VG_(route_signals)();
 
-         /* See if there are any signals which need to be delivered.  If
-            so, choose thread(s) to deliver them to, and build signal
-            delivery frames on those thread(s) stacks. */
-
-	 /* Be careful about delivering signals to a thread waiting
-            for a mutex.  In particular, when the handler is running,
-            that thread is temporarily apparently-not-waiting for the
-            mutex, so if it is unlocked by another thread whilst the
-            handler is running, this thread is not informed.  When the
-            handler returns, the thread resumes waiting on the mutex,
-            even if, as a result, it has missed the unlocking of it.
-            Potential deadlock.  This sounds all very strange, but the
-            POSIX standard appears to require this behaviour.  */
-         sigs_delivered = VG_(deliver_signals)();
-	 if (sigs_delivered)
-            VG_(do_sanity_checks)( False );
+         /* See if any of the proxy LWPs report any activity: either a
+	    syscall completing or a signal arriving. */
+	 VG_(proxy_results)();
 
          /* Try and find a thread (tid) to run. */
          tid_next = tid;
+	 if (prefer_sched != VG_INVALID_THREADID) {
+	    tid_next = prefer_sched-1;
+	    prefer_sched = VG_INVALID_THREADID;
+	 }
          n_in_bounded_wait = 0;
+	 n_exists = 0;
+	 n_waiting_for_reaper = 0;
          while (True) {
             tid_next++;
             if (tid_next >= VG_N_THREADS) tid_next = 1;
-            if (VG_(threads)[tid_next].status == VgTs_WaitFD
-                || VG_(threads)[tid_next].status == VgTs_Sleeping
-                || VG_(threads)[tid_next].status == VgTs_WaitSIG
+            if (VG_(threads)[tid_next].status == VgTs_Sleeping
+                || VG_(threads)[tid_next].status == VgTs_WaitSys
                 || (VG_(threads)[tid_next].status == VgTs_WaitCV 
                     && VG_(threads)[tid_next].awaken_at != 0xFFFFFFFF))
                n_in_bounded_wait ++;
+	    if (VG_(threads)[tid_next].status != VgTs_Empty)
+	       n_exists++;
+	    if (VG_(threads)[tid_next].status == VgTs_WaitJoiner)
+	       n_waiting_for_reaper++;
             if (VG_(threads)[tid_next].status == VgTs_Runnable) 
                break; /* We can run this one. */
             if (tid_next == tid) 
@@ -1315,6 +937,12 @@
             break;
 	 }
 
+	 /* All threads have exited - pretend someone called exit() */
+	 if (n_waiting_for_reaper == n_exists) {
+	    VG_(exitcode) = 0;	/* ? */
+	    return VgSrc_ExitSyscall;
+	 }
+
          /* We didn't find a runnable thread.  Now what? */
          if (n_in_bounded_wait == 0) {
             /* No runnable threads and no prospect of any appearing
@@ -1324,10 +952,9 @@
             return VgSrc_Deadlock;
          }
 
-         /* At least one thread is in a fd-wait state.  Delay for a
-            while, and go round again, in the hope that eventually a
-            thread becomes runnable. */
-         nanosleep_for_a_while();
+	 /* Nothing needs doing, so sit in idle until either a timeout
+	    happens or a thread's syscall completes. */
+         idle();
 	 /* pp_sched_status(); */
 	 /* VG_(printf)("."); */
       }
@@ -1463,9 +1090,7 @@
                the unprotected malloc/free system. */
 
             if (VG_(threads)[tid].m_eax == __NR_exit
-#               if defined(__NR_exit_group)
                 || VG_(threads)[tid].m_eax == __NR_exit_group
-#               endif
                ) {
 
                /* If __NR_exit, remember the supplied argument. */
@@ -1500,14 +1125,8 @@
             }
 
             /* We've dealt with __NR_exit at this point. */
-            { Bool b
-                  = VG_(threads)[tid].m_eax != __NR_exit
-#                   if defined(__NR_exit_group)
-                    && VG_(threads)[tid].m_eax != __NR_exit_group
-#                   endif
-                    ;
-              vg_assert(b);
-            }
+	    vg_assert(VG_(threads)[tid].m_eax != __NR_exit && 
+		      VG_(threads)[tid].m_eax != __NR_exit_group);
 
             /* Trap syscalls to __NR_sched_yield and just have this
                thread yield instead.  Not essential, just an
@@ -1529,12 +1148,6 @@
 #           endif
 
             if (VG_(threads)[tid].status == VgTs_Runnable) {
-               /* Better do a signal check, since if in a tight loop
-                  with a slow syscall it may be a very long time
-                  before we get back to the main signal check in Stage 1. */
-               sigs_delivered = VG_(deliver_signals)();
-               if (sigs_delivered)
-                  VG_(do_sanity_checks)( False );
                continue; /* with this thread */
             } else {
                goto stage1;
@@ -1583,9 +1196,19 @@
             break;
 
          case VG_TRC_UNRESUMABLE_SIGNAL:
-            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
-               away.  Again, do nothing, so we wind up back at Phase
-               1, whereupon the signal will be "delivered". */
+            /* It got a SIGSEGV/SIGBUS/SIGILL/SIGFPE, which we need to
+               deliver right away.  */
+	    vg_assert(VG_(unresumable_siginfo).si_signo == VKI_SIGSEGV ||
+		      VG_(unresumable_siginfo).si_signo == VKI_SIGBUS  ||
+		      VG_(unresumable_siginfo).si_signo == VKI_SIGILL  ||
+		      VG_(unresumable_siginfo).si_signo == VKI_SIGFPE);
+	    vg_assert(VG_(longjmpd_on_signal) == VG_(unresumable_siginfo).si_signo);
+
+	    /* make sure we've unblocked the signals which the handler blocked */
+	    VG_(unblock_host_signal)(VG_(longjmpd_on_signal));
+
+	    VG_(deliver_signal)(tid, &VG_(unresumable_siginfo), False);
+	    VG_(unresumable_siginfo).si_signo = 0; /* done */
 	    break;
 
          default: 
@@ -1622,6 +1245,41 @@
    return VgSrc_BbsDone;
 }
 
+void VG_(need_resched) ( ThreadId prefer )
+{
+   /* Tell the scheduler now might be a good time to find a new
+      runnable thread, because something happened which woke a thread
+      up.
+
+      NB: This can be called unsynchronized from either a signal
+      handler, or from another LWP (ie, real kernel thread).
+
+      In principle this could simply be a matter of setting
+      VG_(dispatch_ctr) to a small value (say, 2), which would make
+      any running code come back to the scheduler fairly quickly.
+
+      However, since the scheduler implements a strict round-robin
+      policy with only one priority level, there are, by definition,
+      no better threads to be running than the current thread anyway,
+      so we may as well ignore this hint.  For processes with a
+      mixture of compute and I/O bound threads, this means the compute
+      threads could introduce longish latencies before the I/O threads
+      run.  For programs with only I/O bound threads, need_resched
+      won't have any effect anyway.
+
+      OK, so I've added command-line switches to enable low-latency
+      syscalls and signals.  The prefer_sched variable is in effect
+      the ID of a single thread which has higher priority than all the
+      others.  If set, the scheduler will prefer to schedule that
+      thread over all others.  Naturally, this could lead to
+      starvation or other unfairness.
+    */
+
+   if (VG_(dispatch_ctr) > 10)
+      VG_(dispatch_ctr) = 2;
+   prefer_sched = prefer;
+}
+
 
 /* ---------------------------------------------------------------------
    The pthread implementation.
@@ -1671,13 +1329,7 @@
    /* .cancel_pend will hold &thread_exit_wrapper */
    VG_(threads)[tid].m_eip = (UInt)VG_(threads)[tid].cancel_pend;
 
-   /* Clear out the waited-for-signals set, if needed, so as not to
-      cause the sanity checker to bomb before
-      cleanup_after_thread_exited() really cleans up properly for this
-      thread. */
-   if (VG_(threads)[tid].status == VgTs_WaitSIG) {
-      VG_(ksigemptyset)( & VG_(threads)[tid].sigs_waited_for );
-   }
+   VG_(proxy_abort_syscall)(tid);
 
    VG_(threads)[tid].status = VgTs_Runnable;
 
@@ -1697,32 +1349,23 @@
 /* Release resources and generally clean up once a thread has finally
    disappeared. */
 static
-void cleanup_after_thread_exited ( ThreadId tid )
+void cleanup_after_thread_exited ( ThreadId tid, Bool forcekill )
 {
-   Int           i;
-   vki_ksigset_t irrelevant_sigmask;
    vg_assert(VG_(is_valid_or_empty_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Empty);
    /* Its stack is now off-limits */
    VG_TRACK( die_mem_stack, VG_(threads)[tid].stack_base,
                             VG_(threads)[tid].stack_size );
 
-   /* Forget about any pending signals directed specifically at this
-      thread, and get rid of signal handlers specifically arranged for
-      this thread. */
-   VG_(block_all_host_signals)( &irrelevant_sigmask );
-   VG_(handle_SCSS_change)( False /* lazy update */ );
-
-   /* Clean up the waiting_fd table */
-   for (i = 0; i < VG_N_WAITING_FDS; i++) {
-      if (vg_waiting_fds[i].tid == tid) {
-         vg_waiting_fds[i].fd = -1; /* not in use */
-      }
-   }
-
    /* Deallocate its LDT, if it ever had one. */
    VG_(deallocate_LDT_for_thread)( VG_(threads)[tid].ldt );
    VG_(threads)[tid].ldt = NULL;
+
+   /* Not interested in the timeout anymore */
+   VG_(threads)[tid].awaken_at = 0xFFFFFFFF;
+
+   /* Delete proxy LWP */
+   VG_(proxy_delete)(tid, forcekill);
 }
 
 
@@ -1744,8 +1387,16 @@
       if (jee == VG_INVALID_THREADID) 
          continue;
       vg_assert(VG_(is_valid_tid)(jee));
-      if (VG_(threads)[jee].status != VgTs_WaitJoiner)
+      if (VG_(threads)[jee].status != VgTs_WaitJoiner) {
+	 /* if joinee has become detached, then make join fail with
+	    EINVAL */
+	 if (VG_(threads)[jee].detached) {
+	    VG_(threads)[jnr].status = VgTs_Runnable;
+	    VG_(threads)[jnr].joiner_jee_tid = VG_INVALID_THREADID;
+	    SET_PTHREQ_RETVAL(jnr, VKI_EINVAL);
+	 }
          continue;
+      }
       /* ok!  jnr is waiting to join with jee, and jee is waiting to be
          joined by ... well, any thread.  So let's do it! */
 
@@ -1765,7 +1416,7 @@
 
       /* Joinee is discarded */
       VG_(threads)[jee].status = VgTs_Empty; /* bye! */
-      cleanup_after_thread_exited ( jee );
+      cleanup_after_thread_exited ( jee, False );
       if (VG_(clo_trace_sched)) {
 	 VG_(sprintf)(msg_buf,
 		      "rendezvous with joinee %d.  %d resumes, %d exits.",
@@ -1784,19 +1435,21 @@
 
 /* Nuke all threads other than tid.  POSIX specifies that this should
    happen in __NR_exec, and after a __NR_fork() when I am the child,
-   as POSIX requires. */
+   as POSIX requires.  Also used at process exit time with
+   me==VG_INVALID_THREADID */
 void VG_(nuke_all_threads_except) ( ThreadId me )
 {
    ThreadId tid;
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       if (tid == me
-          || VG_(threads)[tid].status == VgTs_Empty) 
+          || VG_(threads)[tid].status == VgTs_Empty)
          continue;
       if (0)
          VG_(printf)(
             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
+      VG_(proxy_delete)(tid, True);
       VG_(threads)[tid].status = VgTs_Empty;
-      cleanup_after_thread_exited( tid );
+      cleanup_after_thread_exited( tid, True );
    }
 }
 
@@ -1949,7 +1602,6 @@
 void do__set_or_get_detach ( ThreadId tid, 
                              Int what, ThreadId det )
 {
-   ThreadId i;
    Char     msg_buf[100];
    /* VG_(printf)("do__set_or_get_detach tid %d what %d det %d\n", 
       tid, what, det); */
@@ -1972,23 +1624,11 @@
       case 2: /* get */
          SET_PTHREQ_RETVAL(tid, VG_(threads)[det].detached ? 1 : 0);
          return;
-      case 1: /* set detached.  If someone is in a join-wait for det,
-                 do not detach. */
-         for (i = 1; i < VG_N_THREADS; i++) {
-            if (VG_(threads)[i].status == VgTs_WaitJoinee
-                && VG_(threads)[i].joiner_jee_tid == det) {
-               SET_PTHREQ_RETVAL(tid, 0);
-               if (VG_(clo_trace_sched)) {
-                  VG_(sprintf)(msg_buf,
-                     "tid %d not detached because %d in join-wait for it",
-                     det, i);
-                  print_sched_event(tid, msg_buf);
-               }
-               return;
-            }
-         }
+      case 1:
          VG_(threads)[det].detached = True;
          SET_PTHREQ_RETVAL(tid, 0); 
+	 /* wake anyone who was joining on us */
+	 maybe_rendezvous_joiners_and_joinees();
          return;
       case 0: /* set not detached */
          VG_(threads)[det].detached = False;
@@ -2018,12 +1658,15 @@
       }
       VG_(record_pthread_error)( tid, 
          "pthread_cancel: target thread does not exist, or invalid");
-      SET_PTHREQ_RETVAL(tid, -VKI_ESRCH);
+      SET_PTHREQ_RETVAL(tid, VKI_ESRCH);
       return;
    }
 
    VG_(threads)[cee].cancel_pend = cancelpend_hdlr;
 
+   /* interrupt a pending syscall */
+   VG_(proxy_abort_syscall)(cee);
+
    if (VG_(clo_trace_sched)) {
       VG_(sprintf)(msg_buf, 
          "set_cancelpend (hdlr = %p, set by tid %d)", 
@@ -2035,7 +1678,8 @@
    SET_PTHREQ_RETVAL(tid, 0);
 
    /* Perhaps we can nuke the cancellee right now? */
-   do__testcancel(cee);
+   if (!VG_(threads)[cee].cancel_ty) /* if PTHREAD_CANCEL_ASYNCHRONOUS */
+      do__testcancel(cee);
 }
 
 
@@ -2054,7 +1698,7 @@
       VG_(record_pthread_error)( tid, 
          "pthread_join: attempt to join to self");
       SET_PTHREQ_RETVAL(tid, EDEADLK); /* libc constant, not a kernel one */
-      VG_(threads)[tid].status = VgTs_Runnable;
+      vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
       return;
    }
 
@@ -2063,12 +1707,12 @@
    maybe_rendezvous_joiners_and_joinees();
 
    /* Is this a sane request? */
-   if ( ! VG_(is_valid_tid)(jee) ) {
+   if ( ! VG_(is_valid_tid)(jee) ||
+	VG_(threads)[jee].detached) {
       /* Invalid thread to join to. */
       VG_(record_pthread_error)( tid, 
-         "pthread_join: target thread does not exist, or invalid");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
-      VG_(threads)[tid].status = VgTs_Runnable;
+         "pthread_join: target thread does not exist, invalid, or detached");
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -2081,8 +1725,8 @@
          VG_(record_pthread_error)( tid, 
             "pthread_join: another thread already "
             "in join-wait for target thread");
-         SET_PTHREQ_RETVAL(tid, EINVAL);
-         VG_(threads)[tid].status = VgTs_Runnable;
+         SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
+	 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
          return;
       }
    }
@@ -2136,11 +1780,12 @@
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
    VG_(threads)[tid].status = VgTs_Empty; /* bye! */
-   cleanup_after_thread_exited ( tid );
+   cleanup_after_thread_exited ( tid, False );
    if (VG_(clo_trace_sched)) {
       VG_(sprintf)(msg_buf, "do__quit (detached thread exit)");
       print_sched_event(tid, msg_buf);
    }
+   maybe_rendezvous_joiners_and_joinees();
    /* Return value is irrelevant; this thread will not get
       rescheduled. */
 }
@@ -2264,7 +1909,12 @@
 
    /* We inherit our parent's signal mask. */
    VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask;
-   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
+
+   /* Now that the signal mask is set up, create a proxy LWP for this thread */
+   VG_(proxy_create)(tid);
+
+   /* Set the proxy's signal mask */
+   VG_(proxy_setsigmask)(tid);
 
    /* return child's tid to parent */
    SET_PTHREQ_RETVAL(parent_tid, tid); /* success */
@@ -2388,7 +2038,7 @@
    if (mutex == NULL) {
       VG_(record_pthread_error)( tid, 
          "pthread_mutex_lock/trylock: mutex is NULL");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -2408,7 +2058,7 @@
       default:
          VG_(record_pthread_error)( tid, 
             "pthread_mutex_lock/trylock: mutex is invalid");
-         SET_PTHREQ_RETVAL(tid, EINVAL);
+         SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
          return;
    }
 
@@ -2492,7 +2142,7 @@
    if (mutex == NULL) {
       VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is NULL");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -2520,7 +2170,7 @@
       default:
          VG_(record_pthread_error)( tid, 
             "pthread_mutex_unlock: mutex is invalid");
-         SET_PTHREQ_RETVAL(tid, EINVAL);
+         SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
          return;
    }
 
@@ -2640,7 +2290,6 @@
             cv, mx );
          print_pthread_event(tid, msg_buf);
       }
-
    }
 }
 
@@ -2711,7 +2360,7 @@
          }
 
       }
-
+   
       n_to_release--;
    }
 }
@@ -2742,7 +2391,7 @@
    if (mutex == NULL || cond == NULL) {
       VG_(record_pthread_error)( tid, 
          "pthread_cond_wait/timedwait: cond or mutex is NULL");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -2762,7 +2411,7 @@
       default:
          VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is invalid");
-         SET_PTHREQ_RETVAL(tid, EINVAL);
+         SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
          return;
    }
 
@@ -2772,7 +2421,7 @@
          VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is unlocked "
             "or is locked but not owned by thread");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -2781,6 +2430,8 @@
    VG_(threads)[tid].associated_cv = cond;
    VG_(threads)[tid].associated_mx = mutex;
    VG_(threads)[tid].awaken_at     = ms_end;
+   if (ms_end != 0xFFFFFFFF)
+      VG_(add_timeout)(tid, ms_end);
 
    if (VG_(clo_trace_pthread_level) >= 1) {
       VG_(sprintf)(msg_buf, 
@@ -2817,7 +2468,7 @@
    if (cond == NULL) {
       VG_(record_pthread_error)( tid, 
          "pthread_cond_signal/broadcast: cond is NULL");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
    
@@ -2893,11 +2544,10 @@
          break;
 
    if (i == VG_N_THREAD_KEYS) {
-      /* SET_PTHREQ_RETVAL(tid, EAGAIN); 
-         return; 
-      */
-      VG_(core_panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;"
-                      " increase and recompile");
+      VG_(message)(Vg_UserMsg, "pthread_key_create() asked for too many keys (more than %d): increase VG_N_THREAD_KEYS and recompile Valgrind.",
+		   VG_N_THREAD_KEYS);
+      SET_PTHREQ_RETVAL(tid, EAGAIN); 
+      return; 
    }
 
    vg_thread_keys[i].inuse      = True;
@@ -2929,7 +2579,7 @@
    if (!is_valid_key(key)) {
       VG_(record_pthread_error)( tid, 
          "pthread_key_delete: key is invalid");
-      SET_PTHREQ_RETVAL(tid, EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -2949,7 +2599,7 @@
    void** specifics_ptr;
    Char   msg_buf[100];
 
-   if (VG_(clo_trace_pthread_level) >= 1) {
+   if (VG_(clo_trace_pthread_level) >= 2) {
       VG_(sprintf)(msg_buf, "pthread_getspecific_ptr" );
       print_pthread_event(tid, msg_buf);
    }
@@ -2997,7 +2647,7 @@
                                   CleanupEntry* cu )
 {
    Char msg_buf[100];
-   if (VG_(clo_trace_pthread_level) >= 1) {
+   if (VG_(clo_trace_pthread_level) >= 2) {
       VG_(sprintf)(msg_buf, 
          "get_key_destr_and_arg (key = %d)", key );
       print_pthread_event(tid, msg_buf);
@@ -3072,33 +2722,6 @@
 
 
 static
-void do_sigwait ( ThreadId tid,
-                  vki_ksigset_t* set, 
-                  Int* sig )
-{
-   vki_ksigset_t irrelevant_sigmask;
-   Char          msg_buf[100];
-
-   if (VG_(clo_trace_signals) || VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf, 
-         "suspend due to sigwait(): set %p, sig %p",
-         set, sig );
-      print_pthread_event(tid, msg_buf);
-   }
-
-   vg_assert(VG_(is_valid_tid)(tid) 
-             && VG_(threads)[tid].status == VgTs_Runnable);
-
-   /* Change SCSS */
-   VG_(threads)[tid].sigs_waited_for = *set;
-   VG_(threads)[tid].status = VgTs_WaitSIG;
-
-   VG_(block_all_host_signals)( &irrelevant_sigmask );
-   VG_(handle_SCSS_change)( False /* lazy update */ );
-}
-
-
-static
 void do_pthread_kill ( ThreadId tid, /* me */
                        ThreadId thread, /* thread to signal */
                        Int sig )
@@ -3118,12 +2741,18 @@
    if (!VG_(is_valid_tid)(thread)) {
       VG_(record_pthread_error)( tid, 
          "pthread_kill: invalid target thread");
-      SET_PTHREQ_RETVAL(tid, -VKI_ESRCH);
+      SET_PTHREQ_RETVAL(tid, VKI_ESRCH);
+      return;
+   }
+
+   if (sig == 0) {
+      /* OK, signal 0 is just for testing */
+      SET_PTHREQ_RETVAL(tid, 0);
       return;
    }
 
    if (sig < 1 || sig > VKI_KNSIG) {
-      SET_PTHREQ_RETVAL(tid, -VKI_EINVAL);
+      SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
       return;
    }
 
@@ -3352,10 +2981,6 @@
          do__testcancel ( tid );
          break;
 
-      case VG_USERREQ__GET_N_SIGS_RETURNED:
-         SET_PTHREQ_RETVAL(tid, VG_(threads)[tid].n_signals_returned);
-         break;
-
       case VG_USERREQ__PTHREAD_JOIN:
          do_pthread_join( tid, arg[1], (void**)(arg[2]) );
          break;
@@ -3416,12 +3041,6 @@
                               (vki_ksigset_t*)(arg[3]) );
 	 break;
 
-      case VG_USERREQ__SIGWAIT:
-         do_sigwait ( tid,
-                      (vki_ksigset_t*)(arg[1]),
-                      (Int*)(arg[2]) );
-	 break;
-
       case VG_USERREQ__PTHREAD_KILL:
          do_pthread_kill ( tid, arg[1], arg[2] );
 	 break;
@@ -3555,6 +3174,26 @@
    pthread_mutex_t* mx;
    pthread_cond_t*  cv;
    Int              i;
+   struct timeout*  top;
+   UInt		    lasttime = 0;
+
+   for(top = timeouts; top != NULL; top = top->next) {
+      vg_assert(top->time >= lasttime);
+      vg_assert(VG_(is_valid_or_empty_tid)(top->tid));
+
+#if 0
+      /* assert timeout entry is either stale, or associated with a
+	 thread in the right state
+	 
+	 XXX disable for now - can be stale, but times happen to match
+      */
+      vg_assert(VG_(threads)[top->tid].awaken_at != top->time ||
+		VG_(threads)[top->tid].status == VgTs_Sleeping ||
+		VG_(threads)[top->tid].status == VgTs_WaitCV);
+#endif
+
+      lasttime = top->time;
+   }
 
    /* VG_(printf)("scheduler_sanity\n"); */
    for (i = 1; i < VG_N_THREADS; i++) {
@@ -3604,15 +3243,6 @@
                "VG_PTHREAD_STACK_SIZE in vg_include.h and recompile.");
             VG_(exit)(1);
 	 }
-
-         if (VG_(threads)[i].status == VgTs_WaitSIG) {
-            vg_assert( ! VG_(kisemptysigset)(
-                            & VG_(threads)[i].sigs_waited_for) );
-	 } else {
-            vg_assert( VG_(kisemptysigset)(
-                          & VG_(threads)[i].sigs_waited_for) );
-	 }
-
       }
    }
 
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index e87bdfa..bdbd841 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -29,35 +29,66 @@
    The GNU General Public License is contained in the file COPYING.
 */
 
+/* 
+   New signal handling.
+
+   Now that all threads have a ProxyLWP to deal with signals for them,
+   we can use the kernel to do a lot more work for us.  The kernel
+   will deal with blocking signals, pending blocked signals, queues
+   and thread selection.  We just need to deal with setting a signal
+   handler and signal delivery.
+
+   In order to match the proper kernel signal semantics, the proxy LWP
+   which receives a signal goes through an exchange of messages with
+   the scheduler LWP.  When the proxy first gets a signal, it
+   immediately blocks all signals and sends a message back to the
+   scheduler LWP.  It then enters a SigACK state, in which requests to
+   run system calls are ignored, and all signals remain blocked.  When
+   the scheduler gets the signal message, it sets up the thread to
+   enter its signal handler, and sends a SigACK message back to the
+   proxy, which includes the signal mask to be applied while running
+   the handler.  On receiving SigACK, the proxy sets the new signal
+   mask and reverts to its normal mode of operation. (All this is
+   implemented in vg_syscalls.c)
+
+   This protocol allows the application thread to take delivery of the
+   signal at some arbitrary time after the signal was sent to the
+   process, while still getting proper signal delivery semantics (most
+   notably, getting the signal block sets right while running the
+   signal handler, and not allowing recursion where there wouldn't
+   have been normally).
+
+   Important point: the main LWP *always* has all signals blocked
+   except for SIGSEGV, SIGBUS, SIGFPE and SIGILL (ie, signals which
+   are raised synchronously).  If the kernel supports thread groups
+   with shared signal state (Linux 2.5+, RedHat's 2.4), then these are
+   the only signals it needs to handle.
+
+   If we get a synchronous signal, the details are placed into
+   VG_(unresumable_siginfo) and we longjmp back into the scheduler,
+   since we can't resume executing the client code.  The scheduler
+   immediately starts signal delivery to the thread which generated
+   the signal.
+
+   On older kernels without thread-groups, we need to poll the pending
+   signal with sigtimedwait() and farm any signals off to the
+   appropriate proxy LWP.
+ */
 
 #include "vg_include.h"
-#include "vg_unsafe.h"
-
-/* Sidestep the normal check which disallows using valgrind.h
-   directly. */
-#define __VALGRIND_SOMESKIN_H
-#include "valgrind.h"  /* for VALGRIND_MAGIC_SEQUENCE */
+#include <stddef.h>		/* OK, no library dependencies */
 
 /* Define to give more sanity checking for signals. */
 #define DEBUG_SIGNALS
 
 
-/* KNOWN BUGS 24 May 02:
-
-   - A signal is not masked in its own handler.  Neither are the
-     signals in the signal's blocking mask.
-
-   - There is only one pending set for the entire process, whereas
-     POSIX seems to require each thread have its own pending set.
-     This means that a signal can only be pending for one thread at
-     a time.
-
+/*
    - The following causes an infinite loop: start Hugs, Feb 2001 
      version, and do Control-C at the prompt.  There is an infinite
      series of sigints delivered (to the client); but also seemingly
      to valgrind, which is very strange.  I don't know why.
 
-   Probably a lot more bugs which I haven't discovered yet.
+     [I haven't re-tested this, but this is likely fixed - JSGF]
 */
 
 
@@ -65,13 +96,41 @@
    Forwards decls.
    ------------------------------------------------------------------ */
 
-static void vg_oursignalhandler ( Int sigNo );
+static void vg_sync_signalhandler  ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext * );
+static void vg_async_signalhandler ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext * );
+static void vg_babyeater	   ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext * );
+static void proxy_sigvg_handler	   ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext * );
 
+static Bool is_correct_sigmask(void);
+static const Char *signame(Int sigNo);
 
 /* ---------------------------------------------------------------------
    HIGH LEVEL STUFF TO DO WITH SIGNALS: POLICY (MOSTLY)
    ------------------------------------------------------------------ */
 
+/* If set to true, the currently running kernel doesn't do the right
+   thing with signals and LWPs, so we need to do our own. */
+Bool VG_(do_signal_routing) = False;
+
+/* Since we use a couple of RT signals, we need to handle allocating
+   the rest for application use. */
+Int VG_(sig_rtmin) = VKI_SIGRTUSERMIN;
+Int VG_(sig_rtmax) = VKI_SIGRTMAX;
+
+Int VG_(sig_alloc_rtsig)(Int high)
+{
+   Int ret;
+
+   if (VG_(sig_rtmin) >= VG_(sig_rtmax))
+      ret = -1;
+   else
+      ret = high ? VG_(sig_rtmin)++ : VG_(sig_rtmax)--;
+
+   vg_assert(ret >= VKI_SIGRTUSERMIN);
+
+   return ret;
+}
+
 /* ---------------------------------------------------------------------
    Signal state for this process.
    ------------------------------------------------------------------ */
@@ -130,11 +189,14 @@
      sigprocmask assigns to all thread masks
      so that at least everything is always consistent
    Flags:
+     SA_SIGINFO -- we always set it, and honour it for the client
      SA_NOCLDSTOP -- passed to kernel
      SA_ONESHOT or SA_RESETHAND -- required; abort if not set
-     SA_RESTART -- we observe this but set our handlers always to restart
-     SA_NOMASK or SA_NODEFER -- required to not be set; abort if set
+     SA_RESTART -- we observe this but set our handlers to always restart
+     SA_NOMASK or SA_NODEFER -- we observe this, but our handlers block everything
      SA_ONSTACK -- currently not supported; abort if set.
+     SA_NOCLDWAIT -- we observe this, but we never set it (doesn't quite 
+	work if client is blocked in a wait4() syscall)
 */
 
 
@@ -144,8 +206,7 @@
                               or ptr to our handler */
       UInt skss_flags;
       /* There is no skss_mask, since we know that we will always ask
-         for all signals to be blocked in our one-and-only
-         sighandler. */
+         for all signals to be blocked in our sighandlers. */
       /* Also there is no skss_restorer. */
    }
    SKSS_Per_Signal;
@@ -153,32 +214,17 @@
 typedef 
    struct {
       SKSS_Per_Signal skss_per_sig[1+VKI_KNSIG];
-      vki_ksigset_t skss_sigmask; /* process' blocked signal mask */   
    } 
    SKSS;
 
 static SKSS vg_skss;
 
+Bool VG_(is_sig_ign)(Int sigNo)
+{
+   vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG);
 
-/* -----------------------------------------------------
-   Dynamic client signal state (DCSS).  This holds transient
-   information about state of client signals.
-   -------------------------------------------------- */
-
-typedef 
-   struct {
-      /* True iff a signal has been received but not yet passed to
-         client. */
-      Bool dcss_sigpending[1+VKI_KNSIG];
-      /* If sigpending[] is True, has meaning: 
-         VG_INVALID_THREADID -- to be passed to any suitable thread 
-         other -- to be passed only to the specified thread. */
-      ThreadId dcss_destthread[1+VKI_KNSIG];
-   } 
-   DCSS;
-
-static DCSS vg_dcss;
-
+   return vg_scss.scss_per_sig[sigNo].scss_handler == VKI_SIG_IGN;
+}
 
 /* ---------------------------------------------------------------------
    Compute the SKSS required by the current SCSS.
@@ -195,76 +241,63 @@
                   vg_skss.skss_per_sig[sig].skss_flags );
 
    }
-   VG_(printf)("Global sigmask (63 .. 0) = 0x%x 0x%x\n",
-	       vg_skss.skss_sigmask.ws[1],
-	       vg_skss.skss_sigmask.ws[0] );
 }
 
-static __inline__
-Bool is_WaitSIGd_by_any_thread ( Int sig )
-{
-   ThreadId tid;
-   for (tid = 1; tid < VG_N_THREADS; tid++) {
-      if (VG_(threads)[tid].status != VgTs_WaitSIG) 
-         continue;
-      if (VG_(ksigismember)( &VG_(threads)[tid].sigs_waited_for, sig ))
-         return True;
-   }
-   return False;
-}
-
-static __inline__
-Bool is_blocked_by_all_threads ( Int sig )
-{
-   ThreadId tid;
-   for (tid = 1; tid < VG_N_THREADS; tid++) {
-      if (VG_(threads)[tid].status == VgTs_Empty) 
-         continue;
-      if (! VG_(ksigismember)( &VG_(threads)[tid].sig_mask, sig ))
-         return False;
-   }
-   return True;
-}
-
-
 /* This is the core, clever bit.  Computation is as follows:
 
    For each signal
       handler = if client has a handler, then our handler
-                else if is WaitSIG'd by any thread, then our handler
-                else if client is DFL, then DFL
-                else (client must be IGN) IGN
+                else if client is DFL, then our handler as well
+                else (client must be IGN)
+			if (signal == SIGCHLD), then handler is vg_babyeater
+			else IGN
 
-      blocked = if is blocked by all threads and not WaitSIG'd by
-                   any thread
-                then BLOCKED 
-                else UNBLOCKED
+   We don't really bother with blocking signals here, because we
+   rely on the proxyLWP having set it as part of its kernel state.
 */
 static
 void calculate_SKSS_from_SCSS ( SKSS* dst )
 {
    Int   sig;
-   void* skss_handler;
-   void* scss_handler;
-   Bool  iz_WaitSIGd_by_any_thread;
-   Bool  iz_blocked_by_all_threads;
-   Bool  skss_blocked;
    UInt  scss_flags;
    UInt  skss_flags;
 
-   VG_(ksigemptyset)( &dst->skss_sigmask );
-
    for (sig = 1; sig <= VKI_KNSIG; sig++) {
-
-      /* Calculate kernel handler and blockedness for sig, as per rules
-         in above comment. */
-
-      iz_WaitSIGd_by_any_thread = is_WaitSIGd_by_any_thread(sig);
-      iz_blocked_by_all_threads = is_blocked_by_all_threads(sig);
-  
+      void *skss_handler;
+      void *scss_handler;
+      
       scss_handler = vg_scss.scss_per_sig[sig].scss_handler;
       scss_flags   = vg_scss.scss_per_sig[sig].scss_flags;
 
+      switch(sig) {
+      case VKI_SIGSEGV:
+      case VKI_SIGBUS:
+      case VKI_SIGFPE:
+      case VKI_SIGILL:
+	 /* For these, we always want to catch them and report, even
+	    if the client code doesn't. */
+	 skss_handler = vg_sync_signalhandler;
+	 break;
+
+      case VKI_SIGVGINT:
+      case VKI_SIGVGKILL:
+	 skss_handler = proxy_sigvg_handler;
+	 break;
+
+      case VKI_SIGCHLD:
+	 if (scss_handler == VKI_SIG_IGN) {
+	    skss_handler = vg_babyeater;
+	    break;
+	 }
+	 /* FALLTHROUGH */
+      default:
+	 if (scss_handler == VKI_SIG_IGN)
+	    skss_handler = VKI_SIG_IGN;
+	 else 
+	    skss_handler = vg_async_signalhandler;
+	 break;
+      }
+
       /* Restorer */
       /* 
       Doesn't seem like we can spin this one.
@@ -273,33 +306,19 @@
             ("sigactions with non-NULL .sa_restorer field");
       */
 
-      /* Handler */
-
-      if (scss_handler != VKI_SIG_DFL && scss_handler != VKI_SIG_IGN) {
-         skss_handler = &vg_oursignalhandler;
-      } else
-      if (iz_WaitSIGd_by_any_thread) {
-         skss_handler = &vg_oursignalhandler;
-      } else
-      if (scss_handler == VKI_SIG_DFL) {
-         skss_handler = VKI_SIG_DFL;
-      }
-      else {
-         vg_assert(scss_handler == VKI_SIG_IGN);
-         skss_handler = VKI_SIG_IGN;
-      }
-
-      /* Blockfulness */
-
-      skss_blocked
-         = iz_blocked_by_all_threads && !iz_WaitSIGd_by_any_thread;
-
       /* Flags */
 
       skss_flags = 0;
+
       /* SA_NOCLDSTOP: pass to kernel */
       if (scss_flags & VKI_SA_NOCLDSTOP)
          skss_flags |= VKI_SA_NOCLDSTOP;
+
+      /* SA_NOCLDWAIT - don't set */
+      /* XXX we could set this if we're not using wait() ourselves for
+	 tracking proxyLWPs (ie, have_futex is true in
+	 vg_syscalls.c). */
+
       /* SA_ONESHOT: ignore client setting */
       /*
       if (!(scss_flags & VKI_SA_ONESHOT))
@@ -308,16 +327,15 @@
       vg_assert(scss_flags & VKI_SA_ONESHOT);
       skss_flags |= VKI_SA_ONESHOT;
       */
-      /* SA_RESTART: ignore client setting and set for us */
+
+      /* SA_RESTART: ignore client setting and always set it for us
+	 (even though we never rely on the kernel to restart a
+	 syscall, we observe whether it wanted to restart the syscall
+	 or not, which guides our actions) */
       skss_flags |= VKI_SA_RESTART;
-      /* SA_NOMASK: not allowed */
-      /*
-      .. well, ignore it. 
-      if (scss_flags & VKI_SA_NOMASK)
-         VG_(unimplemented)
-            ("sigactions with SA_NOMASK");
-      vg_assert(!(scss_flags & VKI_SA_NOMASK));
-      */
+
+      /* SA_NOMASK: ignore it */
+
       /* SA_ONSTACK: client setting is irrelevant here */
       /*
       if (scss_flags & VKI_SA_ONSTACK)
@@ -328,11 +346,10 @@
       /* ... but WE ask for on-stack ourselves ... */
       skss_flags |= VKI_SA_ONSTACK;
 
-      /* Create SKSS entry for this signal. */
+      /* always ask for SA_SIGINFO */
+      skss_flags |= VKI_SA_SIGINFO;
 
-      if (skss_blocked
-          && sig != VKI_SIGKILL && sig != VKI_SIGSTOP)
-         VG_(ksigaddset)( &dst->skss_sigmask, sig );
+      /* Create SKSS entry for this signal. */
 
       if (sig != VKI_SIGKILL && sig != VKI_SIGSTOP)
          dst->skss_per_sig[sig].skss_handler = skss_handler;
@@ -347,8 +364,6 @@
              == VKI_SIG_DFL);
    vg_assert(dst->skss_per_sig[VKI_SIGSTOP].skss_handler 
              == VKI_SIG_DFL);
-   vg_assert(!VG_(ksigismember)( &dst->skss_sigmask, VKI_SIGKILL ));
-   vg_assert(!VG_(ksigismember)( &dst->skss_sigmask, VKI_SIGSTOP ));
 
    if (0)
       pp_SKSS();
@@ -359,27 +374,13 @@
    After a possible SCSS change, update SKSS and the kernel itself.
    ------------------------------------------------------------------ */
 
-/* IMPORTANT NOTE: to avoid race conditions, we must always enter here
-   with ALL KERNEL SIGNALS BLOCKED ! 
-*/
 void VG_(handle_SCSS_change) ( Bool force_update )
 {
    Int            res, sig;
    SKSS           skss_old;
    vki_ksigaction ksa, ksa_old;
 
-#  ifdef DEBUG_SIGNALS
-   vki_ksigset_t  test_sigmask;
-   res = VG_(ksigprocmask)( VKI_SIG_SETMASK /*irrelevant*/, 
-                            NULL, &test_sigmask );
-   vg_assert(res == 0);
-   /* The kernel never says that SIGKILL or SIGSTOP are masked. It is
-      correct! So we fake it here for the purposes only of
-      assertion. */
-   VG_(ksigaddset)( &test_sigmask, VKI_SIGKILL );
-   VG_(ksigaddset)( &test_sigmask, VKI_SIGSTOP );
-   vg_assert(VG_(kisfullsigset)( &test_sigmask ));
-#  endif
+   vg_assert(is_correct_sigmask());
 
    /* Remember old SKSS and calculate new one. */
    skss_old = vg_skss;
@@ -394,24 +395,6 @@
       if (sig == VKI_SIGKILL || sig == VKI_SIGSTOP)
          continue;
 
-      /* Aside: take the opportunity to clean up DCSS: forget about any
-         pending signals directed at dead threads. */
-      if (vg_dcss.dcss_sigpending[sig] 
-          && vg_dcss.dcss_destthread[sig] != VG_INVALID_THREADID) {
-         ThreadId tid = vg_dcss.dcss_destthread[sig];
-         vg_assert(VG_(is_valid_or_empty_tid)(tid));
-         if (VG_(threads)[tid].status == VgTs_Empty) {
-            vg_dcss.dcss_sigpending[sig] = False;
-            vg_dcss.dcss_destthread[sig] = VG_INVALID_THREADID;
-            if (VG_(clo_trace_signals)) 
-               VG_(message)(Vg_DebugMsg, 
-                   "discarding pending signal %d due to thread %d exiting",
-                   sig, tid );
-         }
-      }
-
-      /* End of the Aside.  Now the Main Business. */
-
       if (!force_update) {
          if ((skss_old.skss_per_sig[sig].skss_handler
               == vg_skss.skss_per_sig[sig].skss_handler)
@@ -456,18 +439,6 @@
          vg_assert(VG_(kisfullsigset)( &ksa_old.ksa_mask ));
       }
    }
-
-   /* Just set the new sigmask, even if it's no different from the
-      old, since we have to do this anyway, to unblock the host
-      signals. */
-   if (VG_(clo_trace_signals)) 
-      VG_(message)(Vg_DebugMsg, 
-         "setting kmask(63..0) to 0x%x 0x%x",
-         vg_skss.skss_sigmask.ws[1], 
-         vg_skss.skss_sigmask.ws[0] 
-      );
-
-   VG_(restore_all_host_signals)( &vg_skss.skss_sigmask );
 }
 
 
@@ -513,7 +484,7 @@
    if (oss != NULL) {
       oss->ss_sp    = vg_scss.altstack.ss_sp;
       oss->ss_size  = vg_scss.altstack.ss_size;
-      oss->ss_flags = sas_ss_flags(m_esp);
+      oss->ss_flags = vg_scss.altstack.ss_flags | sas_ss_flags(m_esp);
    }
 
    if (ss != NULL) {
@@ -528,16 +499,17 @@
          return;
       }
       if (ss->ss_flags == VKI_SS_DISABLE) {
-         vg_scss.altstack.ss_size = 0;
-         vg_scss.altstack.ss_sp = NULL;
+         vg_scss.altstack.ss_flags = VKI_SS_DISABLE;
       } else {
          if (ss->ss_size < VKI_MINSIGSTKSZ) {
             SET_SYSCALL_RETVAL(tid, -VKI_ENOMEM);
             return;
          }
+
+	 vg_scss.altstack.ss_sp   = ss->ss_sp;
+	 vg_scss.altstack.ss_size = ss->ss_size;
+	 vg_scss.altstack.ss_flags = 0;
       }
-      vg_scss.altstack.ss_sp   = ss->ss_sp;
-      vg_scss.altstack.ss_size = ss->ss_size;
    }
    SET_SYSCALL_RETVAL(tid, 0);
 }
@@ -548,7 +520,8 @@
    Int              signo;
    vki_ksigaction*  new_act;
    vki_ksigaction*  old_act;
-   vki_ksigset_t    irrelevant_sigmask;
+
+   vg_assert(is_correct_sigmask());
 
    vg_assert(VG_(is_valid_tid)(tid));
    signo     = VG_(threads)[tid].m_ebx; /* int sigNo */
@@ -569,6 +542,12 @@
    /* Reject out-of-range signal numbers. */
    if (signo < 1 || signo > VKI_KNSIG) goto bad_signo;
 
+   /* don't let them use our signals */
+   if ( (signo == VKI_SIGVGINT || signo == VKI_SIGVGKILL)
+	&& new_act
+	&& !(new_act->ksa_handler == VKI_SIG_DFL || new_act->ksa_handler == VKI_SIG_IGN) )
+      goto bad_signo;
+
    /* Reject attempts to set a handler (or set ignore) for SIGKILL. */
    if ( (signo == VKI_SIGKILL || signo == VKI_SIGSTOP)
        && new_act
@@ -594,7 +573,6 @@
 
    /* All happy bunnies ... */
    if (new_act) {
-      VG_(block_all_host_signals)( &irrelevant_sigmask );
       VG_(handle_SCSS_change)( False /* lazy update */ );
    }
    SET_SYSCALL_RETVAL(tid, 0);
@@ -611,8 +589,8 @@
   bad_sigkill_or_sigstop:
    if (VG_(needs).core_errors && VG_(clo_verbosity) >= 1)
       VG_(message)(Vg_UserMsg,
-         "Warning: attempt to set %s handler in __NR_sigaction.", 
-         signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
+		   "Warning: attempt to set %s handler in __NR_sigaction.", 
+		   signame(signo));
 
    SET_SYSCALL_RETVAL(tid, -VKI_EINVAL);
    return;
@@ -640,12 +618,9 @@
    }
 }
 
-/* Handle blocking mask set/get uniformly for threads and process as a
-   whole.  If tid==VG_INVALID_THREADID, this is really
-   __NR_sigprocmask, in which case we set the masks for all threads to
-   the "set" and return in "oldset" that from the root thread (1).
-   Otherwise, tid will denote a valid thread, in which case we just
-   set/get its mask.
+/* 
+   This updates the thread's signal mask.  There's no such thing as a
+   process-wide signal mask.
 
    Note that the thread signal masks are an implicit part of SCSS,
    which is why this routine is allowed to mess with them.  
@@ -656,50 +631,35 @@
                   vki_ksigset_t* newset,
 		  vki_ksigset_t* oldset )
 {
-   vki_ksigset_t irrelevant_sigmask;
+   vg_assert(is_correct_sigmask());
 
    if (VG_(clo_trace_signals))
       VG_(message)(Vg_DebugExtraMsg, 
-         "do_setmask: tid = %d (%d means ALL), how = %d (%s), set = %p", 
-         tid,
-         VG_INVALID_THREADID,
-         how,
-         how==VKI_SIG_BLOCK ? "SIG_BLOCK" : (
-            how==VKI_SIG_UNBLOCK ? "SIG_UNBLOCK" : (
-            how==VKI_SIG_SETMASK ? "SIG_SETMASK" : "???")),
-         newset
-      );
+		   "do_setmask: tid = %d how = %d (%s), set = %p %08x%08x", 
+		   tid, how,
+		   how==VKI_SIG_BLOCK ? "SIG_BLOCK" : (
+		      how==VKI_SIG_UNBLOCK ? "SIG_UNBLOCK" : (
+			 how==VKI_SIG_SETMASK ? "SIG_SETMASK" : "???")),
+		   newset, newset ? newset->ws[1] : 0, newset ? newset->ws[0] : 0
+	 );
 
-   if (tid == VG_INVALID_THREADID) {
-      /* Behave as if __NR_sigprocmask. */
-      if (oldset) {
-         /* A bit fragile.  Should do better here really. */
-         vg_assert(VG_(threads)[1].status != VgTs_Empty);
-         *oldset = VG_(threads)[1].sig_mask;
-      }
-      if (newset) {
-        ThreadId tidd;
-        for (tidd = 1; tidd < VG_N_THREADS; tidd++) {
-            if (VG_(threads)[tidd].status == VgTs_Empty) 
-               continue;
-            do_sigprocmask_bitops ( 
-               how, &VG_(threads)[tidd].sig_mask, newset );
-         }
-      }
-   } else {
-      /* Just do this thread. */
-      vg_assert(VG_(is_valid_tid)(tid));
-      if (oldset)
-         *oldset = VG_(threads)[tid].sig_mask;
-      if (newset)
-         do_sigprocmask_bitops ( 
-            how, &VG_(threads)[tid].sig_mask, newset );
+   /* Just do this thread. */
+   vg_assert(VG_(is_valid_tid)(tid));
+   if (oldset) {
+      *oldset = VG_(threads)[tid].eff_sig_mask;
+      if (VG_(clo_trace_signals))
+	      VG_(message)(Vg_DebugExtraMsg, 
+			   "\toldset=%p %08x%08x",
+			   oldset, oldset->ws[1], oldset->ws[0]);
    }
-
    if (newset) {
-      VG_(block_all_host_signals)( &irrelevant_sigmask );
-      VG_(handle_SCSS_change)( False /* lazy update */ );
+      do_sigprocmask_bitops (how, &VG_(threads)[tid].sig_mask, newset );
+      VG_(ksigdelset)(&VG_(threads)[tid].sig_mask, VKI_SIGKILL);
+      VG_(ksigdelset)(&VG_(threads)[tid].sig_mask, VKI_SIGSTOP);
+      VG_(proxy_setsigmask)(tid);
    }
+
+   vg_assert(is_correct_sigmask());
 }
 
 
@@ -708,16 +668,26 @@
                                vki_ksigset_t* set,
                                vki_ksigset_t* oldset )
 {
-   if (how == VKI_SIG_BLOCK || how == VKI_SIG_UNBLOCK 
-                            || how == VKI_SIG_SETMASK) {
+   switch(how) {
+   case VKI_SIG_BLOCK:
+   case VKI_SIG_UNBLOCK:
+   case VKI_SIG_SETMASK:
       vg_assert(VG_(is_valid_tid)(tid));
-      do_setmask ( VG_INVALID_THREADID, how, set, oldset );
-      /* Syscall returns 0 (success) to its thread. */
+      /* Syscall returns 0 (success) to its thread. Set this up before
+	 calling do_setmask() because we may get a signal as part of
+	 setting the mask, which will confuse things.
+       */
       SET_SYSCALL_RETVAL(tid, 0);
-   } else {
+      do_setmask ( tid, how, set, oldset );
+
+      VG_(route_signals)();	/* if we're routing, do something before returning */
+      break;
+
+   default:
       VG_(message)(Vg_DebugMsg, 
                   "sigprocmask: unknown `how' field %d", how);
       SET_SYSCALL_RETVAL(tid, -VKI_EINVAL);
+      break;
    }
 }
 
@@ -738,106 +708,15 @@
 
 void VG_(send_signal_to_thread) ( ThreadId thread, Int sig )
 {
-   Int res;
+   ThreadState *tst;
+
    vg_assert(VG_(is_valid_tid)(thread));
    vg_assert(sig >= 1 && sig <= VKI_KNSIG);
    
-   switch ((UInt)(vg_scss.scss_per_sig[sig].scss_handler)) {
+   tst = VG_(get_ThreadState)(thread);
+   vg_assert(tst->proxy != NULL);
 
-      case ((UInt)VKI_SIG_IGN):
-         if (VG_(clo_trace_signals)) 
-            VG_(message)(Vg_DebugMsg, 
-               "send_signal %d to_thread %d: IGN, ignored", sig, thread );
-         break;
-
-      case ((UInt)VKI_SIG_DFL):
-         /* This is the tricky case.  Since we don't handle default
-            actions, the simple thing is to send someone round to the
-            front door and signal there.  Then the kernel will do
-            whatever it does with the default action. */
-         res = VG_(kkill)( VG_(getpid)(), sig );
-         vg_assert(res == 0);
-         break;
-
-      default:
-         if (!vg_dcss.dcss_sigpending[sig]) {
-            vg_dcss.dcss_sigpending[sig] = True;
-            vg_dcss.dcss_destthread[sig] = thread;
-            if (VG_(clo_trace_signals)) 
-               VG_(message)(Vg_DebugMsg, 
-                  "send_signal %d to_thread %d: now pending", sig, thread );
-         } else {
-            if (vg_dcss.dcss_destthread[sig] == thread) {
-               if (VG_(clo_trace_signals)) 
-                  VG_(message)(Vg_DebugMsg, 
-                     "send_signal %d to_thread %d: already pending ... "
-                     "discarded", sig, thread );
-            } else {
-               if (VG_(clo_trace_signals)) 
-                  VG_(message)(Vg_DebugMsg, 
-                     "send_signal %d to_thread %d: was pending for %d, "
-                     "now pending for %d",
-                     sig, thread, vg_dcss.dcss_destthread[sig], thread );
-               vg_dcss.dcss_destthread[sig] = thread;
-            }
-         }
-   }    
-}
-
-
-/* Store in set the signals which could be delivered to this thread
-   right now (since they are pending) but cannot be, because the
-   thread has masked them out. */
-void VG_(do_sigpending) ( ThreadId tid, vki_ksigset_t* set )
-{
-   Int           sig, res;
-   Bool          maybe_pend;
-   vki_ksigset_t process_pending;
-
-   /* Get the set of signals which are pending for the process as a
-      whole. */
-   res = VG_(ksigpending)( &process_pending );
-   vg_assert(res == 0);
-
-   VG_(ksigemptyset)(set);
-   for (sig = 1; sig <= VKI_KNSIG; sig++) {
-
-      /* Figure out if the signal could be pending for this thread.
-         There are two cases. */
-      maybe_pend = False;
-
-      /* Case 1: perhaps the signal is pending for the process as a
-         whole -- that is, is blocked even valgrind's signal
-         handler. */
-      if (VG_(ksigismember)( &process_pending, sig ))
-         maybe_pend = True;
-
-      /* Case 2: the signal has been collected by our handler and is
-         now awaiting disposition inside valgrind. */
-      if (/* is it pending at all? */
-          vg_dcss.dcss_sigpending[sig]
-          && 
-	  /* check it is not specifically directed to some other thread */
-          (vg_dcss.dcss_destthread[sig] == VG_INVALID_THREADID
-           || vg_dcss.dcss_destthread[sig] == tid)
-         )
-         maybe_pend = True;
-
-      if (!maybe_pend)
-         continue; /* this signal just ain't pending! */
-
-      /* Check other necessary conditions now ... */
-
-      if (VG_(ksigismember)( &VG_(threads)[tid].sigs_waited_for, sig ))
-         continue; /* tid is sigwaiting for sig, so will never be
-                      offered to a handler */
-      if (! VG_(ksigismember)( &VG_(threads)[tid].sig_mask, sig ))
-         continue; /* not blocked in this thread */
-
-      /* Ok, sig could be delivered to this thread if only it wasn't
-         masked out.  So we add it to set. */
-      VG_(ksigaddset)( set, sig );
-   }
+   VG_(proxy_sendsig)(thread, sig);
 }
 
 
@@ -868,6 +747,60 @@
    vg_assert(ret == 0);
 }
 
+/* Sanity check - check the scheduler LWP has all the signals blocked
+   it is supposed to have blocked. */
+static Bool is_correct_sigmask(void)
+{
+   vki_ksigset_t mask;
+   Bool ret = True;
+
+   vg_assert(VG_(gettid)() == VG_(main_pid));
+
+#ifdef DEBUG_SIGNALS
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, NULL, &mask);
+
+   /* unresumable signals */
+   
+   ret = ret && !VG_(ksigismember)(&mask, VKI_SIGSEGV);
+   VG_(ksigaddset)(&mask, VKI_SIGSEGV);
+
+   ret = ret && !VG_(ksigismember)(&mask, VKI_SIGBUS);
+   VG_(ksigaddset)(&mask, VKI_SIGBUS);
+
+   ret = ret && !VG_(ksigismember)(&mask, VKI_SIGFPE);
+   VG_(ksigaddset)(&mask, VKI_SIGFPE);
+
+   ret = ret && !VG_(ksigismember)(&mask, VKI_SIGILL);
+   VG_(ksigaddset)(&mask, VKI_SIGILL);
+
+   /* unblockable signals (doesn't really matter if these are
+      already present) */
+   VG_(ksigaddset)(&mask, VKI_SIGSTOP);
+   VG_(ksigaddset)(&mask, VKI_SIGKILL);
+
+   ret = ret && VG_(kisfullsigset)(&mask);
+#endif /* DEBUG_SIGNALS */
+
+   return ret;
+}
+
+/* Set the signal mask for the scheduler LWP; this should be set once
+   and left that way - all async signal handling is done in the proxy
+   LWPs. */
+static void set_main_sigmask(void)
+{
+   vki_ksigset_t mask;
+
+   VG_(ksigfillset)(&mask);
+   VG_(ksigdelset)(&mask, VKI_SIGSEGV);
+   VG_(ksigdelset)(&mask, VKI_SIGBUS);
+   VG_(ksigdelset)(&mask, VKI_SIGFPE);
+   VG_(ksigdelset)(&mask, VKI_SIGILL);
+
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
+
+   vg_assert(is_correct_sigmask());
+}
 
 /* ---------------------------------------------------------------------
    The signal simulation proper.  A simplified version of what the 
@@ -879,11 +812,14 @@
 
 typedef
    struct {
-      /* These 4 are parameters to the signal handler.  The order of
-         them is important, since this whole struct is pushed onto the
-         client's stack at delivery time.  The first 4 words -- which
-         will be at the top of the stack -- constitute 4 arg words to
-         the handler. */
+      /* There are two different stack frame formats, depending on
+	 whether the client set the SA_SIGINFO flag for the handler.
+	 This structure is put onto the client's stack as part of
+	 signal delivery, and therefore appears as the signal
+	 handler's arguments.
+
+	 The first two words are common for both frame formats -
+	 they're the return address and the signal number. */
 
       /* Sig handler's (bogus) return address */
       Addr retaddr;
@@ -892,15 +828,28 @@
          handler won't change it.  So we keep a second copy of it in
          sigNo_private. */
       Int  sigNo;
-      /* ptr to siginfo_t; NULL for now. */
-      Addr psigInfo;
-      /* ptr to ucontext; NULL for now. */
-      Addr puContext;
+
+      /* This is where the two frames start differing. */
+      union {
+	 struct {		/* set SA_SIGINFO */
+	    /* ptr to siginfo_t. */
+	    Addr psigInfo;
+
+	    /* ptr to ucontext */
+	    Addr puContext;
+	 } sigInfo;
+	 struct vki_sigcontext sigContext; /* did not set SA_SIGINFO */
+      } handlerArgs;
 
       /* The rest are private fields which the handler is unaware of. */
 
       /* Sanity check word. */
       UInt magicPI;
+      /* pointed to by psigInfo */
+      vki_ksiginfo_t sigInfo;
+      /* pointed to by puContext */
+      struct vki_ucontext uContext;
+
       /* Safely-saved version of sigNo, as described above. */
       Int  sigNo_private;
       /* Saved processor state. */
@@ -927,6 +876,9 @@
       UInt sh_esp;
       UInt sh_eflags;
 
+      /* saved signal mask to be restored when handler returns */
+      vki_ksigset_t	mask;
+
       /* Scheduler-private stuff: what was the thread's status prior to
          delivering this signal? */
       ThreadStatus status;
@@ -937,24 +889,69 @@
    VgSigFrame;
 
 
+/* Make up a plausible-looking thread state from the thread's current state */
+static void synth_ucontext(ThreadId tid, const vki_ksiginfo_t *si, 
+			   const vki_ksigset_t *set, struct vki_ucontext *uc)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   struct vki_sigcontext *sc = &uc->uc_mcontext;
+
+   VG_(memset)(uc, 0, sizeof(*uc));
+
+   uc->uc_flags = 0;
+   uc->uc_link = 0;
+   uc->uc_sigmask = *set;
+   uc->uc_stack = vg_scss.altstack;
+
+#define SC(reg)	sc->reg = tst->m_##reg
+   SC(gs);
+   SC(fs);
+   SC(es);
+   SC(ds);
+
+   SC(edi);
+   SC(esi);
+   SC(ebp);
+   SC(esp);
+   SC(ebx);
+   SC(edx);
+   SC(ecx);
+   SC(eax);
+
+   SC(eip);
+   SC(cs);
+   SC(eflags);
+   SC(ss);
+   /* XXX esp_at_signal */
+   /* XXX trapno */
+   /* XXX err */
+#undef SC
+
+   sc->cr2 = (UInt)si->_sifields._sigfault._addr;
+}
 
 /* Set up a stack frame (VgSigContext) for the client's signal
    handler.  This includes the signal number and a bogus return
    address.  */
 static
-void vg_push_signal_frame ( ThreadId tid, int sigNo )
+void vg_push_signal_frame ( ThreadId tid, const vki_ksiginfo_t *siginfo )
 {
    Int          i;
    Addr         esp, esp_top_of_frame;
    VgSigFrame*  frame;
    ThreadState* tst;
+   Int		sigNo = siginfo->si_signo;
 
    vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG);
    vg_assert(VG_(is_valid_tid)(tid));
    tst = & VG_(threads)[tid];
 
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg, 
+         "vg_push_signal_frame (thread %d): signal %d", tid, sigNo);
+
    if (/* this signal asked to run on an alt stack */
-       (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_ONSTACK)
+       (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_ONSTACK )
        && /* there is a defined and enabled alt stack, which we're not
              already using.  Logic from get_sigframe in
              arch/i386/kernel/signal.c. */
@@ -964,8 +961,8 @@
          = (Addr)(vg_scss.altstack.ss_sp) + vg_scss.altstack.ss_size;
       if (VG_(clo_trace_signals))
          VG_(message)(Vg_DebugMsg,
-            "delivering signal %d to thread %d: on ALT STACK", 
-            sigNo, tid );
+            "delivering signal %d (%s) to thread %d: on ALT STACK", 
+            sigNo, signame(sigNo), tid );
 
       /* Signal delivery to skins */
       VG_TRACK( pre_deliver_signal, tid, sigNo, /*alt_stack*/False );
@@ -992,12 +989,46 @@
 
    /* retaddr, sigNo, psigInfo, puContext fields are to be written */
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame", 
-                            (Addr)esp, 16 );
+                            (Addr)frame, offsetof(VgSigFrame, handlerArgs) );
    frame->retaddr    = (UInt)(&VG_(signalreturn_bogusRA));
    frame->sigNo      = sigNo;
    frame->sigNo_private = sigNo;
-   frame->psigInfo   = (Addr)NULL;
-   frame->puContext  = (Addr)NULL;
+   VG_TRACK( post_mem_write, (Addr)frame, offsetof(VgSigFrame, handlerArgs) );
+
+   if (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_SIGINFO) {
+      /* if the client asked for a siginfo delivery, then build the stack that way */
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (siginfo)", 
+		(Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigInfo) );
+      frame->handlerArgs.sigInfo.psigInfo   = (Addr)&frame->sigInfo;
+      frame->handlerArgs.sigInfo.puContext = (Addr)&frame->uContext;
+      VG_TRACK( post_mem_write, (Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigInfo) );
+
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (siginfo)", 
+		(Addr)&frame->sigInfo, sizeof(frame->sigInfo) );
+      VG_(memcpy)(&frame->sigInfo, siginfo, sizeof(vki_ksiginfo_t));
+      VG_TRACK( post_mem_write, (Addr)&frame->sigInfo, sizeof(frame->sigInfo) );
+
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (siginfo)", 
+		(Addr)&frame->uContext, sizeof(frame->uContext) );
+      synth_ucontext(tid, siginfo, &vg_scss.scss_per_sig[sigNo].scss_mask, &frame->uContext);
+      VG_TRACK( post_mem_write, (Addr)&frame->uContext, sizeof(frame->uContext) );
+   } else {
+      struct vki_ucontext uc;
+
+      /* otherwise just put the sigcontext there */
+
+      synth_ucontext(tid, siginfo, &vg_scss.scss_per_sig[sigNo].scss_mask, &uc);
+
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (sigcontext)", 
+		(Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigContext) );
+      VG_(memcpy)(&frame->handlerArgs.sigContext, &uc.uc_mcontext, 
+		  sizeof(struct vki_sigcontext));
+      VG_TRACK( post_mem_write, (Addr)&frame->handlerArgs, 
+		sizeof(frame->handlerArgs.sigContext) );
+      
+      frame->handlerArgs.sigContext.oldmask = tst->sig_mask.ws[0];
+   }
+
    frame->magicPI    = 0x31415927;
 
    for (i = 0; i < VG_SIZE_OF_SSESTATE_W; i++)
@@ -1026,7 +1057,14 @@
       frame->sh_eflags  = tst->sh_eflags;
    }
 
-   frame->status     = tst->status;
+   frame->mask = tst->sig_mask;
+
+   /* If the thread is currently blocked in a syscall, we want it to
+      resume as runnable. */
+   if (tst->status == VgTs_WaitSys)
+      frame->status = VgTs_Runnable;
+   else
+      frame->status = tst->status;
 
    frame->magicE     = 0x27182818;
 
@@ -1040,14 +1078,9 @@
    /* This thread needs to be marked runnable, but we leave that the
       caller to do. */
 
-   /* retaddr, sigNo, psigInfo, puContext fields have been written -- 
-      at 0(%ESP) .. 12(%ESP) */
-   VG_TRACK( post_mem_write, (Addr)esp, 16 );
-
-   /* 
-   VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", 
-               esp, tst->m_eip);
-   */
+   if (0)
+      VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p, status=%d\n", 
+		  esp, tst->m_eip, tst->status);
 }
 
 /* Clear the signal frame created by vg_push_signal_frame, restore the
@@ -1074,7 +1107,7 @@
    vg_assert(frame->magicE  == 0x27182818);
    if (VG_(clo_trace_signals))
       VG_(message)(Vg_DebugMsg, 
-         "vg_pop_signal_frame (thread %d): valid magic", tid);
+         "vg_pop_signal_frame (thread %d): valid magic; EIP=%p", tid, frame->m_eip);
 
    /* Mark the frame structure as nonaccessible. */
    VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
@@ -1114,6 +1147,9 @@
       was delivered. */
    tst->status    = frame->status;
 
+   tst->sig_mask  = frame->mask;
+   VG_(proxy_setsigmask)(tid);
+
    /* Notify skins */
    VG_TRACK( post_deliver_signal, tid, sigNo );
 
@@ -1129,10 +1165,6 @@
 Bool VG_(signal_returns) ( ThreadId tid )
 {
    Int            sigNo;
-   vki_ksigset_t  saved_procmask;
-
-   /* Block host signals ... */
-   VG_(block_all_host_signals)( &saved_procmask );
 
    /* Pop the signal frame and restore tid's status to what it was
       before the signal was delivered. */
@@ -1140,207 +1172,401 @@
 
    vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG);
 
-   /* Unlock and return. */
-   VG_(restore_all_host_signals)( &saved_procmask );
-
    /* Scheduler now can resume this thread, or perhaps some other.
       Tell the scheduler whether or not any syscall interrupted by
-      this signal should be restarted, if possible, or no. */
+      this signal should be restarted, if possible, or no.  This is
+      only used for nanosleep; all other blocking syscalls are handled
+      in VG_(deliver_signal)().
+   */
    return 
       (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_RESTART)
          ? True 
          : False;
 }
 
-
-/* Deliver all pending signals, by building stack frames for their
-   handlers.  Return True if any signals were delivered. */
-Bool VG_(deliver_signals) ( void )
+static const Char *signame(Int sigNo)
 {
-   vki_ksigset_t  saved_procmask;
-   Int            sigNo;
-   Bool           found, scss_changed;
-   ThreadState*   tst;
-   ThreadId       tid;
+   static Char buf[10];
 
-   /* A cheap check.  We don't need to have exclusive access to the
-      pending array, because in the worst case, vg_oursignalhandler
-      will add signals, causing us to return, thinking there are no
-      signals to deliver, when in fact there are some.  A subsequent
-      call here will handle the signal(s) we missed.  */
-   found = False;
-   for (sigNo = 1; sigNo <= VKI_KNSIG; sigNo++)
-      if (vg_dcss.dcss_sigpending[sigNo])
-         found = True;
+   switch(sigNo) {
+#define S(x)	case VKI_##x: return #x
+      S(SIGHUP);
+      S(SIGINT);
+      S(SIGQUIT);
+      S(SIGILL);
+      S(SIGTRAP);
+      S(SIGABRT);
+      S(SIGBUS);
+      S(SIGFPE);
+      S(SIGKILL);
+      S(SIGUSR1);
+      S(SIGUSR2);
+      S(SIGSEGV);
+      S(SIGPIPE);
+      S(SIGALRM);
+      S(SIGTERM);
+      S(SIGSTKFLT);
+      S(SIGCHLD);
+      S(SIGCONT);
+      S(SIGSTOP);
+      S(SIGTSTP);
+      S(SIGTTIN);
+      S(SIGTTOU);
+      S(SIGURG);
+      S(SIGXCPU);
+      S(SIGXFSZ);
+      S(SIGVTALRM);
+      S(SIGPROF);
+      S(SIGWINCH);
+      S(SIGIO);
+      S(SIGPWR);
+      S(SIGUNUSED);
+#undef S
 
-   if (!found) return False;
+   case VKI_SIGRTMIN ... VKI_SIGRTMAX:
+      VG_(sprintf)(buf, "SIGRT%d", sigNo);
+      return buf;
 
-   /* Now we have to do it properly.  Get exclusive access by
-      blocking all the host's signals.  That means vg_oursignalhandler
-      can't run whilst we are messing with stuff.
-   */
-   scss_changed = False;
-   VG_(block_all_host_signals)( &saved_procmask );
+   default:
+      VG_(sprintf)(buf, "SIG%d", sigNo);
+      return buf;
+   }
+}
 
-   /* Look for signals to deliver ... */
-   for (sigNo = 1; sigNo <= VKI_KNSIG; sigNo++) {
+/* Hit ourselves with a signal using the default handler */
+void VG_(kill_self)(Int sigNo)
+{
+   vki_ksigset_t	mask, origmask;
+   vki_ksigaction	sa, origsa;   
 
-      if (!vg_dcss.dcss_sigpending[sigNo])
-         continue;
-
-      /* sigNo is pending.  Try to find a suitable thread to deliver
-         it to. */
-      /* First off, are any threads in sigwait() for the signal? 
-         If so just give to one of them and have done. */
-      for (tid = 1; tid < VG_N_THREADS; tid++) {
-         tst = & VG_(threads)[tid];
-         /* Is tid waiting for a signal?  If not, ignore. */
-         if (tst->status != VgTs_WaitSIG)
-            continue;
-         /* Is the signal directed at a specific thread other than
-            this one?  If yes, ignore. */
-         if (vg_dcss.dcss_destthread[sigNo] != VG_INVALID_THREADID
-             && vg_dcss.dcss_destthread[sigNo] != tid)
-            continue;
-         /* Is tid waiting for the signal?  If not, ignore. */
-         if (VG_(ksigismember)(&(tst->sigs_waited_for), sigNo))
-            break;
-      }
-      if (tid < VG_N_THREADS) {
-         UInt* sigwait_args;
-         tst = & VG_(threads)[tid];
-         if (VG_(clo_trace_signals) || VG_(clo_trace_sched))
-            VG_(message)(Vg_DebugMsg,
-               "releasing thread %d from sigwait() due to signal %d",
-               tid, sigNo );
-         sigwait_args = (UInt*)(tst->m_eax);
-         if (NULL != (UInt*)(sigwait_args[2])) {
-            *(Int*)(sigwait_args[2]) = sigNo;
-            VG_TRACK( post_mem_write, (Addr)sigwait_args[2], sizeof(UInt));
-         }
-	 SET_SIGNAL_EDX(tid, 0);
-         tst->status = VgTs_Runnable;
-         VG_(ksigemptyset)(&tst->sigs_waited_for);
-         scss_changed = True;
-         vg_dcss.dcss_sigpending[sigNo] = False;
-         vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID; 
-                                          /*paranoia*/
-         continue; /* for (sigNo = 1; ...) loop */
-      }
-
-      /* Well, nobody appears to be sigwaiting for it.  So we really
-         are delivering the signal in the usual way.  And that the
-         client really has a handler for this thread! */
-      vg_assert(vg_dcss.dcss_sigpending[sigNo]);
-
-      /* A recent addition, so as to stop seriously weird progs dying
-         at the following assertion (which this renders redundant,
-         btw). */
-      if (vg_scss.scss_per_sig[sigNo].scss_handler == VKI_SIG_IGN
-          || vg_scss.scss_per_sig[sigNo].scss_handler == VKI_SIG_DFL) {
-         /* Strange; perhaps the handler disappeared before we could
-            deliver the signal. */
-         VG_(message)(Vg_DebugMsg,
-            "discarding signal %d for thread %d because handler missing",
-            sigNo, tid );
-         vg_dcss.dcss_sigpending[sigNo] = False;
-         vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
-         continue; /* for (sigNo = 1; ...) loop */
-      }
-
-      vg_assert(vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_IGN
-                && vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_DFL);
-
-      tid = vg_dcss.dcss_destthread[sigNo];
-      vg_assert(tid == VG_INVALID_THREADID 
-                || VG_(is_valid_tid)(tid));
-
-      if (tid != VG_INVALID_THREADID) {
-         /* directed to a specific thread; ensure it actually still
-            exists ... */
-         tst = & VG_(threads)[tid];
-         if (tst->status == VgTs_Empty) {
-            /* dead, for whatever reason; ignore this signal */
-            if (VG_(clo_trace_signals))
-               VG_(message)(Vg_DebugMsg,
-                  "discarding signal %d for nonexistent thread %d",
-                  sigNo, tid );
-            vg_dcss.dcss_sigpending[sigNo] = False;
-            vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
-            continue; /* for (sigNo = 1; ...) loop */
-	 } else if (VG_(ksigismember)(&(tst->sig_mask), sigNo)) {
-            /* signal blocked in specific thread, so we can't
-               deliver it just now */
-            continue; /* for (sigNo = 1; ...) loop */
-         }
-      } else {
-         /* not directed to a specific thread, so search for a
-            suitable candidate */
-         for (tid = 1; tid < VG_N_THREADS; tid++) {
-            tst = & VG_(threads)[tid];
-            if (tst->status != VgTs_Empty
-                && !VG_(ksigismember)(&(tst->sig_mask), sigNo))
-               break;
-         }
-         if (tid == VG_N_THREADS) 
-            /* All threads have this signal blocked, so we can't
-               deliver it just now */
-            continue; /* for (sigNo = 1; ...) loop */
-      }
-
-      /* Ok, we can deliver signal sigNo to thread tid. */
-
-      if (VG_(clo_trace_signals))
-         VG_(message)(Vg_DebugMsg,"delivering signal %d to thread %d", 
-                                  sigNo, tid );
-
-      /* Create a signal delivery frame, and set the client's %ESP and
-         %EIP so that when execution continues, we will enter the
-         signal handler with the frame on top of the client's stack,
-         as it expects. */
-      vg_assert(VG_(is_valid_tid)(tid));
-      vg_push_signal_frame ( tid, sigNo );
-      VG_(threads)[tid].status = VgTs_Runnable;
+   sa.ksa_handler = VKI_SIG_DFL;
+   sa.ksa_flags = 0;
+   sa.ksa_restorer = 0;
+   VG_(ksigemptyset)(&sa.ksa_mask);
       
-      /* Signify that the signal has been delivered. */
-      vg_dcss.dcss_sigpending[sigNo] = False;
-      vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
+   VG_(ksigaction)(sigNo, &sa, &origsa);
 
-      if (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_ONESHOT) {
-         /* Do the ONESHOT thing. */
-         vg_scss.scss_per_sig[sigNo].scss_handler = VKI_SIG_DFL;
-         scss_changed = True;
+   VG_(ksigfillset)(&mask);
+   VG_(ksigdelset)(&mask, sigNo);
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, &mask, &origmask);
+
+   VG_(ktkill)(VG_(getpid)(), sigNo);
+
+   VG_(ksigaction)(sigNo, &origsa, NULL);
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, &origmask, NULL);
+}
+
+/* 
+   Perform the default action of a signal.  Returns if the default
+   action isn't fatal.
+
+   If we're not being quiet, then print out some more detail about
+   fatal signals (esp. core dumping signals).
+ */
+static void vg_default_action(const vki_ksiginfo_t *info, ThreadId tid)
+{
+   Int  sigNo     = info->si_signo;
+   Bool terminate = False;
+   Bool core      = False;
+
+   switch(sigNo) {
+   case VKI_SIGQUIT:	/* core */
+   case VKI_SIGILL:	/* core */
+   case VKI_SIGABRT:	/* core */
+   case VKI_SIGFPE:	/* core */
+   case VKI_SIGSEGV:	/* core */
+   case VKI_SIGBUS:	/* core */
+   case VKI_SIGTRAP:	/* core */
+   case VKI_SIGXCPU:	/* core */
+   case VKI_SIGXFSZ:	/* core */
+      terminate = True;
+      core = True;
+      break;
+
+   case VKI_SIGHUP:	/* term */
+   case VKI_SIGINT:	/* term */
+   case VKI_SIGKILL:	/* term - we won't see this */
+   case VKI_SIGPIPE:	/* term */
+   case VKI_SIGALRM:	/* term */
+   case VKI_SIGTERM:	/* term */
+   case VKI_SIGUSR1:	/* term */
+   case VKI_SIGUSR2:	/* term */
+   case VKI_SIGIO:	/* term */
+   case VKI_SIGPWR:	/* term */
+   case VKI_SIGSYS:	/* term */
+   case VKI_SIGPROF:	/* term */
+   case VKI_SIGVTALRM:	/* term */
+   case VKI_SIGRTMIN ... VKI_SIGRTMAX: /* term */
+      terminate = True;
+      break;
+   }
+
+   vg_assert(!core || (core && terminate));
+
+   if (terminate) {
+      if (VG_(clo_verbosity) != 0 && (core || VG_(clo_verbosity) > 1)) {
+	 VG_(message)(Vg_UserMsg, "");
+	 VG_(message)(Vg_UserMsg, "Process terminating with default action of signal %d (%s)%s", 
+		      sigNo, signame(sigNo), core ? ": dumping core" : "");
+
+	 /* Be helpful - decode some more details about this fault */
+	 if (info->si_code > VKI_SI_USER) {
+	    const Char *event = NULL;
+
+	    switch(sigNo) {
+	    case VKI_SIGSEGV:
+	       switch(info->si_code) {
+	       case 1: event = "address not mapped to object"; break;
+	       case 2: event = "invalid permissions for mapped object"; break;
+	       }
+	       break;
+
+	    case VKI_SIGILL:
+	       switch(info->si_code) {
+	       case 1: event = "illegal opcode"; break;
+	       case 2: event = "illegal operand"; break;
+	       case 3: event = "illegal addressing mode"; break;
+	       case 4: event = "illegal trap"; break;
+	       case 5: event = "privileged opcode"; break;
+	       case 6: event = "privileged register"; break;
+	       case 7: event = "coprocessor error"; break;
+	       case 8: event = "internal stack error"; break;
+	       }
+	       break;
+
+	    case VKI_SIGFPE:
+	       switch (info->si_code) {
+	       case 1: event = "integer divide by zero"; break;
+	       case 2: event = "integer overflow"; break;
+	       case 3: event = "FP divide by zero"; break;
+	       case 4: event = "FP overflow"; break;
+	       case 5: event = "FP underflow"; break;
+	       case 6: event = "FP inexact"; break;
+	       case 7: event = "FP invalid operation"; break;
+	       case 8: event = "FP subscript out of range"; break;
+	       }
+	       break;
+
+	    case VKI_SIGBUS:
+	       switch (info->si_code) {
+	       case 1: event = "invalid address alignment"; break;
+	       case 2: event = "non-existent physical address"; break;
+	       case 3: event = "hardware error"; break;
+	       }
+	       break;
+	    }
+
+	    if (event != NULL)
+	       VG_(message)(Vg_UserMsg, "  %s at address %p", 
+			    event, info->_sifields._sigfault._addr);
+	 }
+
+	 if (tid != VG_INVALID_THREADID) {
+	    ExeContext *ec = VG_(get_ExeContext)(tid);
+	    VG_(pp_ExeContext)(ec);
+	 }
+      }
+      
+      if (VG_(fatal_signal_set)) {
+	 VG_(fatal_sigNo) = sigNo;
+	 __builtin_longjmp(VG_(fatal_signal_jmpbuf), 1);
       }
    }
 
-   /* Unlock and return. */
-   if (scss_changed) {
-      /* handle_SCSS_change computes a new kernel blocking mask and
-         applies that. */
-      VG_(handle_SCSS_change)( False /* lazy update */ );
-   } else {
-      /* No SCSS change, so just restore the existing blocking
-         mask. */
-      VG_(restore_all_host_signals)( &saved_procmask );
-   }
+   VG_(kill_self)(sigNo);
 
-   return True;
+   vg_assert(!terminate);
 }
 
 
-/* Receive a signal from the host, and either discard it or park it in
-   the queue of pending signals.  All other signals will be blocked
-   when this handler runs.  Runs with all host signals blocked, so as
-   to have mutual exclusion when adding stuff to the queue. */
-
-static 
-void vg_oursignalhandler ( Int sigNo )
+void VG_(deliver_signal) ( ThreadId tid, const vki_ksiginfo_t *info, Bool async )
 {
-   static UInt   segv_warns = 0;
-   ThreadId      tid;
+   Int			sigNo = info->si_signo;
+   vki_ksigset_t	handlermask;
+   enum ThreadStatus	status;
+   SCSS_Per_Signal	*handler = &vg_scss.scss_per_sig[sigNo];
+   ThreadState		*tst = VG_(get_ThreadState)(tid);
+
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg,"delivering signal %d (%s) to thread %d", 
+		   sigNo, signame(sigNo), tid );
+
+   if (sigNo == VKI_SIGVGINT) {
+      /* If this is a SIGVGINT, then we just ACK the signal and carry
+	 on; the application need never know about it (except for any
+	 effect on its syscalls). */
+      vg_assert(async);
+
+      if (tst->status == VgTs_WaitSys) {
+	 /* blocked in a syscall; we assume it should be interrupted */
+	 if (tst->m_eax == -VKI_ERESTARTSYS)
+	    tst->m_eax = -VKI_EINTR;
+      }
+
+      VG_(proxy_sigack)(tid, &tst->sig_mask);
+      return;
+   }
+
+   /* If thread is currently blocked in a syscall, then resume as
+      runnable.  If the syscall needs restarting, tweak the machine
+      state to make it happen. */
+   if (tst->status == VgTs_WaitSys) {
+      vg_assert(tst->syscallno != -1);
+
+      if (0)
+	 VG_(printf)("signal %d interrupting syscall %d\n",
+		     sigNo, tst->syscallno);
+
+      if (tst->m_eax == -VKI_ERESTARTSYS) {
+	  if (handler->scss_flags & VKI_SA_RESTART) {
+	     tst->m_eax = tst->syscallno;
+	     tst->m_eip -= 2;		/* sizeof(int $0x80) */
+	  } else
+	     tst->m_eax = -VKI_EINTR;
+      } else {
+	 /* return value is already in eax - either EINTR or the
+	    normal return value */
+      }
+   }
+
+   vg_assert(handler->scss_handler != VKI_SIG_IGN);
+
+   if (sigNo == VKI_SIGCHLD && (handler->scss_flags & VKI_SA_NOCLDWAIT)) {
+      //VG_(printf)("sigNo==SIGCHLD and app asked for NOCLDWAIT\n");
+      vg_babyeater(sigNo, NULL, NULL);
+   }
+
+   if (handler->scss_handler == VKI_SIG_DFL) {
+      handlermask = tst->sig_mask; /* no change to signal mask */
+      vg_default_action(info, tid);
+   } else {
+      /* Create a signal delivery frame, and set the client's %ESP and
+	 %EIP so that when execution continues, we will enter the
+	 signal handler with the frame on top of the client's stack,
+	 as it expects. */
+      vg_assert(VG_(is_valid_tid)(tid));
+      vg_push_signal_frame ( tid, info );
+
+      if (handler->scss_flags & VKI_SA_ONESHOT) {
+	 /* Do the ONESHOT thing. */
+	 handler->scss_handler = VKI_SIG_DFL;
+
+	 VG_(handle_SCSS_change)( False /* lazy update */ );
+      }
+   
+      status = tst->status;
+
+      switch(status) {
+      case VgTs_Runnable:
+	 break;
+
+      case VgTs_WaitSys:
+	 /* don't change status yet, because we're about to get a
+	    message telling us the syscall was interrupted */
+	 break;
+
+      case VgTs_WaitJoiner:
+      case VgTs_WaitJoinee:
+      case VgTs_WaitMX:
+      case VgTs_WaitCV:
+      case VgTs_Sleeping:
+	 status = VgTs_Runnable;
+	 break;
+
+      case VgTs_Empty:
+	 VG_(core_panic)("unexpected thread state");
+	 break;
+      }
+
+      tst->status = status;
+
+      /* handler gets the union of the signal's mask and the thread's
+	 mask */
+      handlermask = handler->scss_mask;
+      VG_(ksigaddset_from_set)(&handlermask, &VG_(threads)[tid].sig_mask);
+
+      /* also mask this signal, unless they ask us not to */
+      if (!(handler->scss_flags & VKI_SA_NOMASK))
+	 VG_(ksigaddset)(&handlermask, sigNo);
+   }
+
+   /* tell proxy we're about to start running the handler */
+   if (async)
+      VG_(proxy_sigack)(tid, &handlermask);
+}
+
+
+/* 
+   If the client set the handler for SIGCHLD to SIG_IGN, then we need
+   to automatically dezombie any dead children.  Also used if the
+   client set the SA_NOCLDWAIT on their SIGCHLD handler.
+ */
+static
+void vg_babyeater ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext *uc )
+{
+   Int status;
+   Int pid;
+
+   vg_assert(sigNo == VKI_SIGCHLD);
+
+   while((pid = VG_(waitpid)(-1, &status, VKI_WNOHANG)) > 0) {
+      if (VG_(clo_trace_signals)) 
+	 VG_(message)(Vg_DebugMsg, "babyeater reaped %d", pid);
+   }
+}
+
+/* 
+   Receive an async signal from the host. 
+
+   It is called in the context of a proxy LWP, and therefore is an
+   async signal aimed at one of our threads.  In this case, we pass
+   the signal info to the main thread with VG_(proxy_handlesig)().
+
+   This should *never* be in the context of the main LWP, because
+   all signals for which this is the handler should be blocked there.
+*/
+static 
+void vg_async_signalhandler ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext *uc )
+{
+   if (VG_(gettid)() == VG_(main_pid)) {
+      VG_(printf)("got signal %d in LWP %d (%d)\n",
+		  sigNo, VG_(gettid)(), VG_(gettid)(), VG_(main_pid));
+      vg_assert(VG_(ksigismember)(&uc->uc_sigmask, sigNo));
+   }
+
+   vg_assert(VG_(gettid)() != VG_(main_pid));
+
+   VG_(proxy_handlesig)(info, &uc->uc_mcontext);
+}
+
+/* 
+   Receive a sync signal from the host. 
+
+   This should always be called from the main thread, though it may be
+   called in a proxy LWP if someone sends an async version of one of
+   the sync signals.
+*/
+static
+void vg_sync_signalhandler ( Int sigNo, vki_ksiginfo_t *info, struct vki_ucontext *uc )
+{
    Int           dummy_local;
-   Bool          sane;
-   vki_ksigset_t saved_procmask;
+
+   vg_assert(info != NULL);
+   vg_assert(info->si_signo == sigNo);
+   vg_assert(sigNo == VKI_SIGSEGV ||
+	     sigNo == VKI_SIGBUS  ||
+	     sigNo == VKI_SIGFPE  ||
+	     sigNo == VKI_SIGILL);
+
+   if (VG_(gettid)() != VG_(main_pid)) {
+      /* We were sent one of our sync signals in an async way (or the
+	 proxy LWP code has a bug) */
+      vg_assert(info->si_code <= VKI_SI_USER);
+
+      VG_(proxy_handlesig)(info, &uc->uc_mcontext);
+      return;
+   }
+
 
    /*
    if (sigNo == VKI_SIGUSR1) {
@@ -1381,75 +1607,89 @@
    vg_assert((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&dummy_local));
    vg_assert((Char*)(&dummy_local) < (Char*)(&(VG_(sigstack)[VG_SIGSTACK_SIZE_W])));
 
-   VG_(block_all_host_signals)( &saved_procmask );
-
-   /* This is a sanity check.  Either a signal has arrived because the
-      client set a handler for it, or because some thread sigwaited on
-      it.  Establish that at least one of these is the case. */
-   sane = False;
-   if (vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_DFL
-       && vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_IGN) {
-      sane = True;
-   } else {
-      for (tid = 1; tid < VG_N_THREADS; tid++) {
-         if (VG_(threads)[tid].status != VgTs_WaitSIG) 
-            continue;
-         if (VG_(ksigismember)(&VG_(threads)[tid].sigs_waited_for, sigNo))
-            sane = True;
-      }
-   }
-   if (!sane) {
-      if (VG_(clo_trace_signals)) {
-         VG_(add_to_msg)("unexpected!");
-         VG_(end_msg)();
-      }
-      /* Note: we panic with all signals blocked here.  Don't think
-         that matters. */
-      VG_(core_panic)("vg_oursignalhandler: unexpected signal");
-   }
-   /* End of the sanity check. */
-
-   /* Decide what to do with it. */
-   if (vg_dcss.dcss_sigpending[sigNo]) {
-      /* pending; ignore it. */
-      if (VG_(clo_trace_signals)) {
-         VG_(add_to_msg)("already pending; discarded" );
-         VG_(end_msg)();
-      }
-   } else {
-      /* Ok, we'd better deliver it to the client. */
-      /* Queue it up for delivery at some point in the future. */
-      vg_dcss.dcss_sigpending[sigNo] = True;
-      vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
-      if (VG_(clo_trace_signals)) {
-         VG_(add_to_msg)("queued" );
-         VG_(end_msg)();
-      }
-   }
-
-   /* We've finished messing with the queue, so re-enable host
-      signals. */
-   VG_(restore_all_host_signals)( &saved_procmask );
-
-   if ( (sigNo == VKI_SIGSEGV || sigNo == VKI_SIGBUS 
-         || sigNo == VKI_SIGFPE || sigNo == VKI_SIGILL)
-        &&
-        VG_(scheduler_jmpbuf_valid)
-      ) {
+   if (VG_(scheduler_jmpbuf_valid)) {
       /* Can't continue; must longjmp back to the scheduler and thus
          enter the sighandler immediately. */
+      VG_(memcpy)(&VG_(unresumable_siginfo), info, sizeof(vki_ksiginfo_t));
+   
       VG_(longjmpd_on_signal) = sigNo;
       __builtin_longjmp(VG_(scheduler_jmpbuf),1);
    }
 
-   if (sigNo == VKI_SIGSEGV && !VG_(scheduler_jmpbuf_valid)) {
-      if (++segv_warns <= 3) {
-	VG_(message)(Vg_UserMsg, 
-           "Warning: SIGSEGV not in user code; either from syscall kill()" );
-	VG_(message)(Vg_UserMsg, 
-           "   or possible Valgrind bug.  "
-           "This message is only shown 3 times." );
-      }
+   if (info->si_code <= VKI_SI_USER) {
+      /* 
+	 OK, one of sync signals was sent from user-mode, so try to
+	 deliver it to someone who cares.  We've currently got the
+	 signal blocked because we're in the handler, so some other
+	 thread will pick it up if they want it.  If all the other
+	 threads have this signal blocked, it will remain pending.
+	 wait for .01sec to see if someone picks it up, then eat it if
+	 not (otherwise we will just keep spinning, since we *can't*
+	 block these signals).
+
+	 XXX This is crap.  All the proxy LWPs could easily be
+	 blocking this signal transiently (say, waiting for us to
+	 respond to a SigACK), but want it eventually.  Maybe we
+	 should just bite the bullet and scan the per-thread block
+	 sets and decide who to deliver it to.
+       */
+      static const struct vki_timespec ts = { 0, (Int)(.01 * 1000000000) };
+      static const struct vki_timespec zero = { 0, 0 };
+      vki_ksigset_t set;
+      vki_ksiginfo_t si;
+      Bool dropped = False;
+
+      VG_(kkill)(VG_(main_pid), sigNo);
+      VG_(nanosleep)(&ts, NULL);
+      VG_(ksigemptyset)(&set);
+      VG_(ksigaddset)(&set, sigNo);
+      while(VG_(ksigtimedwait)(&set, &si, &zero) == sigNo)
+	 dropped = True;
+
+      if (dropped)
+	 VG_(message)(Vg_UserMsg,
+		      "Dropped pending signal %d (%s) because all threads were blocking it,"
+		      "but we cannot block it forever.",
+		      sigNo, signame(sigNo));
+   } else {
+      /* 
+	 A bad signal came from the kernel (indicating an instruction
+	 generated it), but there was no jumpbuf set up.  This means
+	 it was actually generated by Valgrind internally.
+       */
+      struct vki_sigcontext *sc = &uc->uc_mcontext;
+
+      VG_(message)(Vg_DebugMsg, 
+		   "INTERNAL ERROR: Valgrind received a signal %d (%s) - exiting",
+		   sigNo, signame(sigNo));
+      VG_(message)(Vg_DebugMsg, 
+		   "si_code=%x Fault EIP: %p; Faulting address: %p",
+		   info->si_code, sc->eip, info->_sifields._sigfault._addr);
+
+      if (0)
+	 VG_(kill_self)(sigNo);		/* generate a core dump */
+      VG_(core_panic)("Killed by fatal signal");
+   }
+}
+
+
+/* 
+   This signal handler exists only so that the scheduler thread can
+   poke the LWP to make it fall out of whatever syscall it is in.
+   Used for thread termination and cancellation.
+ */
+static void proxy_sigvg_handler(int signo, vki_ksiginfo_t *si, struct vki_ucontext *uc)
+{
+   vg_assert(signo == VKI_SIGVGINT || signo == VKI_SIGVGKILL);
+   vg_assert(si->si_signo == signo);
+
+   /* only pay attention to it if it came from the scheduler */
+   if (si->si_code == VKI_SI_TKILL &&
+       si->_sifields._kill._pid == VG_(main_pid)) {
+      vg_assert(si->si_code == VKI_SI_TKILL);
+      vg_assert(si->_sifields._kill._pid == VG_(main_pid));
+   
+      VG_(proxy_handlesig)(si, &uc->uc_mcontext);
    }
 }
 
@@ -1459,13 +1699,11 @@
 */
 void VG_(unblock_host_signal) ( Int sigNo )
 {
-   Int ret;
-   vki_ksigset_t set;
-   VG_(ksigemptyset)(&set);
-   ret = VG_(ksigaddset)(&set,sigNo);
-   vg_assert(ret == 0);
-   ret = VG_(ksigprocmask)(VKI_SIG_UNBLOCK,&set,NULL);
-   vg_assert(ret == 0);
+   vg_assert(sigNo == VKI_SIGSEGV ||
+	     sigNo == VKI_SIGBUS ||
+	     sigNo == VKI_SIGILL ||
+	     sigNo == VKI_SIGFPE);
+   set_main_sigmask();
 }
 
 
@@ -1482,6 +1720,76 @@
    VG_(printf)("}\n");
 }
 
+/* 
+   In pre-2.6 kernels, the kernel didn't distribute signals to threads
+   in a thread-group properly, so we need to do it here.
+ */
+void VG_(route_signals)(void)
+{
+   static const struct vki_timespec zero = { 0, 0 };
+   static ThreadId start_tid = 1;	/* tid to start scanning from */
+   vki_ksigset_t set;
+   vki_ksigset_t resend;
+   vki_ksiginfo_t si;
+   Int sigNo;
+
+   vg_assert(VG_(gettid)() == VG_(main_pid));
+   vg_assert(is_correct_sigmask());
+
+   if (!VG_(do_signal_routing))
+      return;
+
+   VG_(ksigemptyset)(&resend);
+
+   /* get the scheduler LWP's signal mask, and use it as the set of
+      signals we're polling for */
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, NULL, &set);
+
+   while(VG_(ksigtimedwait)(&set, &si, &zero) > 0) {
+      ThreadId tid;
+      ThreadId end_tid;
+      Int target = -1;
+      
+      end_tid = start_tid - 1;
+      if (end_tid < 0 || end_tid >= VG_N_THREADS)
+	      end_tid = VG_N_THREADS-1;
+
+      sigNo = si.si_signo;
+
+      /* look for a suitable thread to deliver it to */
+      for(tid = start_tid;
+	  tid != end_tid;
+	  tid = (tid + 1) % VG_N_THREADS) {
+	 ThreadState *tst = &VG_(threads)[tid];
+
+	 if (tst->status == VgTs_Empty)
+	    continue;
+
+	 if (!VG_(ksigismember)(&tst->sig_mask, sigNo)) {
+	    vg_assert(tst->proxy != NULL);
+	    target = tid;
+	    start_tid = tid;
+	    break;
+	 }
+      }
+      
+      if (target != -1) {
+	 if (VG_(clo_trace_signals))
+	    VG_(message)(Vg_DebugMsg, "Routing signal %d to tid %d",
+			 sigNo, tid);
+	 VG_(proxy_sendsig)(tid, sigNo);
+      } else {
+	 if (VG_(clo_trace_signals))
+	    VG_(message)(Vg_DebugMsg, "Adding signal %d to pending set",
+			 sigNo);
+	 VG_(ksigaddset)(&resend, sigNo);
+      }
+   }
+
+   for(sigNo = 0; sigNo < VKI_KNSIG; sigNo++)
+      if (VG_(ksigismember)(&resend, sigNo))
+	 VG_(ktkill)(VG_(main_pid), sigNo);
+}
 
 /* At startup, copy the process' real signal state to the SCSS.
    Whilst doing this, block all real signals.  Then calculate SKSS and
@@ -1490,16 +1798,20 @@
 void VG_(sigstartup_actions) ( void )
 {
    Int i, ret;
-
    vki_ksigset_t  saved_procmask;
    vki_kstack_t   altstack_info;
    vki_ksigaction sa;
 
    /* VG_(printf)("SIGSTARTUP\n"); */
-   /* Block all signals.  
-      saved_procmask remembers the previous mask. */
+   /* Block all signals.  saved_procmask remembers the previous mask,
+      which the first thread inherits.
+   */
    VG_(block_all_host_signals)( &saved_procmask );
 
+   /* Set the signal mask which the scheduler LWP should maintain from
+      now on. */
+   set_main_sigmask();
+
    /* Copy per-signal settings to SCSS. */
    for (i = 1; i <= VKI_KNSIG; i++) {
 
@@ -1517,6 +1829,14 @@
       vg_scss.scss_per_sig[i].scss_restorer = sa.ksa_restorer;
    }
 
+   /* Our private internal signals are treated as ignored */
+   vg_scss.scss_per_sig[VKI_SIGVGINT].scss_handler = VKI_SIG_IGN;
+   vg_scss.scss_per_sig[VKI_SIGVGINT].scss_flags = VKI_SA_SIGINFO;
+   VG_(ksigfillset)(&vg_scss.scss_per_sig[VKI_SIGVGINT].scss_mask);
+   vg_scss.scss_per_sig[VKI_SIGVGKILL].scss_handler = VKI_SIG_IGN;
+   vg_scss.scss_per_sig[VKI_SIGVGKILL].scss_flags = VKI_SA_SIGINFO;
+   VG_(ksigfillset)(&vg_scss.scss_per_sig[VKI_SIGVGKILL].scss_mask);
+
    /* Copy the alt stack, if any. */
    ret = VG_(ksigaltstack)(NULL, &vg_scss.altstack);
    vg_assert(ret == 0);
@@ -1524,12 +1844,7 @@
    /* Copy the process' signal mask into the root thread. */
    vg_assert(VG_(threads)[1].status == VgTs_Runnable);
    VG_(threads)[1].sig_mask = saved_procmask;
-
-   /* Initialise DCSS. */
-   for (i = 1; i <= VKI_KNSIG; i++) {
-      vg_dcss.dcss_sigpending[i] = False;
-      vg_dcss.dcss_destthread[i] = VG_INVALID_THREADID;
-   }
+   VG_(proxy_setsigmask)(1);
 
    /* Register an alternative stack for our own signal handler to run on. */
    altstack_info.ss_sp = &(VG_(sigstack)[0]);
@@ -1551,6 +1866,7 @@
    /* Calculate SKSS and apply it.  This also sets the initial kernel
       mask we need to run with. */
    VG_(handle_SCSS_change)( True /* forced update */ );
+
 }
 
 
diff --git a/coregrind/vg_syscall.S b/coregrind/vg_syscall.S
index faa23a5..7b0fd00 100644
--- a/coregrind/vg_syscall.S
+++ b/coregrind/vg_syscall.S
@@ -30,138 +30,89 @@
 */
 
 #include "vg_constants.h"
-
+#include "vg_unistd.h"
 
 .globl	VG_(do_syscall)
 
-# NOTE that this routine expects the simulated machines state
-# to be in m_state_static.  Therefore it needs to be wrapped by
-# code which copies from baseBlock before the call, into
-# m_state_static, and back afterwards.
+/*
+	Perform a Linux syscall with int 0x80
 	
+	Syscall args are passed on the stack
+	Int VG_(do_syscall)(Int syscall_no, ...)
+
+	This has no effect on the virtual machine; the expectation is
+	that the syscall mechanism makes no useful changes to any
+	register except %eax, which is returned.
+ */
 VG_(do_syscall):
-	# Save all the int registers of the real machines state on the
-	# simulators stack.
-	pushal
-
-	# and save the real FPU state too
-	fwait
-
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	qq1nosse
-	fxsave	VG_(real_sse_state_saved_over_syscall)
-	andl	$0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
-	fxrstor	VG_(real_sse_state_saved_over_syscall)
-	jmp	qq1merge
-qq1nosse:		
-	fnsave	VG_(real_sse_state_saved_over_syscall)
-	frstor	VG_(real_sse_state_saved_over_syscall)
-qq1merge:
-	popfl
-	
-	# remember what the simulators stack pointer is
-	movl	%esp, VG_(esp_saved_over_syscall)
-	
-	# Now copy the simulated machines state into the real one
-	# esp still refers to the simulators stack
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	qq2nosse
-	andl	$0x0000FFBF, VG_(m_state_static)+64+24
-	fxrstor	VG_(m_state_static)+64
-	jmp	qq2merge
-qq2nosse:	
-	frstor	VG_(m_state_static)+64
-qq2merge:
-	popfl
-	
-	movl	VG_(m_state_static)+56, %eax
-	pushl	%eax
-	popfl
-#if 0
-	/* don't bother to save/restore seg regs across the kernel iface.  
-	   Once we have our hands on them, our simulation of it is 
-	   completely internal, and the kernel sees nothing.  
-	   What's more, loading new values in to %cs seems 
-	   to be impossible anyway. */
-	movw	VG_(m_state_static)+0, %cs
-	movw	VG_(m_state_static)+4, %ss
-	movw	VG_(m_state_static)+8, %ds
-	movw	VG_(m_state_static)+12, %es
-	movw	VG_(m_state_static)+16, %fs
-	movw	VG_(m_state_static)+20, %gs
-#endif
-	movl	VG_(m_state_static)+24, %eax
-	movl	VG_(m_state_static)+28, %ecx
-	movl	VG_(m_state_static)+32, %edx
-	movl	VG_(m_state_static)+36, %ebx
-	movl	VG_(m_state_static)+40, %esp
-	movl	VG_(m_state_static)+44, %ebp
-	movl	VG_(m_state_static)+48, %esi
-	movl	VG_(m_state_static)+52, %edi
-
-	# esp now refers to the simulatees stack
-	# Do the actual system call
+	push	%esi
+	push	%edi
+	push	%ebx
+	push	%ebp
+	movl	16+ 4(%esp),%eax
+	movl	16+ 8(%esp),%ebx
+	movl	16+12(%esp),%ecx
+	movl	16+16(%esp),%edx
+	movl	16+20(%esp),%esi
+	movl	16+24(%esp),%edi
+	movl	16+28(%esp),%ebp
 	int	$0x80
-
-	# restore stack as soon as possible
-	# esp refers to simulatees stack
-	movl	%esp, VG_(m_state_static)+40
-	movl	VG_(esp_saved_over_syscall), %esp
-	# esp refers to simulators stack
-
-	# ... and undo everything else.  
-	# Copy real state back to simulated state.
-#if 0
-	movw	%cs, VG_(m_state_static)+0
-	movw	%ss, VG_(m_state_static)+4
-	movw	%ds, VG_(m_state_static)+8
-	movw	%es, VG_(m_state_static)+12
-	movw	%fs, VG_(m_state_static)+16
-	movw	%gs, VG_(m_state_static)+20
-#endif
-	movl	%eax, VG_(m_state_static)+24
-	movl	%ecx, VG_(m_state_static)+28
-	movl	%edx, VG_(m_state_static)+32
-	movl	%ebx, VG_(m_state_static)+36
-	movl	%ebp, VG_(m_state_static)+44
-	movl	%esi, VG_(m_state_static)+48
-	movl	%edi, VG_(m_state_static)+52
-	pushfl
-	popl	%eax
-	movl	%eax, VG_(m_state_static)+56
-	fwait
-
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	pp2nosse
-	fxsave	VG_(m_state_static)+64
-	andl	$0x0000FFBF, VG_(m_state_static)+64+24
-	fxrstor	VG_(m_state_static)+64
-	jmp	pp2merge
-pp2nosse:
-	fnsave	VG_(m_state_static)+64
-	frstor	VG_(m_state_static)+64
-pp2merge:
-	popfl
-	
-	# Restore the state of the simulator
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	pp1nosse
-	andl	$0x0000FFBF, VG_(real_sse_state_saved_over_syscall)+24
-	fxrstor	VG_(real_sse_state_saved_over_syscall)
-	jmp	pp1merge
-pp1nosse:
-	frstor	VG_(real_sse_state_saved_over_syscall)
-pp1merge:
-	popfl
-
-	popal
-
+	popl	%ebp
+	popl	%ebx
+	popl	%edi
+	popl	%esi
 	ret
 
+/*
+	Perform a clone system call.  clone is strange because it has
+	fork()-like return-twice semantics, so it needs special
+	handling here.
+
+	int VG_(clone)(int (*fn)(void *), void *child_stack, int flags, void *arg, 
+	               0                  4                  8          12
+		       pid_t *child_tid, pid_t *parent_tid)
+		       16                20
+
+ */
+.globl VG_(clone)
+VG_(clone):
+#define FSZ	(4+4+4)			/* frame size = retaddr+ebx+edi */
+	push	%ebx
+	push	%edi
+	/* set up child stack with function and arg */
+	movl	 4+FSZ(%esp), %ecx	/* child stack */
+	movl	12+FSZ(%esp), %ebx	/* fn arg */
+	movl	 0+FSZ(%esp), %eax	/* fn */
+	lea	-8(%ecx), %ecx		/* make space on stack */
+	movl	%ebx, 4(%ecx)		/*   fn arg */
+	movl	%eax, 0(%ecx)		/*   fn */
+
+	/* get other args to clone */
+	movl	 8+FSZ(%esp), %ebx	/* flags */
+	movl	20+FSZ(%esp), %edx	/* parent tid * */
+	movl	16+FSZ(%esp), %edi	/* child tid * */
+	movl	$__NR_clone, %eax
+	int	$0x80
+	testl	%eax, %eax
+	jnz	1f
+
+	/* CHILD - call thread function */
+	popl	%eax
+	call	*%eax
+
+	/* exit with result */
+	movl	%eax, %ebx
+	movl	$__NR_exit, %eax
+	int	$0x80
+
+	/* Hm, exit returned */
+	ud2
+		
+1:	/* PARENT or ERROR */
+	pop	%edi
+	pop	%ebx
+	ret
+	
 ##--------------------------------------------------------------------##
 ##--- end                                             vg_syscall.S ---##
 ##--------------------------------------------------------------------##
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index 9f1d162..555882b 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -40,11 +40,9 @@
    * notify the skin of the memory events (reads, writes) happening
 
    * perform the syscall, usually by passing it along to the kernel
-     unmodified.  However, because we simulate signals ourselves,
-     signal-related syscalls are routed to vg_signal.c, and are not
-     delivered to the kernel.
+     unmodified.
 
-   A magical piece of assembly code, vg_do_syscall(), in vg_syscall.S
+   A magical piece of assembly code, VG_(do_syscall)(), in vg_syscall.S
    does the tricky bit of passing a syscall to the kernel, whilst
    having the simulator retain control.
 */
@@ -55,6 +53,70 @@
    if (VG_(clo_trace_syscalls))        \
       VG_(printf)(format, ## args)
 
+
+/* ---------------------------------------------------------------------
+   A simple atfork() facility for Valgrind's internal use
+   ------------------------------------------------------------------ */
+
+struct atfork {
+   vg_atfork_t	pre;
+   vg_atfork_t	parent;
+   vg_atfork_t	child;
+};
+
+#define VG_MAX_ATFORK	10
+
+static struct atfork atforks[VG_MAX_ATFORK];
+static Int n_atfork;
+
+void VG_(atfork)(vg_atfork_t pre, vg_atfork_t parent, vg_atfork_t child)
+{
+   Int i;
+
+   for(i = 0; i < n_atfork; i++) {
+      if (atforks[i].pre == pre &&
+	  atforks[i].parent == parent &&
+	  atforks[i].child == child)
+	 return;
+   }
+
+   if (n_atfork >= VG_MAX_ATFORK)
+      VG_(core_panic)("Too many VG_(atfork) handlers requested: raise VG_MAX_ATFORK");
+
+   atforks[n_atfork].pre    = pre;
+   atforks[n_atfork].parent = parent;
+   atforks[n_atfork].child  = child;
+
+   n_atfork++;
+}
+
+static void do_atfork_pre(ThreadId tid)
+{
+   Int i;
+
+   for(i = 0; i < n_atfork; i++)
+      if (atforks[i].pre != NULL)
+	 (*atforks[i].pre)(tid);
+}
+
+static void do_atfork_parent(ThreadId tid)
+{
+   Int i;
+
+   for(i = 0; i < n_atfork; i++)
+      if (atforks[i].parent != NULL)
+	 (*atforks[i].parent)(tid);
+}
+
+static void do_atfork_child(ThreadId tid)
+{
+   Int i;
+
+   for(i = 0; i < n_atfork; i++)
+      if (atforks[i].child != NULL)
+	 (*atforks[i].child)(tid);
+}
+
 /* ---------------------------------------------------------------------
    Doing mmap, munmap, mremap, mprotect
    ------------------------------------------------------------------ */
@@ -199,15 +261,7 @@
 UInt get_shm_size ( Int shmid )
 {
    struct shmid_ds buf;
-   long __res;
-    __asm__ volatile ( "int $0x80"
-                       : "=a" (__res)
-                       : "0" (__NR_ipc),
-                         "b" ((long)(24) /*IPCOP_shmctl*/),
-                         "c" ((long)(shmid)),
-                         "d" ((long)(IPC_STAT)),
-                         "S" ((long)(0)),
-                         "D" ((long)(&buf)) );
+   long __res = VG_(do_syscall)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid, IPC_STAT, 0, &buf);
     if ( VG_(is_kerror) ( __res ) )
        return 0;
  
@@ -225,7 +279,8 @@
 }
 
 static 
-void pre_mem_read_sendmsg ( ThreadId tid, Char *msg, UInt base, UInt size )
+void pre_mem_read_sendmsg ( ThreadId tid,
+                            Char *msg, UInt base, UInt size )
 {
    Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_TRANSIENT );
    SYSCALL_TRACK( pre_mem_read, tid, outmsg, base, size );
@@ -234,7 +289,8 @@
 }
 
 static 
-void pre_mem_write_recvmsg ( ThreadId tid, Char *msg, UInt base, UInt size )
+void pre_mem_write_recvmsg ( ThreadId tid,
+                             Char *msg, UInt base, UInt size )
 {
    Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_TRANSIENT );
    SYSCALL_TRACK( pre_mem_write, tid, outmsg, base, size );
@@ -286,8 +342,9 @@
 }
 
 static
-void pre_mem_read_sockaddr ( ThreadId tid, Char *description,
-                             struct sockaddr *sa, UInt salen )
+void pre_mem_read_sockaddr ( ThreadId tid,
+			     Char *description,
+			     struct sockaddr *sa, UInt salen )
 {
    Char *outmsg;
 
@@ -300,7 +357,7 @@
    VG_(sprintf) ( outmsg, description, ".sa_family" );
    SYSCALL_TRACK( pre_mem_read, tid, outmsg, 
                   (UInt) &sa->sa_family, sizeof (sa_family_t));
-               
+
    switch (sa->sa_family) {
                   
       case AF_UNIX:
@@ -337,7 +394,7 @@
          VG_(sprintf) ( outmsg, description, ".sin6_scope_id" );
          SYSCALL_TRACK( pre_mem_read, tid, outmsg,
             (UInt) &((struct sockaddr_in6 *) sa)->sin6_scope_id,
-            sizeof (uint32_t));
+			sizeof (uint32_t));
 #        endif
          break;
                
@@ -377,7 +434,7 @@
       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
       if (buflen_in > 0) {
          VG_(track_events).pre_mem_write ( Vg_CoreSysCall,
-                                           tid, buf_s, buf_p, buflen_in );
+					   tid, buf_s, buf_p, buflen_in );
       }
    }
 }
@@ -413,38 +470,3881 @@
 }
 
 /* ---------------------------------------------------------------------
+   Vet file descriptors for sanity
+   ------------------------------------------------------------------ */
+
+/* Return true if we're allowed to use or create this fd */
+static Bool fd_allowed(Int fd, const Char *syscall, ThreadId tid)
+{
+   if (fd < 0 || fd > VG_MAX_FD || fd == VG_(clo_logfile_fd)) {
+      VG_(message)(Vg_UserMsg, "Warning: bad use of file descriptor %d in syscall %s()",
+		   fd, syscall);
+      if (fd == VG_(clo_logfile_fd))
+	 VG_(message)(Vg_UserMsg, "   Use --logfile-fd=<number> to select an alternative "
+		      "logfile fd.");
+      if (VG_(clo_verbosity) > 1) {
+	 ExeContext *ec = VG_(get_ExeContext)(tid);
+	 VG_(pp_ExeContext)(ec);
+      }
+      return False;
+   }
+   return True;
+}
+
+
+/* ---------------------------------------------------------------------
    The Main Entertainment ...
    ------------------------------------------------------------------ */
 
-void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
+
+#define PRE(x)	\
+	static void before_##x(ThreadId tid, ThreadState *tst)
+#define POST(x)	\
+	static void after_##x(ThreadId tid, ThreadState *tst)
+
+#define STR(x)	#x
+#define PREALIAS(new, old)	\
+	PRE(new) __attribute__((alias(STR(before_##old))))
+#define POSTALIAS(new, old)	\
+	POST(new) __attribute__((alias(STR(after_##old))))
+
+#define SYSNO	(tst->m_eax)		/* in PRE(x)  */
+#define res	((Int)tst->m_eax)	/* in POST(x) */
+#define arg1	(tst->m_ebx)
+#define arg2	(tst->m_ecx)
+#define arg3	(tst->m_edx)
+#define arg4	(tst->m_esi)
+#define arg5	(tst->m_edi)
+#define arg6	(tst->m_ebp)
+
+PRE(exit_group)
+{
+   VG_(core_panic)("syscall exit_group() not caught by the scheduler?!");
+}
+
+PRE(exit)
+{
+   VG_(core_panic)("syscall exit() not caught by the scheduler?!");
+}
+
+PRE(clone)
+{
+   VG_(unimplemented)
+      ("clone(): not supported by Valgrind.\n   "
+       "We do now support programs linked against\n   "
+       "libpthread.so, though.  Re-run with -v and ensure that\n   "
+       "you are picking up Valgrind's implementation of libpthread.so.");
+}
+
+PRE(ptrace)
+{
+   /* long ptrace (enum __ptrace_request request, pid_t pid, 
+      void *addr, void *data); ... sort of. */
+   /* Sigh ... the /usr/include/sys/user.h on R H 6.2 doesn't 
+      define struct user_fpxregs_struct.  On the basis that it 
+      is defined as follows on my R H 7.2 (glibc-2.2.4) box, 
+      I kludge it.
+
+      struct user_fpxregs_struct
+      {
+      unsigned short int cwd;
+      unsigned short int swd;
+      unsigned short int twd;
+      unsigned short int fop;
+      long int fip;
+      long int fcs;
+      long int foo;
+      long int fos;
+      long int mxcsr;
+      long int reserved;
+      long int st_space[32];  8*16 bytes for each FP-reg = 128 bytes
+      long int xmm_space[32]; 8*16 bytes for each XMM-reg = 128 bytes
+      long int padding[56];
+      };
+   */
+   const Int sizeof_struct_user_fpxregs_struct
+      = sizeof(unsigned short) * (1 + 1 + 1 + 1) 
+      + sizeof(long int) * (1 + 1 + 1 + 1 + 1 + 1 + 32 + 32 + 56);
+
+   MAYBE_PRINTF("ptrace ( %d, %d, %p, %p )\n", arg1,arg2,arg3,arg4);
+   switch (arg1) {
+   case 12:   /* PTRACE_GETREGS */
+      SYSCALL_TRACK( pre_mem_write, tid, "ptrace(getregs)", arg4, 
+		     sizeof (struct user_regs_struct));
+      break;
+   case 14:   /* PTRACE_GETFPREGS */
+      SYSCALL_TRACK( pre_mem_write, tid, "ptrace(getfpregs)", arg4, 
+		     sizeof (struct user_fpregs_struct));
+      break;
+   case 18:   /* PTRACE_GETFPXREGS */
+      SYSCALL_TRACK( pre_mem_write, tid, "ptrace(getfpxregs)", arg4, 
+		     sizeof_struct_user_fpxregs_struct);
+      break;
+   case 1: case 2: case 3:    /* PTRACE_PEEK{TEXT,DATA,USER} */
+      SYSCALL_TRACK( pre_mem_write, tid, "ptrace(peek)", arg4, 
+		     sizeof (long));
+      break;
+   case 13:   /* PTRACE_SETREGS */
+      SYSCALL_TRACK( pre_mem_read, tid, "ptrace(setregs)", arg4, 
+		     sizeof (struct user_regs_struct));
+      break;
+   case 15:   /* PTRACE_SETFPREGS */
+      SYSCALL_TRACK( pre_mem_read, tid, "ptrace(setfpregs)", arg4, 
+		     sizeof (struct user_fpregs_struct));
+      break;
+   case 19:   /* PTRACE_SETFPXREGS */
+      SYSCALL_TRACK( pre_mem_read, tid, "ptrace(setfpxregs)", arg4, 
+		     sizeof_struct_user_fpxregs_struct);
+      break;
+   default:
+      break;
+   }
+}
+
+POST(ptrace)
+{
+   const Int sizeof_struct_user_fpxregs_struct
+      = sizeof(unsigned short) * (1 + 1 + 1 + 1) 
+      + sizeof(long int) * (1 + 1 + 1 + 1 + 1 + 1 + 32 + 32 + 56);
+
+   switch (arg1) {
+   case 12:  /* PTRACE_GETREGS */
+      VG_TRACK( post_mem_write, arg4, 
+		sizeof (struct user_regs_struct));
+      break;
+   case 14:  /* PTRACE_GETFPREGS */
+      VG_TRACK( post_mem_write, arg4, 
+		sizeof (struct user_fpregs_struct));
+      break;
+   case 18:  /* PTRACE_GETFPXREGS */
+      VG_TRACK( post_mem_write, arg4, 
+		sizeof_struct_user_fpxregs_struct);
+      break;
+   case 1: case 2: case 3:    /* PTRACE_PEEK{TEXT,DATA,USER} */
+      VG_TRACK( post_mem_write, arg4, sizeof (long));
+      break;
+   default:
+      break;
+   }
+}
+
+PRE(mount)
+{
+   MAYBE_PRINTF( "mount( %p, %p, %p )\n" ,arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid,"mount(specialfile)",arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid,"mount(dir)",arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid,"mount(filesystemtype)",arg3);
+}
+
+PRE(umount)
+{
+   /* int umount(const char *path) */
+   MAYBE_PRINTF("umount( %p )\n", arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid,"umount(path)",arg1);
+}
+
+PRE(modify_ldt)
+{
+   MAYBE_PRINTF("modify_ldt ( %d, %p, %d )\n", arg1,arg2,arg3);
+   if (arg1 == 0) {
+      /* read the LDT into ptr */
+      SYSCALL_TRACK( pre_mem_write, tid, 
+		     "modify_ldt(ptr)(func=0)", arg2, arg3 );
+   }
+   if (arg1 == 1 || arg1 == 0x11) {
+      /* write the LDT with the entry pointed at by ptr */
+      SYSCALL_TRACK( pre_mem_read, tid, 
+		     "modify_ldt(ptr)(func=1 or 0x11)", arg2, 
+		     sizeof(struct vki_modify_ldt_ldt_s) );
+   }
+   /* "do" the syscall ourselves; the kernel never sees it */
+   res = VG_(sys_modify_ldt)( tid, arg1, (void*)arg2, arg3 );
+
+   if (arg1 == 0 && !VG_(is_kerror)(res) && res > 0) {
+      VG_TRACK( post_mem_write, arg2, res );
+   }
+}
+
+PRE(setresgid)
+{
+   /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
+   MAYBE_PRINTF("setresgid ( %d, %d, %d )\n", arg1, arg2, arg3);
+}
+
+PRE(vhangup)
+{
+   MAYBE_PRINTF("vhangup()\n");
+}
+
+PRE(iopl)
+{
+   MAYBE_PRINTF("iopl ( %d )\n", arg1);
+}
+
+PRE(setxattr)
+{
+   MAYBE_PRINTF("setxattr ( %p, %p, %p, %d, %d )\n",
+		arg1, arg2, arg3, arg4, arg5);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "setxattr(path)", arg1 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "setxattr(name)", arg2 );
+   SYSCALL_TRACK( pre_mem_read, tid, "setxattr(value)", arg3, arg4 );
+}
+
+PREALIAS(lsetxattr, setxattr);
+
+PRE(fsetxattr)
+{
+   /* int fsetxattr (int filedes, const char *name,
+      const void *value, size_t size, int flags); */
+   MAYBE_PRINTF("fsetxattr ( %d, %p, %p, %d, %d )\n",
+		arg1, arg2, arg3, arg4, arg5);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "fsetxattr(name)", arg2 );
+   SYSCALL_TRACK( pre_mem_read, tid, "fsetxattr(value)", arg3, arg4 );
+}
+
+PRE(getxattr)
+{
+   MAYBE_PRINTF("getxattr ( %p, %p, %p, %d )\n", 
+		arg1,arg2,arg3, arg4);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "getxattr(path)", arg1 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "getxattr(name)", arg2 );
+   SYSCALL_TRACK( pre_mem_write, tid, "getxattr(value)", arg3, arg4 );
+}
+
+POST(getxattr)
+{
+   if (res > 0 && arg3 != (Addr)NULL) {
+      VG_TRACK( post_mem_write, arg3, res );
+   }
+}
+
+PREALIAS(lgetxattr, getxattr);
+POSTALIAS(lgetxattr, getxattr);
+
+PRE(fgetxattr)
+{
+   MAYBE_PRINTF("fgetxattr ( %d, %p, %p, %d )\n",
+		arg1, arg2, arg3, arg4);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "fgetxattr(name)", arg2 );
+   SYSCALL_TRACK( pre_mem_write, tid, "fgetxattr(value)", arg3, arg4 );
+}
+
+POST(fgetxattr)
+{
+   if (res > 0 && arg3 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg3, res );
+}
+
+PRE(listxattr)
+{
+   MAYBE_PRINTF("listxattr ( %p, %p, %d )\n", arg1, arg2, arg3);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(path)", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "listxattr(list)", arg2, arg3 );
+}
+
+POST(listxattr)
+{
+   if (res > 0 && arg2 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg2, res );
+}
+
+PREALIAS(llistxattr, listxattr);
+POSTALIAS(llistxattr, listxattr);
+
+PRE(flistxattr)
+{
+   /* ssize_t flistxattr (int filedes, char *list, size_t size); */
+   MAYBE_PRINTF("flistxattr ( %d, %p, %d )\n", arg1, arg2, arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "listxattr(list)", arg2, arg3 );
+}
+
+POST(flistxattr)
+{
+   if (res > 0 && arg2 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg2, res );
+}
+
+PRE(removexattr)
+{
+   MAYBE_PRINTF("removexattr ( %p, %p )\n", arg1, arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(path)", arg1 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(name)", arg2 );
+}
+
+PREALIAS(lremovexattr, removexattr);
+
+PRE(fremovexattr)
+{
+   MAYBE_PRINTF("removexattr ( %d, %p )\n", arg1, arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(name)", arg2 );
+}
+
+PRE(quotactl)
+{
+   MAYBE_PRINTF("quotactl (0x%x, %p, 0x%x, 0x%x )\n", 
+		arg1,arg2,arg3, arg4);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "quotactl(special)", arg2 );
+}
+
+PRE(lookup_dcookie)
+{
+   MAYBE_PRINTF("lookup_dcookie (0x%llx, %p, %d)\n",
+		arg1 | ((long long) arg2 << 32), arg3, arg4);
+   SYSCALL_TRACK( pre_mem_write, tid, "lookup_dcookie(buf)", arg3, arg4);
+}
+
+POST(lookup_dcookie)
+{
+   if (arg3 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg3, res);
+}
+
+PRE(truncate64)
+{
+   MAYBE_PRINTF("truncate64 ( %p, %lld )\n",
+		arg1, ((ULong)arg2) | (((ULong) arg3) << 32));
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "truncate64(path)", arg1 );
+}
+
+PRE(fdatasync)
+{
+   /* int fdatasync(int fd); */
+   MAYBE_PRINTF("fdatasync ( %d )\n", arg1);
+}
+
+PRE(msync)
+{
+   /* int msync(const void *start, size_t length, int flags); */
+   MAYBE_PRINTF("msync ( %p, %d, %d )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_read, tid, "msync(start)", arg1, arg2 );
+}
+
+struct getpmsg_strbuf {
+   int     maxlen;         /* no. of bytes in buffer */
+   int     len;            /* no. of bytes returned */
+   caddr_t buf;            /* pointer to data */
+};
+
+PRE(getpmsg)
+{
+   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
+   /* int getpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+      int *bandp, int *flagsp); */
+   struct getpmsg_strbuf *ctrl;
+   struct getpmsg_strbuf *data;
+   MAYBE_PRINTF("getpmsg ( %d, %p, %p, %p, %p )\n",
+		arg1,arg2,arg3,arg4,arg5);
+   /* NOTE(review): ctrl/data are client pointers dereferenced directly
+      here (ctrl->maxlen, ctrl->buf) without first checking that the
+      strbuf structs themselves are addressable — a bogus arg2/arg3
+      would fault inside Valgrind rather than being reported. Confirm
+      whether a pre_mem_read on the structs should precede this. */
+   ctrl = (struct getpmsg_strbuf *)arg2;
+   data = (struct getpmsg_strbuf *)arg3;
+   /* Only buffers with positive maxlen can be filled by the kernel. */
+   if (ctrl && ctrl->maxlen > 0)
+      SYSCALL_TRACK( pre_mem_write,tid, "getpmsg(ctrl)", 
+		     (UInt)ctrl->buf, ctrl->maxlen);
+   if (data && data->maxlen > 0)
+      SYSCALL_TRACK( pre_mem_write,tid, "getpmsg(data)", 
+		     (UInt)data->buf, data->maxlen);
+   /* bandp and flagsp are optional out-parameters. */
+   if (arg4)
+      SYSCALL_TRACK( pre_mem_write,tid, "getpmsg(bandp)", 
+		     (UInt)arg4, sizeof(int));
+   if (arg5)
+      SYSCALL_TRACK( pre_mem_write,tid, "getpmsg(flagsp)", 
+		     (UInt)arg5, sizeof(int));
+}
+
+POST(getpmsg)
+{
+   struct getpmsg_strbuf *ctrl;
+   struct getpmsg_strbuf *data;
+
+   ctrl = (struct getpmsg_strbuf *)arg2;
+   data = (struct getpmsg_strbuf *)arg3;
+   if (res == 0 && ctrl && ctrl->len > 0) {
+      VG_TRACK( post_mem_write, (UInt)ctrl->buf, ctrl->len);
+   }
+   if (res == 0 && data && data->len > 0) {
+      VG_TRACK( post_mem_write, (UInt)data->buf, data->len);
+   }
+}
+
+PRE(putpmsg)
+{
+   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
+   /* int putpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+      int band, int flags); */
+   struct strbuf {
+      int     maxlen;         /* no. of bytes in buffer */
+      int     len;            /* no. of bytes returned */
+      caddr_t buf;            /* pointer to data */
+   };
+   struct strbuf *ctrl;
+   struct strbuf *data;
+   MAYBE_PRINTF("putpmsg ( %d, %p, %p, %d, %d )\n",
+		arg1,arg2,arg3,arg4,arg5);
+   ctrl = (struct strbuf *)arg2;
+   data = (struct strbuf *)arg3;
+   if (ctrl && ctrl->len > 0)
+      SYSCALL_TRACK( pre_mem_read,tid, "putpmsg(ctrl)",
+		     (UInt)ctrl->buf, ctrl->len);
+   if (data && data->len > 0)
+      SYSCALL_TRACK( pre_mem_read,tid, "putpmsg(data)",
+		     (UInt)data->buf, data->len);
+}
+
+PRE(getitimer)
+{
+   /* int getitimer(int which, struct itimerval *value); */
+   MAYBE_PRINTF("getitimer ( %d, %p )\n", arg1, arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "getitimer(timer)", arg2, 
+		  sizeof(struct itimerval) );
+}
+
+POST(getitimer)
+{
+   if (arg2 != (Addr)NULL) {
+      VG_TRACK( post_mem_write,arg2, sizeof(struct itimerval));
+   }
+}
+
+PRE(syslog)
+{
+   /* int syslog(int type, char *bufp, int len); */
+   MAYBE_PRINTF("syslog (%d, %p, %d)\n",arg1,arg2,arg3);
+   /* Types 2, 3 and 4 are the "read" variants which copy kernel log
+      data into bufp; only for those must the buffer be writable. */
+   if (arg1 >= 2 && arg1 <= 4)
+      SYSCALL_TRACK( pre_mem_write, tid, "syslog(buf)", arg2, arg3);
+}
+
+POST(syslog)
+{
+   /* Mirror of PRE(syslog): only the read variants (types 2-4)
+      actually deposited data into the client buffer. */
+   if (arg1 >= 2 && arg1 <= 4)
+      VG_TRACK( post_mem_write, arg2, arg3 );
+}
+
+PRE(personality)
+{
+   /* int personality(unsigned long persona); */
+   MAYBE_PRINTF("personality ( %d )\n", arg1);
+}
+
+PRE(chroot)
+{
+   /* int chroot(const char *path); */
+   MAYBE_PRINTF("chroot ( %p )\n", arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chroot(path)", arg1 );
+}
+
+PRE(madvise)
+{
+   /* int madvise(void *start, size_t length, int advice ); */
+   MAYBE_PRINTF("madvise ( %p, %d, %d )\n", arg1,arg2,arg3);
+}
+
+PRE(mremap)
+{
+   /* void* mremap(void * old_address, size_t old_size, 
+      size_t new_size, unsigned long flags); */
+   MAYBE_PRINTF("mremap ( %p, %d, %d, 0x%x )\n", 
+		arg1, arg2, arg3, arg4);
+   SYSCALL_TRACK( pre_mem_write, tid, "mremap(old_address)", arg1, arg2 );
+}
+
+POST(mremap)
+{
+   mremap_segment( arg1, arg2, (Addr)res, arg3 );
+}
+
+PRE(nice)
+{
+   /* int nice(int inc); */
+   MAYBE_PRINTF("nice ( %d )\n", arg1);
+}
+
+PRE(setresgid32)
+{
+   /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
+   MAYBE_PRINTF("setresgid32 ( %d, %d, %d )\n", arg1, arg2, arg3);
+}
+
+PRE(setfsuid32)
+{
+   /* int setfsuid(uid_t fsuid); */
+   MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+}
+
+PRE(_sysctl)
+{
+   /* int _sysctl(struct __sysctl_args *args); */
+   MAYBE_PRINTF("_sysctl ( %p )\n", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "_sysctl(args)", arg1, 
+		  sizeof(struct __sysctl_args) );
+}
+
+POST(_sysctl)
+{
+   VG_TRACK( post_mem_write, arg1, sizeof(struct __sysctl_args) );
+
+}
+
+PRE(sched_getscheduler)
+{
+   /* int sched_getscheduler(pid_t pid); */
+   MAYBE_PRINTF("sched_getscheduler ( %d )\n", arg1);
+}
+
+PRE(sched_setscheduler)
+{
+   /* int sched_setscheduler(pid_t pid, int policy, 
+      const struct sched_param *p); */
+   MAYBE_PRINTF("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3);
+   if (arg3 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid,
+		     "sched_setscheduler(struct sched_param *p)", 
+		     arg3, sizeof(struct sched_param));
+}
+
+PRE(mlock)
+{
+   /* int mlock(const void * addr, size_t len) */
+   MAYBE_PRINTF("mlock ( %p, %d )\n", arg1, arg2);
+}
+
+PRE(munlock)
+{
+   /* int munlock(const void * addr, size_t len) */
+   MAYBE_PRINTF("munlock ( %p, %d )\n", arg1, arg2);
+}
+
+PRE(mlockall)
+{
+   /* int mlockall(int flags); */
+   MAYBE_PRINTF("mlockall ( %x )\n", arg1);
+}
+
+PRE(munlockall)
+{
+   /* int munlockall(void); */
+   /* Fixed: the comment and trace were copy-pasted from munlock;
+      munlockall takes no arguments, so arg1/arg2 were garbage. */
+   MAYBE_PRINTF("munlockall ( )\n");
+}
+
+PRE(sched_get_priority_max)
+{
+   /* int sched_get_priority_max(int policy); */
+   MAYBE_PRINTF("sched_get_priority_max ( %d )\n", arg1);
+}
+
+PRE(sched_get_priority_min)
+{
+   /* int sched_get_priority_min(int policy); */
+   MAYBE_PRINTF("sched_get_priority_min ( %d )\n", arg1);
+}
+
+PRE(setpriority)
+{
+   /* int setpriority(int which, int who, int prio); */
+   MAYBE_PRINTF("setpriority ( %d, %d, %d )\n", arg1, arg2, arg3);
+}
+
+PRE(getpriority)
+{
+   /* int getpriority(int which, int who); */
+   MAYBE_PRINTF("getpriority ( %d, %d )\n", arg1, arg2);
+}
+
+PRE(setfsgid)
+{
+   /* int setfsgid(gid_t gid); */
+   MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+}
+
+PRE(setregid)
+{
+   /* int setregid(gid_t rgid, gid_t egid); */
+   MAYBE_PRINTF("setregid ( %d, %d )\n", arg1, arg2);
+}
+
+PRE(setresuid)
+{
+   /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+   MAYBE_PRINTF("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3);
+}
+
+PRE(setfsuid)
+{
+   /* int setfsuid(uid_t uid); */
+   MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+}
+
+PRE(sendfile)
+{
+   /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, 
+      size_t count) */
+   MAYBE_PRINTF("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
+   if (arg3 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "sendfile(offset)",
+		     arg3, sizeof(off_t) );
+}
+
+POST(sendfile)
+{
+   /* offset is optional: PRE(sendfile) only tracked it when non-NULL,
+      and the kernel only updates it in that case — so guard here too
+      (matches POST(sendfile64)). */
+   if (arg3 != (UInt)NULL)
+      VG_TRACK( post_mem_write, arg3, sizeof( off_t ) );
+}
+
+PRE(sendfile64)
+{
+   /* ssize_t sendfile64(int out_df, int in_fd, loff_t *offset,
+      size_t count); */
+   MAYBE_PRINTF("sendfile64 ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
+   if (arg3 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "sendfile64(offset)",
+		     arg3, sizeof(loff_t) );
+}
+
+POST(sendfile64)
+{
+   if (arg3 != (UInt)NULL ) {
+      VG_TRACK( post_mem_write, arg3, sizeof(loff_t) );
+   }
+}
+
+PRE(pwrite64)
+{
+   /* ssize_t pwrite (int fd, const void *buf, size_t nbytes,
+      off_t offset); */
+   MAYBE_PRINTF("pwrite64 ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4);
+   SYSCALL_TRACK( pre_mem_read, tid, "pwrite(buf)", arg2, arg3 );
+}
+
+PRE(sync)
+{
+   /* int sync(); */
+   MAYBE_PRINTF("sync ( )\n");
+}
+
+PRE(fstatfs)
+{
+   /* int fstatfs(int fd, struct statfs *buf); */
+   MAYBE_PRINTF("fstatfs ( %d, %p )\n",arg1,arg2);
+   /* Label fixed: this is fstatfs(buf), not stat(buf). */
+   SYSCALL_TRACK( pre_mem_write, tid, "fstatfs(buf)", 
+		  arg2, sizeof(struct statfs) );
+}
+
+POST(fstatfs)
+{
+   VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+}
+
+PRE(getsid)
+{
+   /* pid_t getsid(pid_t pid); */
+   MAYBE_PRINTF("getsid ( %d )\n", arg1);
+}
+
+PRE(pread64)
+{
+   /* ssize_t pread64(int fd, void *buf, size_t count, off_t offset); */
+   MAYBE_PRINTF("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4);
+   SYSCALL_TRACK( pre_mem_write, tid, "pread(buf)", arg2, arg3 );
+}
+
+POST(pread64)
+{
+   MAYBE_PRINTF("SYSCALL[%d]       pread ( %d, %p, %d, %d ) --> %d\n",
+		VG_(getpid)(),
+		arg1, arg2, arg3, arg4, res);
+   if (res > 0) {
+      VG_TRACK( post_mem_write, arg2, res );
+   }
+}
+
+PRE(mknod)
+{
+   /* int mknod(const char *pathname, mode_t mode, dev_t dev); */
+   MAYBE_PRINTF("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "mknod(pathname)", arg1 );
+}
+
+PRE(flock)
+{
+   /* int flock(int fd, int operation); */
+   MAYBE_PRINTF("flock ( %d, %d )\n", arg1, arg2 );
+}
+
+PRE(init_module)
+{
+   /* int init_module(const char *name, struct module *image); */
+   MAYBE_PRINTF("init_module ( %p, %p )\n", arg1, arg2 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "init_module(name)", arg1 );
+   SYSCALL_TRACK( pre_mem_read, tid, "init_module(image)", arg2, 
+		  VKI_SIZEOF_STRUCT_MODULE );
+}
+
+PRE(ioperm)
+{
+   /* int ioperm(unsigned long from, unsigned long num, int turn_on); */
+   MAYBE_PRINTF("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 );
+}
+
+PRE(capget)
+{
+   /* int capget(cap_user_header_t header, cap_user_data_t data); */
+   MAYBE_PRINTF("capget ( %p, %p )\n", arg1, arg2 );
+   SYSCALL_TRACK( pre_mem_read, tid, "capget(header)", arg1, 
+		  sizeof(vki_cap_user_header_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "capget(data)", arg2, 
+		  sizeof( vki_cap_user_data_t) );
+}
+
+POST(capget)
+{
+   if (arg2 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg2, sizeof( vki_cap_user_data_t) );
+}
+
+PRE(capset)
+{
+   SYSCALL_TRACK( pre_mem_read, tid, "capset(header)", 
+		  arg1, sizeof(vki_cap_user_header_t) );
+   SYSCALL_TRACK( pre_mem_read, tid, "capset(data)", 
+		  arg2, sizeof( vki_cap_user_data_t) );
+}
+
+PRE(execve)
+{
+   /* int execve (const char *filename, 
+      char *const argv [], 
+      char *const envp[]); */
+   MAYBE_PRINTF("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
+		arg1, arg1, arg2, arg3);
+   /* Resistance is futile.  Nuke all other threads.  POSIX
+      mandates this. */
+   VG_(nuke_all_threads_except)( tid );
+   /* Make any binding for LD_PRELOAD disappear, so that child
+      processes don't get traced into. */
+   /* NOTE(review): envp (arg3) is walked and dereferenced here with
+      no addressability checks — a bad envp pointer from the client
+      would fault inside Valgrind.  Also assumes envp is non-NULL
+      when --trace-children is off; confirm callers guarantee this. */
+   if (!VG_(clo_trace_children)) {
+      Int i;
+      Char** envp = (Char**)arg3;
+      Char*  ld_preload_str = NULL;
+      Char*  ld_library_path_str = NULL;
+      /* Find the LD_PRELOAD / LD_LIBRARY_PATH values (pointers into
+         the client's own env strings, past the "NAME=" prefix). */
+      for (i = 0; envp[i] != NULL; i++) {
+	 if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
+	    ld_preload_str = &envp[i][11];
+	 if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
+	    ld_library_path_str = &envp[i][16];
+      }
+      /* Overwrite the values in place so the exec'd image does not
+         pick up Valgrind's preloads. */
+      VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
+	 ld_preload_str, ld_library_path_str );
+   }
+}
+
+POST(execve)
+{
+   /* Should we still be alive here?  Don't think so. */
+   /* Actually, above comment is wrong.  execve can fail, just
+      like any other syscall -- typically the file to exec does
+      not exist.  Hence: */
+   /* A successful execve never returns, so reaching this POST
+      handler implies the syscall failed with an error code. */
+   vg_assert(VG_(is_kerror)(res));
+}
+
+PRE(access)
+{
+   /* int access(const char *pathname, int mode); */
+   MAYBE_PRINTF("access ( %p, %d )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "access(pathname)", arg1 );
+}
+
+PRE(alarm)
+{
+   /* unsigned int alarm(unsigned int seconds); */
+   MAYBE_PRINTF("alarm ( %d )\n", arg1);
+}
+
+PRE(brk)
+{
+   /* libc   says: int   brk(void *end_data_segment);
+      kernel says: void* brk(void* end_data_segment);  (more or less)
+
+      libc returns 0 on success, and -1 (and sets errno) on failure.
+      Nb: if you ask to shrink the dataseg end below what it
+      currently is, that always succeeds, even if the dataseg end
+      doesn't actually change (eg. brk(0)).  Unless it seg faults.
+
+      Kernel returns the new dataseg end.  If the brk() failed, this
+      will be unchanged from the old one.  That's why calling (kernel)
+      brk(0) gives the current dataseg end (libc brk() just returns
+      zero in that case).
+
+      Both will seg fault if you shrink it back into a text segment.
+   */
+   MAYBE_PRINTF("brk ( %p ) --> ",arg1);
+}
+
+POST(brk)
+{
+   MAYBE_PRINTF("0x%x\n", res);
+
+   /* Kernel brk() returns the new dataseg end; success is detected
+      by the result equalling the requested address (see PRE(brk)). */
+   if (res == arg1) {
+      /* brk() succeeded */
+      if (res < curr_dataseg_end) {
+         /* successfully shrunk the data segment. */
+         VG_TRACK( die_mem_brk, (Addr)arg1,
+		   curr_dataseg_end-arg1 );
+      } else
+      if (res > curr_dataseg_end && res != 0) {
+         /* successfully grew the data segment */
+         VG_TRACK( new_mem_brk, curr_dataseg_end,
+                                arg1-curr_dataseg_end );
+      }
+      /* Track the new break for the next brk() call. */
+      curr_dataseg_end = res;
+   } else {
+      /* brk() failed */
+      /* On failure the kernel returns the unchanged break, which must
+         match our cached value — anything else means we lost sync. */
+      vg_assert(curr_dataseg_end == res);
+   }
+}
+
+PRE(chdir)
+{
+   /* int chdir(const char *path); */
+   MAYBE_PRINTF("chdir ( %p )\n", arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chdir(path)", arg1 );
+}
+
+PRE(chmod)
+{
+   /* int chmod(const char *path, mode_t mode); */
+   MAYBE_PRINTF("chmod ( %p, %d )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chmod(path)", arg1 );
+}
+
+PRE(chown)
+{
+   /* int chown(const char *path, uid_t owner, gid_t group); */
+   MAYBE_PRINTF("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chown(path)", arg1 );
+}
+
+PREALIAS(chown32, chown);
+PREALIAS(lchown32, chown);
+
+PRE(close)
+{
+   /* int close(int fd); */
+   MAYBE_PRINTF("close ( %d )\n",arg1);
+   /* Detect and negate attempts by the client to close Valgrind's
+      logfile fd ... */
+   if (!fd_allowed(arg1, "close", tid))
+      res = -VKI_EBADF;
+}
+
+
+PRE(dup)
+{
+   /* int dup(int oldfd); */
+   MAYBE_PRINTF("dup ( %d ) --> ", arg1);
+}
+
+POST(dup)
+{
+   MAYBE_PRINTF("%d\n", res);
+   if (!fd_allowed(res, "dup", tid)) {
+      VG_(close)(res);
+      res = -VKI_EMFILE;
+   }
+}
+
+PRE(dup2)
+{
+   /* int dup2(int oldfd, int newfd); */
+   MAYBE_PRINTF("dup2 ( %d, %d ) ...\n", arg1,arg2);
+   if (!fd_allowed(arg2, "dup2", tid))
+      res = -VKI_EBADF;
+}
+
+POST(dup2)
+{
+   MAYBE_PRINTF("SYSCALL[%d]       dup2 ( %d, %d ) = %d\n", 
+		VG_(getpid)(), 
+		arg1, arg2, res);
+}
+
+PRE(fcntl)
+{
+   /* int fcntl(int fd, int cmd, int arg); */
+   MAYBE_PRINTF("fcntl ( %d, %d, %d )\n",arg1,arg2,arg3);
+}
+
+PRE(fchdir)
+{
+   /* int fchdir(int fd); */
+   MAYBE_PRINTF("fchdir ( %d )\n", arg1);
+}
+
+PRE(fchown)
+{
+   /* int fchown(int filedes, uid_t owner, gid_t group); */
+   MAYBE_PRINTF("fchown ( %d, %d, %d )\n", arg1,arg2,arg3);
+}
+
+PREALIAS(fchown32, fchown);
+
+PRE(fchmod)
+{
+   /* int fchmod(int fildes, mode_t mode); */
+   MAYBE_PRINTF("fchmod ( %d, %d )\n", arg1,arg2);
+}
+
+PRE(fcntl64)
+{
+   /* I don't know what the prototype for this is supposed to be. */
+   /* ??? int fcntl(int fd, int cmd); */
+   MAYBE_PRINTF("fcntl64 (?!) ( %d, %d )\n", arg1,arg2);
+}
+
+PRE(fstat)
+{
+   /* int fstat(int filedes, struct stat *buf); */
+   MAYBE_PRINTF("fstat ( %d, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "fstat", arg2, sizeof(struct stat) );
+}
+
+POST(fstat)
+{
+   VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+}
+
+static vki_ksigset_t fork_saved_mask;
+
+PRE(fork)
+{
+   vki_ksigset_t mask;
+
+   /* fork must be issued from the main thread's LWP so the child
+      inherits a sane state. */
+   vg_assert(VG_(gettid)() == VG_(main_pid));
+
+   /* Block all signals during fork, so that we can fix things up in
+      the child without being interrupted. */
+   /* The previous mask is stashed in fork_saved_mask and restored in
+      POST(fork), in both parent and child. */
+   VG_(ksigfillset)(&mask);
+   VG_(ksigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
+
+   /* pid_t fork(void); */
+   MAYBE_PRINTF("fork ()\n");
+
+   /* Run registered pthread_atfork prepare handlers before the fork. */
+   do_atfork_pre(tid);
+}
+
+POST(fork)
+{
+   /* res == 0 identifies the child side of the fork. */
+   if (res == 0) {
+      /* Run atfork child handlers before anything else in the child. */
+      do_atfork_child(tid);
+
+      /* I am the child.  Nuke all other threads which I might
+	 have inherited from my parent.  POSIX mandates this. */
+      VG_(nuke_all_threads_except)( tid );
+
+      /* XXX TODO: tid 1 is special, and is presumed to be present.
+	 We should move this TID to 1 in the child. */
+
+      /* restore signal mask */
+      VG_(ksigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
+   } else {
+      MAYBE_PRINTF("   fork: process %d created child %d\n", VG_(main_pid), res);
+
+      /* Parent side: run atfork parent handlers. */
+      do_atfork_parent(tid);
+
+      /* restore signal mask */
+      VG_(ksigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
+   }
+}
+
+PRE(fsync)
+{
+   /* int fsync(int fd); */
+   MAYBE_PRINTF("fsync ( %d )\n", arg1);
+}
+
+PRE(ftruncate)
+{
+   /* int ftruncate(int fd, size_t length); */
+   MAYBE_PRINTF("ftruncate ( %d, %d )\n", arg1,arg2);
+}
+
+PRE(ftruncate64)
+{
+   /* int ftruncate64(int fd, off64_t length); */
+   MAYBE_PRINTF("ftruncate64 ( %d, %lld )\n", 
+		arg1,arg2|((long long) arg3 << 32));
+}
+
+PRE(getdents)
+{
+   /* int getdents(unsigned int fd, struct dirent *dirp, 
+      unsigned int count); */
+   MAYBE_PRINTF("getdents ( %d, %p, %d )\n",arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "getdents(dirp)", arg2, arg3 );
+}
+
+POST(getdents)
+{
+   if (res > 0)
+      VG_TRACK( post_mem_write, arg2, res );
+}
+
+PRE(getdents64)
+{
+   /* int getdents(unsigned int fd, struct dirent64 *dirp, 
+      unsigned int count); */
+   MAYBE_PRINTF("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "getdents64(dirp)", arg2, arg3 );
+}
+
+POST(getdents64)
+{
+   if (res > 0)
+      VG_TRACK( post_mem_write, arg2, res );
+}
+
+PRE(getgroups)
+{
+   /* int getgroups(int size, gid_t list[]); */
+   MAYBE_PRINTF("getgroups ( %d, %p )\n", arg1, arg2);
+   if (arg1 > 0)
+      SYSCALL_TRACK( pre_mem_write, tid, "getgroups(list)", arg2, 
+		     arg1 * sizeof(gid_t) );
+}
+
+POST(getgroups)
+{
+   if (arg1 > 0 && res > 0)
+      VG_TRACK( post_mem_write, arg2, res * sizeof(gid_t) );
+}
+
+PREALIAS(getgroups32, getgroups);
+POSTALIAS(getgroups32, getgroups);
+
+PRE(getcwd)
+{
+   /* char *getcwd(char *buf, size_t size);  (but see comment below) */
+   MAYBE_PRINTF("getcwd ( %p, %d )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "getcwd(buf)", arg1, arg2 );
+}
+
+POST(getcwd)
+{
+   if (res != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg1, res );
+}
+
+PRE(geteuid)
+{
+   /* uid_t geteuid(void); */
+   MAYBE_PRINTF("geteuid ( )\n");
+}
+
+PRE(geteuid32)
+{
+   /* ?? uid_t geteuid32(void); */
+   MAYBE_PRINTF("geteuid32(?) ( )\n");
+}
+
+PRE(getegid)
+{
+   /* gid_t getegid(void); */
+   MAYBE_PRINTF("getegid ()\n");
+}
+
+PRE(getegid32)
+{
+   /* gid_t getegid32(void); */
+   MAYBE_PRINTF("getegid32 ()\n");
+}
+
+PRE(getgid)
+{
+   /* gid_t getgid(void); */
+   MAYBE_PRINTF("getgid ()\n");
+}
+
+PRE(getgid32)
+{
+   /* gid_t getgid32(void); */
+   MAYBE_PRINTF("getgid32 ()\n");
+}
+
+PRE(getpid)
+{
+   /* pid_t getpid(void); */
+   MAYBE_PRINTF("getpid ()\n");
+}
+
+PRE(getpgid)
+{
+   /* pid_t getpgid(pid_t pid); */
+   MAYBE_PRINTF("getpgid ( %d )\n", arg1);
+}
+
+PRE(getpgrp)
+{
+   /* pid_t getpgrp(void); */
+   MAYBE_PRINTF("getpgrp ()\n");
+}
+
+PRE(getppid)
+{
+   /* pid_t getppid(void); */
+   MAYBE_PRINTF("getppid ()\n");
+}
+
+PRE(getresgid)
+{
+   /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+   MAYBE_PRINTF("getresgid ( %p, %p, %p )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "getresgid(rgid)", 
+		  arg1, sizeof(gid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresgid(egid)", 
+		  arg2, sizeof(gid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresgid(sgid)", 
+		  arg3, sizeof(gid_t) );
+}
+
+POST(getresgid)
+{
+   if (res == 0) {
+      VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+      VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+      VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+   }
+}
+
+PRE(getresgid32)
+{
+   /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+   MAYBE_PRINTF("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "getresgid32(rgid)", 
+		  arg1, sizeof(gid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresgid32(egid)", 
+		  arg2, sizeof(gid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresgid32(sgid)", 
+		  arg3, sizeof(gid_t) );
+}
+
+POST(getresgid32)
+{
+   if (res == 0) {
+      VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+      VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+      VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+   }
+}
+
+PRE(getresuid)
+{
+   /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+   MAYBE_PRINTF("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "getresuid(ruid)", 
+		  arg1, sizeof(uid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresuid(euid)", 
+		  arg2, sizeof(uid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresuid(suid)", 
+		  arg3, sizeof(uid_t) );
+}
+
+POST(getresuid)
+{
+   if (res == 0) {
+      VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+      VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+      VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+   }
+}
+
+PRE(getresuid32)
+{
+   /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+   MAYBE_PRINTF("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_write, tid, "getresuid32(ruid)", 
+		  arg1, sizeof(uid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresuid32(euid)", 
+		  arg2, sizeof(uid_t) );
+   SYSCALL_TRACK( pre_mem_write, tid, "getresuid32(suid)", 
+		  arg3, sizeof(uid_t) );
+}
+
+POST(getresuid32)
+{
+   if (res == 0) {
+      VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+      VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+      VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+   }
+}
+
+PRE(getrlimit)
+{
+   /* int getrlimit (int resource, struct rlimit *rlim); */
+   MAYBE_PRINTF("getrlimit ( %d, %p )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "getrlimit(rlim)", arg2, 
+		  sizeof(struct rlimit) );
+}
+
+POST(getrlimit)
+{
+   if (res == 0)
+      VG_TRACK( post_mem_write, arg2, sizeof(struct rlimit) );
+}
+
+PREALIAS(ugetrlimit, getrlimit);
+POSTALIAS(ugetrlimit, getrlimit);
+
+PRE(getrusage)
+{
+   /* int getrusage (int who, struct rusage *usage); */
+   MAYBE_PRINTF("getrusage ( %d, %p )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "getrusage(usage)", arg2, 
+		  sizeof(struct rusage) );
+}
+
+POST(getrusage)
+{
+   if (res == 0)
+      VG_TRACK( post_mem_write,arg2, sizeof(struct rusage) );
+}
+
+PRE(gettimeofday)
+{
+   /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
+   MAYBE_PRINTF("gettimeofday ( %p, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "gettimeofday(tv)", arg1, 
+		  sizeof(struct timeval) );
+   if (arg2 != 0)
+      SYSCALL_TRACK( pre_mem_write, tid, "gettimeofday(tz)", arg2, 
+		     sizeof(struct timezone) );
+}
+
+POST(gettimeofday)
+{
+   if (res == 0) {
+      VG_TRACK( post_mem_write, arg1, sizeof(struct timeval) );
+      if (arg2 != 0)
+	 VG_TRACK( post_mem_write, arg2, sizeof(struct timezone) );
+   }
+}
+
+PRE(getuid)
+{
+   /* uid_t getuid(void); */
+   MAYBE_PRINTF("getuid ( )\n");
+}
+
+PRE(getuid32)
+{
+   /* ???uid_t getuid32(void); */
+   MAYBE_PRINTF("getuid32 ( )\n");
+}
+
+PRE(ipc)
+{
+   MAYBE_PRINTF("ipc ( %d, %d, %d, %d, %p, %d )\n",
+		arg1,arg2,arg3,arg4,arg5,arg6);
+   switch (arg1 /* call */) {
+   case 1: /* IPCOP_semop */
+      SYSCALL_TRACK( pre_mem_read, tid, "semop(sops)", arg5, 
+		     arg3 * sizeof(struct sembuf) );
+      break;
+   case 2: /* IPCOP_semget */
+   case 3: /* IPCOP_semctl */
+      break;
+   case 11: /* IPCOP_msgsnd */
+   {
+      struct msgbuf *msgp = (struct msgbuf *)arg5;
+      Int msgsz = arg3;
+
+      SYSCALL_TRACK( pre_mem_read, tid, "msgsnd(msgp->mtype)", 
+		     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+      SYSCALL_TRACK( pre_mem_read, tid, "msgsnd(msgp->mtext)", 
+		     (UInt)msgp->mtext, msgsz );
+      break;
+   }
+   case 12: /* IPCOP_msgrcv */
+   {
+      struct msgbuf *msgp;
+      Int msgsz = arg3;
+ 
+      msgp = (struct msgbuf *)deref_Addr( tid,
+					  (Addr) (&((struct ipc_kludge *)arg5)->msgp),
+					  "msgrcv(msgp)" );
+
+      SYSCALL_TRACK( pre_mem_write, tid, "msgrcv(msgp->mtype)", 
+		     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+      SYSCALL_TRACK( pre_mem_write, tid, "msgrcv(msgp->mtext)", 
+		     (UInt)msgp->mtext, msgsz );
+      break;
+   }
+   case 13: /* IPCOP_msgget */
+      break;
+   case 14: /* IPCOP_msgctl */
+   {
+      switch (arg3 /* cmd */) {
+      case IPC_STAT:
+	 SYSCALL_TRACK( pre_mem_write, tid, "msgctl(buf)", arg5, 
+			sizeof(struct msqid_ds) );
+	 break;
+      case IPC_SET:
+	 SYSCALL_TRACK( pre_mem_read, tid, "msgctl(buf)", arg5, 
+			sizeof(struct msqid_ds) );
+	 break;
+#                    if defined(IPC_64)
+      case IPC_STAT|IPC_64:
+	 SYSCALL_TRACK( pre_mem_write, tid, "msgctl(buf)", arg5, 
+			sizeof(struct msqid64_ds) );
+	 break;
+#                    endif
+#                    if defined(IPC_64)
+      case IPC_SET|IPC_64:
+	 SYSCALL_TRACK( pre_mem_read, tid, "msgctl(buf)", arg5, 
+			sizeof(struct msqid64_ds) );
+	 break;
+#                    endif
+      default:
+	 break;
+      }
+      break;
+   }
+   case 21: /* IPCOP_shmat */
+   {
+      break;
+   }
+   case 22: /* IPCOP_shmdt */
+      break;
+   case 23: /* IPCOP_shmget */
+      break;
+   case 24: /* IPCOP_shmctl */
+      /* Subject: shmctl: The True Story
+	 Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
+	 From: Reuben Thomas <rrt@mupsych.org>
+	 To: Julian Seward <jseward@acm.org>
+
+	 1. As you suggested, the syscall subop is in arg1.
+
+	 2. There are a couple more twists, so the arg order
+	 is actually:
+
+	 arg1 syscall subop
+	 arg2 file desc
+	 arg3 shm operation code (can have IPC_64 set)
+	 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
+	 is defined?
+	 arg5 pointer to buffer
+
+	 3. With this in mind, I've amended the case as below:
+      */
+   {
+      UInt cmd = arg3;
+      Bool out_arg = False;
+      if ( arg5 ) {
+#                    if defined(IPC_64)
+	 cmd = cmd & (~IPC_64);
+#                    endif
+	 out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
+	 if ( out_arg )
+	    SYSCALL_TRACK( pre_mem_write, tid, 
+                           "shmctl(SHM_STAT or IPC_STAT,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+	 else
+	    SYSCALL_TRACK( pre_mem_read, tid, 
+                           "shmctl(SHM_XXXX,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+      }
+   }
+   break;
+   default:
+      VG_(message)(Vg_DebugMsg,
+		   "FATAL: unhandled syscall(ipc) %d",
+		   arg1 );
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+   }   
+}
+
+POST(ipc)
+{
+   switch (arg1 /* call */) {
+   case 1: /* IPCOP_semop */
+      break;
+   case 2: /* IPCOP_semget */
+   case 3: /* IPCOP_semctl */
+      break;
+   case 11: /* IPCOP_msgsnd */
+      break;
+   case 12: /* IPCOP_msgrcv */
+   {
+      struct msgbuf *msgp;
+ 
+      msgp = (struct msgbuf *)deref_Addr( tid,
+					  (Addr) (&((struct ipc_kludge *)arg5)->msgp),
+					  "msgrcv(msgp)" );
+      if ( res > 0 ) {
+	 VG_TRACK( post_mem_write, (UInt)&msgp->mtype, 
+		   sizeof(msgp->mtype) );
+	 VG_TRACK( post_mem_write, (UInt)msgp->mtext, res );
+      }
+      break;
+   }
+   case 13: /* IPCOP_msgget */
+      break;
+   case 14: /* IPCOP_msgctl */
+   {
+      switch (arg3 /* cmd */) {
+      case IPC_STAT:
+	 if ( res > 0 ) {
+	    VG_TRACK( post_mem_write, arg5, 
+		      sizeof(struct msqid_ds) );
+	 }
+	 break;
+      case IPC_SET:
+	 break;
+#                    if defined(IPC_64)
+      case IPC_STAT|IPC_64:
+	 if ( res > 0 ) {
+	    VG_TRACK( post_mem_write, arg5, 
+		      sizeof(struct msqid64_ds) );
+	 }
+	 break;
+#                    endif
+#                    if defined(IPC_64)
+      case IPC_SET|IPC_64:
+	 break;
+#                    endif
+      default:
+	 break;
+      }
+      break;
+   }
+   case 21: /* IPCOP_shmat */
+   {
+      Int shmid = arg2;
+      /*Int shmflag = arg3;*/
+      Addr addr;
+
+                  
+      /* force readability. before the syscall it is
+       * indeed uninitialized, as can be seen in
+       * glibc/sysdeps/unix/sysv/linux/shmat.c */
+      VG_TRACK( post_mem_write, arg4, sizeof( ULong ) );
+
+      addr = deref_Addr ( tid, arg4, "shmat(addr)" );
+      if ( addr > 0 ) { 
+	 UInt segmentSize = get_shm_size ( shmid );
+	 if ( segmentSize > 0 ) {
+	    /* we don't distinguish whether it's read-only or
+	     * read-write -- it doesn't matter really. */
+	    VG_TRACK( new_mem_mmap, addr, segmentSize, 
+		      True, True, False );
+	 }
+      }
+      break;
+   }
+   case 22: /* IPCOP_shmdt */
+      /* ### FIXME: this should call make_noaccess on the
+       * area passed to shmdt. But there's no way to
+       * figure out the size of the shared memory segment
+       * just from the address...  Maybe we want to keep a
+       * copy of the exiting mappings inside valgrind? */
+      break;
+   case 23: /* IPCOP_shmget */
+      break;
+   case 24: /* IPCOP_shmctl */
+      /* Subject: shmctl: The True Story
+	 Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
+	 From: Reuben Thomas <rrt@mupsych.org>
+	 To: Julian Seward <jseward@acm.org>
+
+	 1. As you suggested, the syscall subop is in arg1.
+
+	 2. There are a couple more twists, so the arg order
+	 is actually:
+
+	 arg1 syscall subop
+	 arg2 file desc
+	 arg3 shm operation code (can have IPC_64 set)
+	 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
+	 is defined?
+	 arg5 pointer to buffer
+
+	 3. With this in mind, I've amended the case as below:
+      */
+   {
+      UInt cmd = arg3;
+      Bool out_arg = False;
+      if ( arg5 ) {
+#                    if defined(IPC_64)
+	 cmd = cmd & (~IPC_64);
+#                    endif
+	 out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
+      }
+      if ( arg5 && res == 0 && out_arg )
+	 VG_TRACK( post_mem_write, arg5, 
+		   sizeof(struct shmid_ds) );
+   }
+   break;
+   default:
+      VG_(message)(Vg_DebugMsg,
+		   "FATAL: unhandled syscall(ipc) %d",
+		   arg1 );
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+   }
+}
+
+PRE(ioctl)
+{
+   /* int ioctl(int d, int request, ...)
+      [The  "third"  argument  is traditionally char *argp, 
+      and will be so named for this discussion.]
+   */
+   /*
+     VG_(message)(
+     Vg_DebugMsg, 
+     "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
+     arg2,arg1,arg3);
+   */
+   MAYBE_PRINTF("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
+   switch (arg2 /* request */) {
+   case TCSETS:
+   case TCSETSW:
+   case TCSETSF:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TCSET{S,SW,SF})", arg3, 
+		     VKI_SIZEOF_STRUCT_TERMIOS );
+      break; 
+   case TCGETS:
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TCGETS)", arg3, 
+		     VKI_SIZEOF_STRUCT_TERMIOS );
+      break;
+   case TCSETA:
+   case TCSETAW:
+   case TCSETAF:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TCSET{A,AW,AF})", arg3,
+		     VKI_SIZEOF_STRUCT_TERMIO );
+      break;
+   case TCGETA:
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TCGETA)", arg3,
+		     VKI_SIZEOF_STRUCT_TERMIO );
+      break;
+   case TCSBRK:
+   case TCXONC:
+   case TCSBRKP:
+   case TCFLSH:
+      /* These just take an int by value */
+      break;
+   case TIOCGWINSZ:
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGWINSZ)", arg3, 
+		     sizeof(struct winsize) );
+      break;
+   case TIOCSWINSZ:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCSWINSZ)", arg3, 
+		     sizeof(struct winsize) );
+      break;
+   case TIOCLINUX:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCLINUX)", arg3, 
+		     sizeof(char *) );
+      if (*(char *)arg3 == 11) {
+	 SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCLINUX, 11)", 
+			arg3, 2 * sizeof(char *) );
+      }
+      break;
+   case TIOCGPGRP:
+      /* Get process group ID for foreground processing group. */
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGPGRP)", arg3,
+		     sizeof(pid_t) );
+      break;
+   case TIOCSPGRP:
+      /* Set a process group ID? */
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGPGRP)", arg3,
+		     sizeof(pid_t) );
+      break;
+   case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGPTN)", 
+		     arg3, sizeof(int) );
+      break;
+   case TIOCSCTTY:
+      /* Just takes an int value.  */
+      break;
+   case TIOCSPTLCK: /* Lock/unlock Pty */
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCSPTLCK)", 
+		     arg3, sizeof(int) );
+      break;
+   case FIONBIO:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(FIONBIO)", 
+		     arg3, sizeof(int) );
+      break;
+   case FIOASYNC:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(FIOASYNC)", 
+		     arg3, sizeof(int) );
+      break;
+   case FIONREAD:                /* identical to SIOCINQ */
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(FIONREAD)", 
+		     arg3, sizeof(int) );
+      break;
+
+      /* If you get compilation problems here, change the #if
+	 1 to #if 0 and get rid of <scsi/sg.h> in
+	 vg_unsafe.h. */
+#       if 1
+   case SG_SET_COMMAND_Q:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_SET_COMMAND_Q)", 
+		     arg3, sizeof(int) );
+      break;
+#           if defined(SG_IO)
+   case SG_IO:
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SG_IO)", arg3, 
+		     sizeof(struct sg_io_hdr) );
+      break;
+#           endif /* SG_IO */
+   case SG_GET_SCSI_ID:
+      /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SG_GET_SCSI_ID)", arg3, 
+		     sizeof(struct sg_scsi_id) );
+      break;
+   case SG_SET_RESERVED_SIZE:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_SET_RESERVED_SIZE)", 
+		     arg3, sizeof(int) );
+      break;
+   case SG_SET_TIMEOUT:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_SET_TIMEOUT)", arg3, 
+		     sizeof(int) );
+      break;
+   case SG_GET_RESERVED_SIZE:
+      SYSCALL_TRACK( pre_mem_write, tid, 
+		     "ioctl(SG_GET_RESERVED_SIZE)", arg3, 
+		     sizeof(int) );
+      break;
+   case SG_GET_TIMEOUT:
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SG_GET_TIMEOUT)", arg3, 
+		     sizeof(int) );
+      break;
+   case SG_GET_VERSION_NUM:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_GET_VERSION_NUM)", 
+		     arg3, sizeof(int) );
+      break;
+#       endif
+
+   case IIOCGETCPS:
+      /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
+       * when KERNEL was. I never saw a larger value than 64 though */
+#              ifndef ISDN_MAX_CHANNELS
+#              define ISDN_MAX_CHANNELS 64
+#              endif
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(IIOCGETCPS)", arg3,
+		     ISDN_MAX_CHANNELS 
+		     * 2 * sizeof(unsigned long) );
+      break;
+   case IIOCNETGPN:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(IIOCNETGPN)",
+		     (UInt)&((isdn_net_ioctl_phone *)arg3)->name,
+		     sizeof(((isdn_net_ioctl_phone *)arg3)->name) );
+      SYSCALL_TRACK( pre_mem_write, tid, "ioctl(IIOCNETGPN)", arg3,
+		     sizeof(isdn_net_ioctl_phone) );
+      break;
+
+      /* These all use struct ifreq AFAIK */
+   case SIOCGIFINDEX:
+   case SIOCGIFFLAGS:        /* get flags                    */
+   case SIOCGIFHWADDR:       /* Get hardware address         */
+   case SIOCGIFMTU:          /* get MTU size                 */
+   case SIOCGIFADDR:         /* get PA address               */
+   case SIOCGIFNETMASK:      /* get network PA mask          */
+   case SIOCGIFMETRIC:       /* get metric                   */
+   case SIOCGIFMAP:          /* Get device parameters        */
+   case SIOCGIFTXQLEN:       /* Get the tx queue length      */
+   case SIOCGIFDSTADDR:      /* get remote PA address        */
+   case SIOCGIFBRDADDR:      /* get broadcast PA address     */
+   case SIOCGIFNAME:         /* get iface name               */
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(SIOCGIFINDEX)", arg3, 
+		     sizeof(struct ifreq));
+      break;
+   case SIOCGIFCONF:         /* get iface list               */
+      /* WAS:
+	 SYSCALL_TRACK( pre_mem_write,"ioctl(SIOCGIFCONF)", arg3, 
+	 sizeof(struct ifconf));
+	 KERNEL_DO_SYSCALL(tid,res);
+	 if (!VG_(is_kerror)(res) && res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct ifconf));
+      */
+      SYSCALL_TRACK( pre_mem_read,tid, "ioctl(SIOCGIFCONF)", arg3, 
+		     sizeof(struct ifconf));
+      if ( arg3 ) {
+	 // TODO len must be readable and writable
+	 // buf pointer only needs to be readable
+	 struct ifconf *ifc = (struct ifconf *) arg3;
+	 SYSCALL_TRACK( pre_mem_write,tid, "ioctl(SIOCGIFCONF).ifc_buf",
+			(Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) );
+      }
+      break;
+   case SIOCGSTAMP:
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(SIOCGSTAMP)", arg3, 
+		     sizeof(struct timeval));
+      break;
+      /* SIOCOUTQ is an ioctl that, when called on a socket, returns
+	 the number of bytes currently in that socket's send buffer.
+	 It writes this value as an int to the memory location
+	 indicated by the third argument of ioctl(2). */
+   case SIOCOUTQ:
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(SIOCOUTQ)", arg3, 
+		     sizeof(int));
+      break;
+   case SIOCGRARP:           /* get RARP table entry         */
+   case SIOCGARP:            /* get ARP table entry          */
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(SIOCGARP)", arg3, 
+		     sizeof(struct arpreq));
+      break;
+                    
+   case SIOCSIFFLAGS:        /* set flags                    */
+   case SIOCSIFMAP:          /* Set device parameters        */
+   case SIOCSIFTXQLEN:       /* Set the tx queue length      */
+   case SIOCSIFDSTADDR:      /* set remote PA address        */
+   case SIOCSIFBRDADDR:      /* set broadcast PA address     */
+   case SIOCSIFNETMASK:      /* set network PA mask          */
+   case SIOCSIFMETRIC:       /* set metric                   */
+   case SIOCSIFADDR:         /* set PA address               */
+   case SIOCSIFMTU:          /* set MTU size                 */
+   case SIOCSIFHWADDR:       /* set hardware address         */
+      SYSCALL_TRACK( pre_mem_read,tid,"ioctl(SIOCSIFFLAGS)", arg3, 
+		     sizeof(struct ifreq));
+      break;
+      /* Routing table calls.  */
+   case SIOCADDRT:           /* add routing table entry      */
+   case SIOCDELRT:           /* delete routing table entry   */
+      SYSCALL_TRACK( pre_mem_read,tid,"ioctl(SIOCADDRT/DELRT)", arg3, 
+		     sizeof(struct rtentry));
+      break;
+
+      /* RARP cache control calls. */
+   case SIOCDRARP:           /* delete RARP table entry      */
+   case SIOCSRARP:           /* set RARP table entry         */
+      /* ARP cache control calls. */
+   case SIOCSARP:            /* set ARP table entry          */
+   case SIOCDARP:            /* delete ARP table entry       */
+      SYSCALL_TRACK( pre_mem_read,tid, "ioctl(SIOCSIFFLAGS)", arg3, 
+		     sizeof(struct ifreq));
+      break;
+
+   case SIOCSPGRP:
+      SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SIOCSPGRP)", arg3, 
+		     sizeof(int) );
+      break;
+
+      /* linux/soundcard interface (OSS) */
+   case SNDCTL_SEQ_GETOUTCOUNT:
+   case SNDCTL_SEQ_GETINCOUNT:
+   case SNDCTL_SEQ_PERCMODE:
+   case SNDCTL_SEQ_TESTMIDI:
+   case SNDCTL_SEQ_RESETSAMPLES:
+   case SNDCTL_SEQ_NRSYNTHS:
+   case SNDCTL_SEQ_NRMIDIS:
+   case SNDCTL_SEQ_GETTIME:
+   case SNDCTL_DSP_GETFMTS:
+   case SNDCTL_DSP_GETTRIGGER:
+   case SNDCTL_DSP_GETODELAY:
+#           if defined(SNDCTL_DSP_GETSPDIF)
+   case SNDCTL_DSP_GETSPDIF:
+#           endif
+   case SNDCTL_DSP_GETCAPS:
+   case SOUND_PCM_READ_RATE:
+   case SOUND_PCM_READ_CHANNELS:
+   case SOUND_PCM_READ_BITS:
+   case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
+   case SOUND_PCM_READ_FILTER:
+      SYSCALL_TRACK( pre_mem_write,tid,
+		     "ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", 
+		     arg3,
+		     sizeof(int));
+      break;
+   case SNDCTL_SEQ_CTRLRATE:
+   case SNDCTL_DSP_SPEED:
+   case SNDCTL_DSP_STEREO:
+   case SNDCTL_DSP_GETBLKSIZE: 
+   case SNDCTL_DSP_CHANNELS:
+   case SOUND_PCM_WRITE_FILTER:
+   case SNDCTL_DSP_SUBDIVIDE:
+   case SNDCTL_DSP_SETFRAGMENT:
+#           if defined(SNDCTL_DSP_GETCHANNELMASK)
+   case SNDCTL_DSP_GETCHANNELMASK:
+#           endif
+#           if defined(SNDCTL_DSP_BIND_CHANNEL)
+   case SNDCTL_DSP_BIND_CHANNEL:
+#           endif
+   case SNDCTL_TMR_TIMEBASE:
+   case SNDCTL_TMR_TEMPO:
+   case SNDCTL_TMR_SOURCE:
+   case SNDCTL_MIDI_PRETIME:
+   case SNDCTL_MIDI_MPUMODE:
+      SYSCALL_TRACK( pre_mem_read,tid, "ioctl(SNDCTL_XXX|SOUND_XXX "
+		     "(SIOWR, int))", 
+		     arg3, sizeof(int));
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(SNDCTL_XXX|SOUND_XXX "
+		     "(SIOWR, int))", 
+		     arg3, sizeof(int));
+      break;
+   case SNDCTL_DSP_GETOSPACE:
+   case SNDCTL_DSP_GETISPACE:
+      SYSCALL_TRACK( pre_mem_write,tid, 
+		     "ioctl(SNDCTL_XXX|SOUND_XXX "
+		     "(SIOR, audio_buf_info))", arg3,
+		     sizeof(audio_buf_info));
+      break;
+   case SNDCTL_DSP_SETTRIGGER:
+      SYSCALL_TRACK( pre_mem_read,tid, 
+		     "ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", 
+		     arg3, sizeof(int));
+      break;
+
+   case SNDCTL_DSP_POST:
+   case SNDCTL_DSP_RESET:
+   case SNDCTL_DSP_SYNC:
+   case SNDCTL_DSP_SETSYNCRO:
+   case SNDCTL_DSP_SETDUPLEX:
+      break;
+
+      /* Real Time Clock (/dev/rtc) ioctls */
+#           ifndef GLIBC_2_1
+   case RTC_UIE_ON:
+   case RTC_UIE_OFF:
+   case RTC_AIE_ON:
+   case RTC_AIE_OFF:
+   case RTC_PIE_ON:
+   case RTC_PIE_OFF:
+   case RTC_IRQP_SET:
+      break;
+   case RTC_RD_TIME:
+   case RTC_ALM_READ:
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(RTC_RD_TIME/ALM_READ)", 
+		     arg3, sizeof(struct rtc_time));
+      break;
+   case RTC_ALM_SET:
+      SYSCALL_TRACK( pre_mem_read,tid, "ioctl(RTC_ALM_SET)", arg3,
+		     sizeof(struct rtc_time));
+      break;
+   case RTC_IRQP_READ:
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(RTC_IRQP_READ)", arg3,
+		     sizeof(unsigned long));
+      break;
+#           endif /* GLIBC_2_1 */
+
+#           ifdef BLKGETSIZE
+   case BLKGETSIZE:
+      SYSCALL_TRACK( pre_mem_write,tid, "ioctl(BLKGETSIZE)", arg3,
+		     sizeof(unsigned long));
+      break;
+#           endif /* BLKGETSIZE */
+
+      /* CD ROM stuff (??)  */
+   case CDROMSUBCHNL:
+      SYSCALL_TRACK( pre_mem_read,tid, 
+		     "ioctl(CDROMSUBCHNL (cdsc_format, char))",
+		     (int) &(((struct cdrom_subchnl *) arg3)->cdsc_format), 
+		     sizeof(((struct cdrom_subchnl *) arg3)->cdsc_format));
+      SYSCALL_TRACK( pre_mem_write,tid, 
+		     "ioctl(CDROMSUBCHNL)", arg3, 
+		     sizeof(struct cdrom_subchnl));
+      break;
+   case CDROMREADTOCHDR:
+      SYSCALL_TRACK( pre_mem_write,tid, 
+		     "ioctl(CDROMREADTOCHDR)", arg3, 
+		     sizeof(struct cdrom_tochdr));
+      break;
+   case CDROMREADTOCENTRY:
+      SYSCALL_TRACK( pre_mem_read,tid, 
+		     "ioctl(CDROMREADTOCENTRY (cdte_format, char))",
+		     (int) &(((struct cdrom_tocentry *) arg3)->cdte_format), 
+		     sizeof(((struct cdrom_tocentry *) arg3)->cdte_format));
+      SYSCALL_TRACK( pre_mem_read,tid, 
+		     "ioctl(CDROMREADTOCENTRY (cdte_track, char))",
+		     (int) &(((struct cdrom_tocentry *) arg3)->cdte_track), 
+		     sizeof(((struct cdrom_tocentry *) arg3)->cdte_track));
+      SYSCALL_TRACK( pre_mem_write,tid, 
+		     "ioctl(CDROMREADTOCENTRY)", arg3, 
+		     sizeof(struct cdrom_tocentry));
+      break;
+   case CDROMPLAYMSF:
+      SYSCALL_TRACK( pre_mem_read,tid, "ioctl(CDROMPLAYMSF)", arg3, 
+		     sizeof(struct cdrom_msf));
+      break;
+      /* The following two are probably bogus (should check args
+	 for readability).  JRS 20021117 */
+   case CDROM_DRIVE_STATUS: /* 0x5326 */
+   case CDROM_CLEAR_OPTIONS: /* 0x5321 */
+      break;
+
+      /* We don't have any specific information on it, so
+	 try to do something reasonable based on direction and
+	 size bits.  The encoding scheme is described in
+	 /usr/include/asm/ioctl.h.  
+
+	 According to Simon Hausmann, _IOC_READ means the kernel
+	 writes a value to the ioctl value passed from the user
+	 space and the other way around with _IOC_WRITE. */
+   default: {
+      UInt dir  = _IOC_DIR(arg2);
+      UInt size = _IOC_SIZE(arg2);
+      if (VG_(strstr)(VG_(clo_weird_hacks), "lax-ioctls") != NULL) {
+	 /* 
+	  * Be very lax about ioctl handling; the only
+	  * assumption is that the size is correct. Doesn't
+	  * require the full buffer to be initialized when
+	  * writing.  Without this, using some device
+	  * drivers with a large number of strange ioctl
+	  * commands becomes very tiresome.
+	  */
+      } else if (/* size == 0 || */ dir == _IOC_NONE) {
+	 static Int moans = 3;
+	 if (moans > 0) {
+	    moans--;
+	    VG_(message)(Vg_UserMsg, 
+			 "Warning: noted but unhandled ioctl 0x%x"
+			 " with no size/direction hints",
+			 arg2); 
+	    VG_(message)(Vg_UserMsg, 
+			 "   This could cause spurious value errors"
+			 " to appear.");
+	    VG_(message)(Vg_UserMsg, 
+			 "   See README_MISSING_SYSCALL_OR_IOCTL for "
+			 "guidance on writing a proper wrapper." );
+	 }
+      } else {
+	 if ((dir & _IOC_WRITE) && size > 0)
+	    SYSCALL_TRACK( pre_mem_read,tid, "ioctl(generic)", 
+			   arg3, size);
+	 if ((dir & _IOC_READ) && size > 0)
+	    SYSCALL_TRACK( pre_mem_write,tid, "ioctl(generic)", 
+			   arg3, size);
+      }
+      break;
+   }
+   }   
+}
+
+POST(ioctl)
+{
+   /* int ioctl(int d, int request, ...)
+      [The  "third"  argument  is traditionally char *argp, 
+      and will be so named for this discussion.]
+
+      Mark as initialised whatever memory the kernel filled in for
+      each known request; mirror of the checks done in PRE(ioctl).
+   */
+   /*
+     VG_(message)(
+     Vg_DebugMsg, 
+     "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
+     arg2,arg1,arg3);
+   */
+   MAYBE_PRINTF("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
+   switch (arg2 /* request */) {
+   case TCSETS:
+   case TCSETSW:
+   case TCSETSF:
+      break; 
+   case TCGETS:
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIOS );
+      break;
+   case TCSETA:
+   case TCSETAW:
+   case TCSETAF:
+      break;
+   case TCGETA:
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIO );
+      break;
+   case TCSBRK:
+   case TCXONC:
+   case TCSBRKP:
+   case TCFLSH:
+      break;
+   case TIOCGWINSZ:
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, sizeof(struct winsize) );
+      break;
+   case TIOCSWINSZ:
+      break;
+   case TIOCLINUX:
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, sizeof(char *) );
+      break;
+   case TIOCGPGRP:
+      /* Get process group ID for foreground processing group. */
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, sizeof(pid_t) );
+      break;
+   case TIOCSPGRP:
+      /* Set the foreground process group: a pure "set" request; the
+	 kernel only reads the pid_t argument and writes nothing back,
+	 so there is nothing to mark here.  (Previously this wrongly
+	 did a post_mem_write of a pid_t.) */
+      break;
+   case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, sizeof(int));
+      break;
+   case TIOCSCTTY:
+      break;
+   case TIOCSPTLCK: /* Lock/unlock Pty */
+      break;
+   case FIONBIO:
+      break;
+   case FIOASYNC:
+      break;
+   case FIONREAD:                /* identical to SIOCINQ */
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, sizeof(int) );
+      break;
+
+      /* If you get compilation problems here, change the #if
+	 1 to #if 0 and get rid of <scsi/sg.h> in
+	 vg_unsafe.h. */
+#       if 1
+   case SG_SET_COMMAND_Q:
+      break;
+#           if defined(SG_IO)
+   case SG_IO:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct sg_io_hdr));
+      break;
+#           endif /* SG_IO */
+   case SG_GET_SCSI_ID:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct sg_scsi_id));
+      break;
+   case SG_SET_RESERVED_SIZE:
+      break;
+   case SG_SET_TIMEOUT:
+      break;
+   case SG_GET_RESERVED_SIZE:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(int));
+      break;
+   case SG_GET_TIMEOUT:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(int));
+      break;
+   case SG_GET_VERSION_NUM:
+      break;
+#       endif
+
+   case IIOCGETCPS:
+      /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
+       * when KERNEL was. I never saw a larger value than 64 though */
+#              ifndef ISDN_MAX_CHANNELS
+#              define ISDN_MAX_CHANNELS 64
+#              endif
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, ISDN_MAX_CHANNELS 
+		   * 2 * sizeof(unsigned long) );
+      break;
+   case IIOCNETGPN:
+      if (res == 0)
+	 VG_TRACK( post_mem_write, arg3, sizeof(isdn_net_ioctl_phone) );
+      break;
+
+      /* These all use struct ifreq AFAIK */
+   case SIOCGIFINDEX:
+   case SIOCGIFFLAGS:        /* get flags                    */
+   case SIOCGIFHWADDR:       /* Get hardware address         */
+   case SIOCGIFMTU:          /* get MTU size                 */
+   case SIOCGIFADDR:         /* get PA address               */
+   case SIOCGIFNETMASK:      /* get network PA mask          */
+   case SIOCGIFMETRIC:       /* get metric                   */
+   case SIOCGIFMAP:          /* Get device parameters        */
+   case SIOCGIFTXQLEN:       /* Get the tx queue length      */
+   case SIOCGIFDSTADDR:      /* get remote PA address        */
+   case SIOCGIFBRDADDR:      /* get broadcast PA address     */
+   case SIOCGIFNAME:         /* get iface name               */
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct ifreq));
+      break;
+   case SIOCGIFCONF:         /* get iface list               */
+      /* WAS:
+	 SYSCALL_TRACK( pre_mem_write,"ioctl(SIOCGIFCONF)", arg3, 
+	 sizeof(struct ifconf));
+	 KERNEL_DO_SYSCALL(tid,res);
+	 if (!VG_(is_kerror)(res) && res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct ifconf));
+      */
+      if (res == 0 && arg3 ) {
+	 struct ifconf *ifc = (struct ifconf *) arg3;
+	 if (ifc->ifc_buf != NULL)
+	    VG_TRACK( post_mem_write, (Addr)(ifc->ifc_buf), 
+		      (UInt)(ifc->ifc_len) );
+      }
+      break;
+   case SIOCGSTAMP:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct timeval));
+      break;
+      /* SIOCOUTQ is an ioctl that, when called on a socket, returns
+	 the number of bytes currently in that socket's send buffer.
+	 It writes this value as an int to the memory location
+	 indicated by the third argument of ioctl(2). */
+   case SIOCOUTQ:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(int));
+      break;
+   case SIOCGRARP:           /* get RARP table entry         */
+   case SIOCGARP:            /* get ARP table entry          */
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct arpreq));
+      break;
+                    
+   case SIOCSIFFLAGS:        /* set flags                    */
+   case SIOCSIFMAP:          /* Set device parameters        */
+   case SIOCSIFTXQLEN:       /* Set the tx queue length      */
+   case SIOCSIFDSTADDR:      /* set remote PA address        */
+   case SIOCSIFBRDADDR:      /* set broadcast PA address     */
+   case SIOCSIFNETMASK:      /* set network PA mask          */
+   case SIOCSIFMETRIC:       /* set metric                   */
+   case SIOCSIFADDR:         /* set PA address               */
+   case SIOCSIFMTU:          /* set MTU size                 */
+   case SIOCSIFHWADDR:       /* set hardware address         */
+      break;
+      /* Routing table calls.  */
+   case SIOCADDRT:           /* add routing table entry      */
+   case SIOCDELRT:           /* delete routing table entry   */
+      break;
+
+      /* RARP cache control calls. */
+   case SIOCDRARP:           /* delete RARP table entry      */
+   case SIOCSRARP:           /* set RARP table entry         */
+      /* ARP cache control calls. */
+   case SIOCSARP:            /* set ARP table entry          */
+   case SIOCDARP:            /* delete ARP table entry       */
+      break;
+
+   case SIOCSPGRP:
+      break;
+
+      /* linux/soundcard interface (OSS) */
+   case SNDCTL_SEQ_GETOUTCOUNT:
+   case SNDCTL_SEQ_GETINCOUNT:
+   case SNDCTL_SEQ_PERCMODE:
+   case SNDCTL_SEQ_TESTMIDI:
+   case SNDCTL_SEQ_RESETSAMPLES:
+   case SNDCTL_SEQ_NRSYNTHS:
+   case SNDCTL_SEQ_NRMIDIS:
+   case SNDCTL_SEQ_GETTIME:
+   case SNDCTL_DSP_GETFMTS:
+   case SNDCTL_DSP_GETTRIGGER:
+   case SNDCTL_DSP_GETODELAY:
+#           if defined(SNDCTL_DSP_GETSPDIF)
+   case SNDCTL_DSP_GETSPDIF:
+#           endif
+   case SNDCTL_DSP_GETCAPS:
+   case SOUND_PCM_READ_RATE:
+   case SOUND_PCM_READ_CHANNELS:
+   case SOUND_PCM_READ_BITS:
+   case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
+   case SOUND_PCM_READ_FILTER:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(int));
+      break;
+   case SNDCTL_SEQ_CTRLRATE:
+   case SNDCTL_DSP_SPEED:
+   case SNDCTL_DSP_STEREO:
+   case SNDCTL_DSP_GETBLKSIZE: 
+   case SNDCTL_DSP_CHANNELS:
+   case SOUND_PCM_WRITE_FILTER:
+   case SNDCTL_DSP_SUBDIVIDE:
+   case SNDCTL_DSP_SETFRAGMENT:
+#           if defined(SNDCTL_DSP_GETCHANNELMASK)
+   case SNDCTL_DSP_GETCHANNELMASK:
+#           endif
+#           if defined(SNDCTL_DSP_BIND_CHANNEL)
+   case SNDCTL_DSP_BIND_CHANNEL:
+#           endif
+   case SNDCTL_TMR_TIMEBASE:
+   case SNDCTL_TMR_TEMPO:
+   case SNDCTL_TMR_SOURCE:
+   case SNDCTL_MIDI_PRETIME:
+   case SNDCTL_MIDI_MPUMODE:
+      break;
+   case SNDCTL_DSP_GETOSPACE:
+   case SNDCTL_DSP_GETISPACE:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(audio_buf_info));
+      break;
+   case SNDCTL_DSP_SETTRIGGER:
+      break;
+
+   case SNDCTL_DSP_POST:
+   case SNDCTL_DSP_RESET:
+   case SNDCTL_DSP_SYNC:
+   case SNDCTL_DSP_SETSYNCRO:
+   case SNDCTL_DSP_SETDUPLEX:
+      break;
+
+      /* Real Time Clock (/dev/rtc) ioctls */
+#           ifndef GLIBC_2_1
+   case RTC_UIE_ON:
+   case RTC_UIE_OFF:
+   case RTC_AIE_ON:
+   case RTC_AIE_OFF:
+   case RTC_PIE_ON:
+   case RTC_PIE_OFF:
+   case RTC_IRQP_SET:
+      break;
+   case RTC_RD_TIME:
+   case RTC_ALM_READ:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct rtc_time));
+      break;
+   case RTC_ALM_SET:
+      break;
+   case RTC_IRQP_READ:
+      if(res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+      break;
+#           endif /* GLIBC_2_1 */
+
+#           ifdef BLKGETSIZE
+   case BLKGETSIZE:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+      break;
+#           endif /* BLKGETSIZE */
+
+      /* CD ROM stuff (??)  */
+   case CDROMSUBCHNL:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_subchnl));
+      break;
+   case CDROMREADTOCHDR:
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tochdr));
+      break;
+   case CDROMREADTOCENTRY:
+      /* Was sizeof(struct cdrom_tochdr) -- a copy/paste slip from the
+	 case above; the kernel fills in a struct cdrom_tocentry here,
+	 matching the pre_mem_write in PRE(ioctl). */
+      if (res == 0)
+	 VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tocentry));
+      break;
+   case CDROMPLAYMSF:
+      break;
+      /* The following two are probably bogus (should check args
+	 for readability).  JRS 20021117 */
+   case CDROM_DRIVE_STATUS: /* 0x5326 */
+   case CDROM_CLEAR_OPTIONS: /* 0x5321 */
+      break;
+
+      /* We don't have any specific information on it, so
+	 try to do something reasonable based on direction and
+	 size bits.  The encoding scheme is described in
+	 /usr/include/asm/ioctl.h.  
+
+	 According to Simon Hausmann, _IOC_READ means the kernel
+	 writes a value to the ioctl value passed from the user
+	 space and the other way around with _IOC_WRITE. */
+   default: {
+      UInt dir  = _IOC_DIR(arg2);
+      UInt size = _IOC_SIZE(arg2);
+      if (size > 0 && (dir & _IOC_READ)
+	  && res == 0
+	  && arg3 != (Addr)NULL)
+	 VG_TRACK( post_mem_write,arg3, size);
+      break;
+   }
+   }
+}
+
+PRE(kill)
+{
+   /* int kill(pid_t pid, int sig); */
+   MAYBE_PRINTF("kill ( %d, %d )\n", arg1,arg2);
+   /* The signals Valgrind reserves for its own use must never be
+      raisable by the client; reject them up front. */
+   switch (arg2) {
+   case VKI_SIGVGINT:
+   case VKI_SIGVGKILL:
+      res = -VKI_EINVAL;
+      break;
+   default:
+      break;
+   }
+}
+
+POST(kill)
+{
+   /* Make a successful self-directed kill() synchronous: block until
+      some thread actually receives a signal, so the client cannot
+      observe the kill as complete before delivery has begun.
+      NOTE(review): assumes VG_(proxy_waitsig) blocks until a signal
+      is taken by some proxy LWP -- confirm against vg_proxylwp.c. */
+   /* If this was a self-kill then wait for a signal to be
+      delivered to any thread before claiming the kill is done. */
+   if (res >= 0 &&					/* if it was successful */
+       arg2 != 0 &&					/* if a real signal */
+       !VG_(is_sig_ign)(arg2) &&			/* that isn't ignored and */
+       !VG_(ksigismember)(&tst->eff_sig_mask, arg2) &&	/*      we're not blocking it */
+       (arg1 == VG_(getpid)() ||			/* directed at us or */
+	arg1 == -1	      ||			/* directed at everyone or */
+	arg1 == 0	      ||			/* directed at whole group or */
+	-arg1 == VG_(getpgrp)())) {			/* directed at our group... */
+      /* ...then wait for that signal to be delivered to someone
+	 (might be us, might be someone else who doesn't have it
+	 blocked) */
+      VG_(proxy_waitsig)();
+   }
+}
+
+PRE(link)
+{
+   /* int link(const char *oldpath, const char *newpath); */
+   MAYBE_PRINTF("link ( %p, %p)\n", arg1, arg2);
+   /* Both paths must be readable, NUL-terminated strings. */
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "link(oldpath)", arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "link(newpath)", arg2);
+}
+
+PRE(lseek)
+{
+   /* off_t lseek(int fildes, off_t offset, int whence); */
+   /* All arguments are passed by value; no memory to check. */
+   MAYBE_PRINTF("lseek ( %d, %d, %d )\n",arg1,arg2,arg3);
+}
+
+PRE(_llseek)
+{
+   /* int _llseek(unsigned int fd, unsigned long offset_high,       
+      unsigned long  offset_low, 
+      loff_t * result, unsigned int whence); */
+   MAYBE_PRINTF("llseek ( %d, 0x%x, 0x%x, %p, %d )\n",
+		arg1,arg2,arg3,arg4,arg5);
+   /* The kernel stores the resulting 64-bit offset in *result. */
+   SYSCALL_TRACK( pre_mem_write, tid, "llseek(result)", arg4, 
+		  sizeof(loff_t));
+}
+
+POST(_llseek)
+{
+   /* On success (res == 0) the kernel has filled in *result. */
+   if (res == 0)
+      VG_TRACK( post_mem_write, arg4, sizeof(loff_t) );
+}
+
+PRE(lstat)
+{
+   /* int lstat(const char *file_name, struct stat *buf); */
+   MAYBE_PRINTF("lstat ( %p, %p )\n",arg1,arg2);
+   /* Path must be a readable string; buf must be writable. */
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "lstat(file_name)", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "lstat(buf)", arg2, 
+		  sizeof(struct stat) );
+}
+
+POST(lstat)
+{
+   /* On success the kernel has filled in the whole stat buffer. */
+   if (res == 0)
+      VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+}
+
+PRE(lstat64)
+{
+   /* int lstat64(const char *file_name, struct stat64 *buf); */
+   MAYBE_PRINTF("lstat64 ( %p, %p )\n",arg1,arg2);
+   /* Path must be a readable string; buf must be writable. */
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "lstat64(file_name)", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "lstat64(buf)", arg2, 
+		  sizeof(struct stat64) );
+}
+
+POST(lstat64)
+{
+   /* On success the kernel has filled in the whole stat64 buffer. */
+   if (res == 0)
+      VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+}
+
+PRE(mkdir)
+{
+   /* int mkdir(const char *pathname, mode_t mode); */
+   MAYBE_PRINTF("mkdir ( %p, %d )\n", arg1,arg2);
+   /* mode is by value; only the pathname needs checking. */
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "mkdir(pathname)", arg1 );
+}
+
+PRE(mmap2)
+{
+   /* My impression is that this is exactly like __NR_mmap 
+      except that all 6 args are passed in regs, rather than in 
+      a memory-block. */
+   /* void* mmap(void *start, size_t length, int prot, 
+      int flags, int fd, off_t offset); 
+   */
+   /* All six arguments are by value, so nothing to check here. */
+   MAYBE_PRINTF("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
+		arg1, arg2, arg3, arg4, arg5, arg6 );
+}
+
+POST(mmap2)
+{
+   /* Record the new mapping: res = base, arg2 = length,
+      arg3 = prot, arg5 = fd. */
+   mmap_segment( (Addr)res, arg2, arg3, arg5 );
+}
+
+PRE(mmap)
+{
+   /* void* mmap(void *start, size_t length, int prot, 
+      int flags, int fd, off_t offset); 
+
+      Old-style mmap: all six arguments arrive in a memory block
+      pointed to by arg1, rather than in registers. */
+   UInt* args = (UInt*)arg1;
+   UInt a1, a2, a3, a4, a5, a6;
+
+   /* The argument block itself must be readable. */
+   SYSCALL_TRACK( pre_mem_read, tid, "mmap(args)", arg1, 6*sizeof(UInt) );
+
+   a1 = args[0]; a2 = args[1]; a3 = args[2];
+   a4 = args[3]; a5 = args[4]; a6 = args[5];
+   MAYBE_PRINTF("mmap ( %p, %d, %d, %d, %d, %d )\n",
+		a1, a2, a3, a4, a5, a6 );
+}
+
+POST(mmap)
+{
+   /* Unpack length, prot and fd from the argument block and record
+      the new mapping at the returned base address. */
+   UInt* args = (UInt*)arg1;
+   UInt len  = args[1];
+   UInt prot = args[2];
+   UInt fd   = args[4];
+
+   mmap_segment( (Addr)res, len, prot, fd );
+}
+
+PRE(mprotect)
+{
+   /* int mprotect(const void *addr, size_t len, int prot); */
+   /* should addr .. addr+len-1 be checked before the call? */
+   MAYBE_PRINTF("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
+}
+
+POST(mprotect)
+{
+   /* Record the permission change: arg1 = addr, arg2 = len,
+      arg3 = new prot. */
+   mprotect_segment( arg1, arg2, arg3 );
+}
+
+PRE(munmap)
+{
+   /* int munmap(void *start, size_t length); */
+   /* should start .. start+length-1 be checked before the call? */
+   MAYBE_PRINTF("munmap ( %p, %d )\n", arg1,arg2);
+}
+
+POST(munmap)
+{
+   /* Record the unmapping: arg1 = start, arg2 = length. */
+   munmap_segment( arg1, arg2 );
+}
+
+PRE(nanosleep)
+{
+   /* int nanosleep(const struct timespec *req, struct timespec *rem); */
+   MAYBE_PRINTF("nanosleep ( %p, %p )\n", arg1,arg2);
+   /* req must always be readable; rem is optional but, when given,
+      must be writable. */
+   SYSCALL_TRACK( pre_mem_read, tid, "nanosleep(req)", arg1, 
+		  sizeof(struct timespec) );
+   if (arg2 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "nanosleep(rem)", arg2, 
+		     sizeof(struct timespec) );
+}
+
+POST(nanosleep)
+{
+   /* Somewhat bogus ... is only written by the kernel if
+      res == -1 && errno == EINTR. */
+   if (arg2 != (UInt)NULL)
+      VG_TRACK( post_mem_write, arg2, sizeof(struct timespec) );
+}
+
+PRE(_newselect)
+{
+   /* int select(int n,  
+		 fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
+		 struct timeval *timeout);
+   */
+   MAYBE_PRINTF("newselect ( %d, %p, %p, %p, %p )\n",
+		arg1,arg2,arg3,arg4,arg5);
+   /* Each non-NULL fd_set is checked for the first n bits only.
+      NOTE(review): arg1/8 rounds down, so when n is not a multiple
+      of 8 the final partial byte goes unchecked -- confirm whether
+      this slack is intended. */
+   if (arg2 != 0)
+      SYSCALL_TRACK( pre_mem_read, tid, "newselect(readfds)",   
+		     arg2, arg1/8 /* __FD_SETSIZE/8 */ );
+   if (arg3 != 0)
+      SYSCALL_TRACK( pre_mem_read, tid, "newselect(writefds)",  
+		     arg3, arg1/8 /* __FD_SETSIZE/8 */ );
+   if (arg4 != 0)
+      SYSCALL_TRACK( pre_mem_read, tid, "newselect(exceptfds)", 
+		     arg4, arg1/8 /* __FD_SETSIZE/8 */ );
+   if (arg5 != 0)
+      SYSCALL_TRACK( pre_mem_read, tid, "newselect(timeout)", arg5, 
+		     sizeof(struct timeval) );
+}
+
+PRE(open)
+{
+   /* int open(const char *pathname, int flags); */
+   MAYBE_PRINTF("open ( %p(%s), %d ) --> ",arg1,arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "open(pathname)", arg1 );
+}
+
+POST(open)
+{
+   /* Refuse descriptors outside the range Valgrind allows the
+      client, and report the failure as EMFILE. */
+   if (!fd_allowed(res, "open", tid)) {
+      VG_(close)(res);
+      res = -VKI_EMFILE;
+   }
+   MAYBE_PRINTF("%d\n",res);
+}
+
+PRE(read)
+{
+   /* size_t read(int fd, void *buf, size_t count); */
+   MAYBE_PRINTF("read ( %d, %p, %d )\n", arg1, arg2, arg3);
+
+   if (!fd_allowed(arg1, "read", tid))
+      res = -VKI_EBADF;   
+   else
+      /* The destination buffer must be addressable for count bytes;
+	 mirrors the per-iovec check in PRE(readv) and the buffer
+	 check in PRE(write). */
+      SYSCALL_TRACK( pre_mem_write, tid, "read(buf)", arg2, arg3 );
+}
+
+POST(read)
+{
+   /* res = number of bytes actually read; mark only those. */
+   if (res > 0)
+      VG_TRACK(post_mem_write, arg2, res);
+}
+
+PRE(write)
+{
+   /* size_t write(int fd, const void *buf, size_t count); */
+   MAYBE_PRINTF("write ( %d, %p, %d )\n", arg1, arg2, arg3);
+   if (!fd_allowed(arg1, "write", tid))
+      res = -VKI_EBADF;
+   else
+      /* All count bytes of the source buffer must be readable. */
+      SYSCALL_TRACK( pre_mem_read, tid, "write(buf)", arg2, arg3 );
+}
+
+PRE(creat)
+{
+   /* int creat(const char *pathname, mode_t mode); */
+   MAYBE_PRINTF("creat ( %p(%s), %d ) --> ",arg1,arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "creat(pathname)", arg1 );
+}
+
+POST(creat)
+{
+   /* Refuse descriptors outside the range Valgrind allows the
+      client, and report the failure as EMFILE. */
+   if (!fd_allowed(res, "creat", tid)) {
+      VG_(close)(res);
+      res = -VKI_EMFILE;
+   }
+   MAYBE_PRINTF("%d\n",res);
+}
+
+PRE(pipe)
+{
+   /* int pipe(int filedes[2]); */
+   MAYBE_PRINTF("pipe ( %p ) ...\n", arg1);
+   /* The kernel stores both descriptors into filedes[0..1]. */
+   SYSCALL_TRACK( pre_mem_write, tid, "pipe(filedes)", 
+		  arg1, 2*sizeof(int) );
+}
+
+POST(pipe)
+{
+   Int *fds = (Int *)arg1;
+
+   /* If either end of the pipe falls outside the fd range Valgrind
+      allows the client, close both ends and fail with EMFILE;
+      otherwise mark the two descriptors as initialised. */
+   if (fd_allowed(fds[0], "pipe", tid) &&
+       fd_allowed(fds[1], "pipe", tid)) {
+      VG_TRACK( post_mem_write, arg1, 2*sizeof(int) );
+   } else {
+      VG_(close)(fds[0]);
+      VG_(close)(fds[1]);
+      res = -VKI_EMFILE;
+   }
+
+   MAYBE_PRINTF("SYSCALL[%d]       pipe --> %d (rd %d, wr %d)\n", 
+		VG_(getpid)(), res,
+		((UInt*)arg1)[0], ((UInt*)arg1)[1] );
+}
+
+PRE(poll)
+{
+   /* struct pollfd {
+	int fd;           -- file descriptor
+	short events;     -- requested events
+	short revents;    -- returned events
+      };
+      int poll(struct pollfd *ufds, unsigned int nfds, 
+      int timeout) 
+   */
+   MAYBE_PRINTF("poll ( %p, %d, %d )\n",arg1,arg2,arg3);
+   /* In fact some parts of this struct should be readable too.
+      This should be fixed properly. */
+   SYSCALL_TRACK( pre_mem_write, tid, "poll(ufds)", 
+		  arg1, arg2 * sizeof(struct pollfd) );
+}
+
+POST(poll)
+{
+   /* On success, mark the revents field of every entry as
+      initialised. */
+   if (res > 0) {
+      struct pollfd *ufds = (struct pollfd *)arg1;
+      UInt idx;
+
+      for (idx = 0; idx < arg2; idx++)
+	 VG_TRACK( post_mem_write, (Addr)(&ufds[idx].revents), 
+		   sizeof(Short) );
+   }
+}
+
+PRE(readlink)
+{
+   /* int readlink(const char *path, char *buf, size_t bufsiz); */
+   MAYBE_PRINTF("readlink ( %p, %p, %d )\n", arg1,arg2,arg3);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "readlink(path)", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "readlink(buf)", arg2,arg3 );
+}
+
+POST(readlink)
+{
+   /* Only the bytes actually returned are defined; guard against
+      res <= 0 (error or empty result) so a negative error code is
+      never passed to post_mem_write as a length — matches the
+      equivalent check in POST(read). */
+   if (res > 0)
+      VG_TRACK( post_mem_write, arg2, res );
+}
+
+PRE(readv)
+{
+   /* int readv(int fd, const struct iovec * vector, size_t count); */
+   Int i;
+   struct iovec *iov;
+   MAYBE_PRINTF("readv ( %d, %p, %d )\n",arg1,arg2,arg3);
+   /* Reject reserved fds up front. */
+   if (!fd_allowed(arg1, "readv", tid)) {
+      res = -VKI_EBADF;
+      return;
+   }
+   /* The iovec array itself is read by the kernel... */
+   SYSCALL_TRACK( pre_mem_read, tid, "readv(vector)",
+		  arg2, arg3 * sizeof(struct iovec) );
+   /* ToDo: don't do any of the following if the vector is invalid */
+   /* ...and each described buffer is written to. */
+   iov = (struct iovec *)arg2;
+   for (i = 0; i < (Int)arg3; i++)
+      SYSCALL_TRACK( pre_mem_write, tid, "readv(vector[...])",
+		     (UInt)iov[i].iov_base, iov[i].iov_len );
+}
+
+POST(readv)
+{
+   /* Distribute the res bytes actually read across the iovec
+      buffers, in order, marking only the filled prefix of each
+      buffer as initialised. */
+   if (res > 0) {
+      Int i;
+      struct iovec * vec = (struct iovec *)arg2;
+      Int remains = res;
+
+      /* res holds the number of bytes read. */
+      for (i = 0; i < (Int)arg3; i++) {
+	 Int nReadThisBuf = vec[i].iov_len;
+	 if (nReadThisBuf > remains) nReadThisBuf = remains;
+	 VG_TRACK( post_mem_write, (UInt)vec[i].iov_base, nReadThisBuf );
+	 remains -= nReadThisBuf;
+	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
+      }
+   }
+}
+
+PRE(rename)
+{
+   /* int rename(const char *oldpath, const char *newpath); */
+   MAYBE_PRINTF("rename ( %p, %p )\n", arg1, arg2 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "rename(oldpath)", arg1 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "rename(newpath)", arg2 );
+}
+
+PRE(rmdir)
+{
+   /* int rmdir(const char *pathname); */
+   MAYBE_PRINTF("rmdir ( %p )\n", arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "rmdir(pathname)", arg1 );
+}
+
+PRE(sched_setparam)
+{
+   /* int sched_setparam(pid_t pid, const struct sched_param *p); */
+   MAYBE_PRINTF("sched_setparam ( %d, %p )\n", arg1, arg2 );
+   /* *p is input-only (const); the kernel copies it in. */
+   SYSCALL_TRACK( pre_mem_read, tid, "sched_setparam(ptr)",
+		  arg2, sizeof(struct sched_param) );
+}
+
+POST(sched_setparam)
+{
+   /* sched_setparam() only reads *p (the parameter is declared
+      const); the kernel never writes back through the pointer.
+      The previous post_mem_write here wrongly marked the caller's
+      sched_param as initialised, which could hide real
+      uninitialised-value errors. */
+}
+
+PRE(sched_getparam)
+{
+   /* int sched_getparam(pid_t pid, struct sched_param *p); */
+   MAYBE_PRINTF("sched_getparam ( %d, %p )\n", arg1, arg2 );
+   /* Kernel fills in *p on success. */
+   SYSCALL_TRACK( pre_mem_write, tid, "sched_getparam(ptr)",
+		  arg2, sizeof(struct sched_param) );
+}
+
+POST(sched_getparam)
+{
+   VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+}
+
+PRE(sched_yield)
+{
+   /* int sched_yield(void); -- no arguments, nothing to check. */
+   MAYBE_PRINTF("sched_yield ()\n" );
+}
+
+PRE(select)
+{
+   /* struct sel_arg_struct {
+      unsigned long n;
+      fd_set *inp, *outp, *exp;
+      struct timeval *tvp;
+      };
+      int old_select(struct sel_arg_struct *arg);
+   */
+   /* The old (pre-_newselect) select passes all five arguments in a
+      single user-space block pointed to by arg1. */
+   SYSCALL_TRACK( pre_mem_read, tid, "select(args)", arg1, 5*sizeof(UInt) );
+
+   {
+      UInt* arg_struct = (UInt*)arg1;
+      UInt a1, a2, a3, a4, a5;
+
+      a1 = arg_struct[0];
+      a2 = arg_struct[1];
+      a3 = arg_struct[2];
+      a4 = arg_struct[3];
+      a5 = arg_struct[4];
+
+      MAYBE_PRINTF("select ( %d, %p, %p, %p, %p )\n", 
+		   a1,a2,a3,a4,a5);
+      if (a2 != (Addr)NULL)
+	 SYSCALL_TRACK( pre_mem_read, tid, "select(readfds)", a2, 
+			a1/8 /* __FD_SETSIZE/8 */ );
+      if (a3 != (Addr)NULL)
+	 /* Bug fix: the length was previously arg1/8 (the address of
+	    the argument block) instead of a1/8 (the fd count). */
+	 SYSCALL_TRACK( pre_mem_read, tid, "select(writefds)", a3, 
+			a1/8 /* __FD_SETSIZE/8 */ );
+      if (a4 != (Addr)NULL)
+	 SYSCALL_TRACK( pre_mem_read, tid, "select(exceptfds)", a4, 
+			a1/8 /* __FD_SETSIZE/8 */ );
+      if (a5 != (Addr)NULL)
+	 SYSCALL_TRACK( pre_mem_read, tid, "select(timeout)", a5, 
+			sizeof(struct timeval) );
+   }
+}
+
+PRE(setitimer)
+{
+   /* setitimer(int which, const struct itimerval *value,
+                struct itimerval *ovalue); */
+   MAYBE_PRINTF("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3);
+   /* value is read; ovalue (if given) receives the old timer. */
+   if (arg2 != (Addr)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid, "setitimer(value)",
+                     arg2, sizeof(struct itimerval) );
+   if (arg3 != (Addr)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "setitimer(ovalue)",
+                     arg3, sizeof(struct itimerval) );
+}
+
+POST(setitimer)
+{
+   /* ovalue now holds the previous timer settings. */
+   if (arg3 != (Addr)NULL) {
+      VG_TRACK( post_mem_write,arg3, sizeof(struct itimerval));
+   }
+}
+
+/* Identity-setting calls below take only integer arguments, so there
+   is no user memory to check; the wrappers just trace. */
+PRE(setfsgid32)
+{
+   /* int setfsgid(uid_t fsgid); */
+   MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+}
+
+PRE(setgid)
+{
+   /* int setgid(gid_t gid); */
+   MAYBE_PRINTF("setgid ( %d )\n", arg1);
+}
+
+PREALIAS(setgid32, setgid);
+
+PRE(setsid)
+{
+   /* pid_t setsid(void); */
+   MAYBE_PRINTF("setsid ()\n");
+}
+
+PRE(setgroups)
+{
+   /* int setgroups(size_t size, const gid_t *list); */
+   MAYBE_PRINTF("setgroups ( %d, %p )\n", arg1, arg2);
+   /* list may legitimately be unused when size == 0. */
+   if (arg1 > 0)
+      SYSCALL_TRACK( pre_mem_read, tid, "setgroups(list)", arg2, 
+		     arg1 * sizeof(gid_t) );
+}
+
+PREALIAS(setgroups32, setgroups);
+
+PRE(setpgid)
+{
+   /* int setpgid(pid_t pid, pid_t pgid); */
+   MAYBE_PRINTF("setpgid ( %d, %d )\n", arg1, arg2);
+}
+
+POST(setpgid)
+{
+   /* Our process group may have changed; refresh the cached value. */
+   VG_(main_pgrp) = VG_(getpgrp)();
+}
+
+/* More id-setting wrappers: integer args only, nothing to check
+   except setrlimit's struct read. */
+PRE(setregid32)
+{
+   /* int setregid(gid_t rgid, gid_t egid); */
+   MAYBE_PRINTF("setregid32(?) ( %d, %d )\n", arg1, arg2);
+}
+
+PRE(setresuid32)
+{
+   /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+   MAYBE_PRINTF("setresuid32(?) ( %d, %d, %d )\n", arg1, arg2, arg3);
+}
+
+PRE(setreuid)
+{
+   /* int setreuid(uid_t ruid, uid_t euid); */
+   MAYBE_PRINTF("setreuid ( 0x%x, 0x%x )\n", arg1, arg2);
+}
+
+PREALIAS(setreuid32, setreuid);
+
+PRE(setrlimit)
+{
+   /* int setrlimit (int resource, const struct rlimit *rlim); */
+   MAYBE_PRINTF("setrlimit ( %d, %p )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read, tid, "setrlimit(rlim)", 
+		  arg2, sizeof(struct rlimit) );
+}
+
+PRE(setuid)
+{
+   /* int setuid(uid_t uid); */
+   MAYBE_PRINTF("setuid ( %d )\n", arg1);
+}
+
+PREALIAS(setuid32, setuid);
+
+/* Demultiplex the socketcall(2) entry point: arg1 selects the call,
+   arg2 points to a user-space vector holding that call's arguments.
+   Each case checks the vector itself plus any user memory the kernel
+   will read for that sub-call. */
+PRE(socketcall)
+{
+   /* int socketcall(int call, unsigned long *args); */
+   MAYBE_PRINTF("socketcall ( %d, %p )\n",arg1,arg2);
+   switch (arg1 /* request */) {
+
+   case SYS_SOCKETPAIR:
+      /* int socketpair(int d, int type, int protocol, int sv[2]); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.socketpair(args)", 
+		     arg2, 4*sizeof(Addr) );
+      SYSCALL_TRACK( pre_mem_write, tid, "socketcall.socketpair(sv)", 
+		     ((UInt*)arg2)[3], 2*sizeof(int) );
+      break;
+
+   case SYS_SOCKET:
+      /* int socket(int domain, int type, int protocol); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.socket(args)", 
+		     arg2, 3*sizeof(Addr) );
+      break;
+
+   case SYS_BIND:
+      /* int bind(int sockfd, struct sockaddr *my_addr, 
+	 int addrlen); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.bind(args)", 
+		     arg2, 3*sizeof(Addr) );
+      pre_mem_read_sockaddr( tid, "socketcall.bind(my_addr.%s)",
+			     (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+      break;
+               
+   case SYS_LISTEN:
+      /* int listen(int s, int backlog); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.listen(args)", 
+		     arg2, 2*sizeof(Addr) );
+      break;
+
+   case SYS_ACCEPT: {
+      /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.accept(args)", 
+		     arg2, 3*sizeof(Addr) );
+      {
+	 Addr addr_p     = ((UInt*)arg2)[1];
+	 Addr addrlen_p  = ((UInt*)arg2)[2];
+	 /* addr may be NULL when the caller is not interested in the
+	    peer's address. */
+	 if (addr_p != (Addr)NULL) 
+	    buf_and_len_pre_check ( tid, addr_p, addrlen_p,
+				    "socketcall.accept(addr)",
+				    "socketcall.accept(addrlen_in)" );
+      }
+      break;
+   }
+
+   case SYS_SENDTO:
+      /* int sendto(int s, const void *msg, int len, 
+	 unsigned int flags, 
+	 const struct sockaddr *to, int tolen); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.sendto(args)", arg2, 
+		     6*sizeof(Addr) );
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.sendto(msg)",
+		     ((UInt*)arg2)[1], /* msg */
+		     ((UInt*)arg2)[2]  /* len */ );
+      pre_mem_read_sockaddr( tid, "socketcall.sendto(to.%s)",
+			     (struct sockaddr *) (((UInt*)arg2)[4]), ((UInt*)arg2)[5]);
+      break;
+
+   case SYS_SEND:
+      /* int send(int s, const void *msg, size_t len, int flags); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.send(args)", arg2,
+		     4*sizeof(Addr) );
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.send(msg)",
+		     ((UInt*)arg2)[1], /* msg */
+		     ((UInt*)arg2)[2]  /* len */ );
+      break;
+
+   case SYS_RECVFROM:
+      /* int recvfrom(int s, void *buf, int len, unsigned int flags,
+	 struct sockaddr *from, int *fromlen); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.recvfrom(args)", 
+		     arg2, 6*sizeof(Addr) );
+      {
+	 Addr buf_p      = ((UInt*)arg2)[1];
+	 Int  len        = ((UInt*)arg2)[2];
+	 Addr from_p     = ((UInt*)arg2)[4];
+	 Addr fromlen_p  = ((UInt*)arg2)[5];
+
+	 SYSCALL_TRACK( pre_mem_write, tid, "socketcall.recvfrom(buf)", 
+			buf_p, len );
+	 if (from_p != (Addr)NULL) 
+	    buf_and_len_pre_check ( tid, from_p, fromlen_p, 
+				    "socketcall.recvfrom(from)",
+				    "socketcall.recvfrom(fromlen_in)" );
+      }
+      break;
+   
+   case SYS_RECV:
+      /* int recv(int s, void *buf, int len, unsigned int flags); */
+      /* man 2 recv says:
+	 The  recv call is normally used only on a connected socket
+	 (see connect(2)) and is identical to recvfrom with a  NULL
+	 from parameter.
+      */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.recv(args)", 
+		     arg2, 4*sizeof(Addr) );
+      SYSCALL_TRACK( pre_mem_write, tid, "socketcall.recv(buf)", 
+		     ((UInt*)arg2)[1], /* buf */
+		     ((UInt*)arg2)[2]  /* len */ );
+      break;
+
+   case SYS_CONNECT:
+      /* int connect(int sockfd, 
+	 struct sockaddr *serv_addr, int addrlen ); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.connect(args)", 
+		     arg2, 3*sizeof(Addr) );
+      SYSCALL_TRACK( pre_mem_read, tid, 
+		     "socketcall.connect(serv_addr.sa_family)",
+		     ((UInt*)arg2)[1], /* serv_addr */
+		     sizeof (sa_family_t));
+      pre_mem_read_sockaddr( tid,
+			     "socketcall.connect(serv_addr.%s)",
+			     (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+      break;
+
+   case SYS_SETSOCKOPT:
+      /* int setsockopt(int s, int level, int optname, 
+	 const void *optval, int optlen); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.setsockopt(args)", 
+		     arg2, 5*sizeof(Addr) );
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.setsockopt(optval)",
+		     ((UInt*)arg2)[3], /* optval */
+		     ((UInt*)arg2)[4]  /* optlen */ );
+      break;
+
+   case SYS_GETSOCKOPT:
+      /* int setsockopt(int s, int level, int optname, 
+	 void *optval, socklen_t *optlen); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.getsockopt(args)", 
+		     arg2, 5*sizeof(Addr) );
+      {
+	 Addr optval_p  = ((UInt*)arg2)[3];
+	 Addr optlen_p  = ((UInt*)arg2)[4];
+	 /* vg_assert(sizeof(socklen_t) == sizeof(UInt)); */
+	 if (optval_p != (Addr)NULL) 
+	    buf_and_len_pre_check ( tid, optval_p, optlen_p,
+				    "socketcall.getsockopt(optval)",
+				    "socketcall.getsockopt(optlen)" );
+      }
+      break;
+
+   case SYS_GETSOCKNAME:
+      /* int getsockname(int s, struct sockaddr* name, int* namelen) */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.getsockname(args)",
+		     arg2, 3*sizeof(Addr) );
+      {
+	 Addr name_p     = ((UInt*)arg2)[1];
+	 Addr namelen_p  = ((UInt*)arg2)[2];
+
+	 /* Nb: name_p cannot be NULL */
+	 buf_and_len_pre_check ( tid, name_p, namelen_p,
+				 "socketcall.getsockname(name)",
+				 "socketcall.getsockname(namelen_in)" );
+      }
+      break;
+
+   case SYS_GETPEERNAME:
+      /* int getpeername(int s, struct sockaddr* name, int* namelen) */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.getpeername(args)",
+		     arg2, 3*sizeof(Addr) );
+      {
+	 Addr name_p     = ((UInt*)arg2)[1];
+	 Addr namelen_p  = ((UInt*)arg2)[2];
+
+	 /* Nb: name_p cannot be NULL */
+	 buf_and_len_pre_check ( tid, name_p, namelen_p,
+				 "socketcall.getpeername(name)",
+				 "socketcall.getpeername(namelen_in)" );
+      }
+      break;
+
+   case SYS_SHUTDOWN:
+      /* int shutdown(int s, int how); */
+      SYSCALL_TRACK( pre_mem_read, tid, "socketcall.shutdown(args)", 
+		     arg2, 2*sizeof(Addr) );
+      break;
+
+   case SYS_SENDMSG: {
+      /* int sendmsg(int s, const struct msghdr *msg, int flags); */
+
+      /* this causes warnings, and I don't get why. glibc bug?
+       * (after all it's glibc providing the arguments array)
+       SYSCALL_TRACK( pre_mem_read, "socketcall.sendmsg(args)", 
+       arg2, 3*sizeof(Addr) );
+      */
+
+      struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+      msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
+
+      break;
+   }
+      
+   case SYS_RECVMSG: {
+      /* int recvmsg(int s, struct msghdr *msg, int flags); */
+
+      /* this causes warnings, and I don't get why. glibc bug?
+       * (after all it's glibc providing the arguments array)
+       SYSCALL_TRACK( pre_mem_read, "socketcall.recvmsg(args)", 
+       arg2, 3*sizeof(Addr) );
+      */
+
+      struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+      msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
+
+      break;
+   }
+
+   default:
+      VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+   }
+}
+
+/* Post-processing for socketcall(2): validate any fds the kernel
+   created and mark kernel-written user memory as initialised. */
+POST(socketcall)
+{
+   /* int socketcall(int call, unsigned long *args); */
+   MAYBE_PRINTF("socketcall ( %d, %p )\n",arg1,arg2);
+
+   switch (arg1 /* request */) {
+
+   case SYS_SOCKETPAIR:
+      /* XXX TODO: check return fd against VG_MAX_FD */
+      VG_TRACK( post_mem_write, ((UInt*)arg2)[3], 2*sizeof(int) );
+      break;
+
+   case SYS_SOCKET:
+      if (!fd_allowed(res, "socket", tid)) {
+	 VG_(close)(res);
+	 res = -VKI_EMFILE;
+      }
+      break;
+
+   case SYS_BIND:
+      /* int bind(int sockfd, struct sockaddr *my_addr, 
+			int addrlen); */
+      break;
+               
+   case SYS_LISTEN:
+      /* int listen(int s, int backlog); */
+      break;
+
+   case SYS_ACCEPT: {
+      /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+      if (!fd_allowed(res, "accept", tid)) {
+	 VG_(close)(res);
+	 res = -VKI_EMFILE;
+      } else {
+	 Addr addr_p     = ((UInt*)arg2)[1];
+	 Addr addrlen_p  = ((UInt*)arg2)[2];
+
+	 if (addr_p != (Addr)NULL) 
+	    buf_and_len_post_check ( tid, res, addr_p, addrlen_p,
+				     "socketcall.accept(addrlen_out)" );
+      }
+      break;
+   }
+
+   case SYS_SENDTO:
+      break;
+
+   case SYS_SEND:
+      break;
+
+   case SYS_RECVFROM:
+      {
+	 Addr buf_p      = ((UInt*)arg2)[1];
+	 Int  len        = ((UInt*)arg2)[2];
+	 Addr from_p     = ((UInt*)arg2)[4];
+	 Addr fromlen_p  = ((UInt*)arg2)[5];
+
+	 if (from_p != (Addr)NULL) 
+	    buf_and_len_post_check ( tid, res, from_p, fromlen_p,
+				     "socketcall.recvfrom(fromlen_out)" );
+	 /* NOTE(review): marks all len bytes written even though only
+	    res bytes arrived — possible over-marking; confirm. */
+	 VG_TRACK( post_mem_write, buf_p, len );
+      }
+      break;
+
+   case SYS_RECV:
+      if (res >= 0 
+	  && ((UInt*)arg2)[1] != (UInt)NULL) {
+	 VG_TRACK( post_mem_write, ((UInt*)arg2)[1], /* buf */
+		   ((UInt*)arg2)[2]  /* len */ );
+      }
+      break;
+
+   case SYS_CONNECT:
+      break;
+
+   case SYS_SETSOCKOPT:
+      break;
+
+   case SYS_GETSOCKOPT:
+      {
+	 Addr optval_p  = ((UInt*)arg2)[3];
+	 Addr optlen_p  = ((UInt*)arg2)[4];
+
+	 if (optval_p != (Addr)NULL) 
+	    buf_and_len_post_check ( tid, res, optval_p, optlen_p,
+				     "socketcall.getsockopt(optlen_out)" );
+      }
+      break;
+
+   case SYS_GETSOCKNAME:
+      {
+	 Addr name_p     = ((UInt*)arg2)[1];
+	 Addr namelen_p  = ((UInt*)arg2)[2];
+
+	 buf_and_len_post_check ( tid, res, name_p, namelen_p,
+				  "socketcall.getsockname(namelen_out)" );
+      }
+      break;
+
+   case SYS_GETPEERNAME:
+      {
+	 Addr name_p     = ((UInt*)arg2)[1];
+	 Addr namelen_p  = ((UInt*)arg2)[2];
+
+	 buf_and_len_post_check ( tid, res, name_p, namelen_p,
+				  "socketcall.getpeername(namelen_out)" );
+      }
+      break;
+
+   case SYS_SHUTDOWN:
+      break;
+
+   case SYS_SENDMSG:
+      break;
+
+   case SYS_RECVMSG:
+   {
+      struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+
+      msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
+
+      break;
+   }
+
+   default:
+      VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+   }
+}
+
+PRE(stat)
+{
+   /* int stat(const char *file_name, struct stat *buf); */
+   MAYBE_PRINTF("stat ( %p, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "stat(file_name)", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "stat(buf)", 
+		  arg2, sizeof(struct stat) );
+}
+
+POST(stat)
+{
+   VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+}
+
+PRE(statfs)
+{
+   /* int statfs(const char *path, struct statfs *buf); */
+   MAYBE_PRINTF("statfs ( %p, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "statfs(path)", arg1 );
+   /* Fixed error label: previously said "stat(buf)". */
+   SYSCALL_TRACK( pre_mem_write, tid, "statfs(buf)", 
+		  arg2, sizeof(struct statfs) );
+}
+
+POST(statfs)
+{
+   /* Kernel filled in *buf. */
+   VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+}
+
+PRE(symlink)
+{
+   /* int symlink(const char *oldpath, const char *newpath); */
+   MAYBE_PRINTF("symlink ( %p, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "symlink(oldpath)", arg1 );
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "symlink(newpath)", arg2 );
+}
+
+PRE(stat64)
+{
+   /* int stat64(const char *file_name, struct stat64 *buf); */
+   MAYBE_PRINTF("stat64 ( %p, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "stat64(file_name)", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "stat64(buf)", 
+		  arg2, sizeof(struct stat64) );
+}
+
+POST(stat64)
+{
+   VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+}
+
+PRE(fstat64)
+{
+   /* int fstat64(int filedes, struct stat64 *buf); */
+   /* NOTE(review): no fd_allowed() check here, unlike read/write —
+      presumably harmless for a stat; confirm. */
+   MAYBE_PRINTF("fstat64 ( %d, %p )\n",arg1,arg2);
+   SYSCALL_TRACK( pre_mem_write, tid, "fstat64(buf)", 
+		  arg2, sizeof(struct stat64) );
+}
+
+POST(fstat64)
+{
+   VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+}
+
+PRE(sysinfo)
+{
+   /* int sysinfo(struct sysinfo *info); */
+   MAYBE_PRINTF("sysinfo ( %p )\n",arg1);
+   SYSCALL_TRACK( pre_mem_write, tid, "sysinfo(info)", 
+		  arg1, sizeof(struct sysinfo) );
+}
+
+POST(sysinfo)
+{
+   VG_TRACK( post_mem_write, arg1, sizeof(struct sysinfo) );
+}
+
+PRE(time)
+{
+   /* time_t time(time_t *t); */
+   MAYBE_PRINTF("time ( %p )\n",arg1);
+   if (arg1 == (UInt)NULL)
+      return;
+   /* When t is non-NULL the result is also stored through it. */
+   SYSCALL_TRACK( pre_mem_write, tid, "time", arg1, sizeof(time_t) );
+}
+
+POST(time)
+{
+   if (arg1 != (UInt)NULL) {
+      VG_TRACK( post_mem_write, arg1, sizeof(time_t) );
+   }
+}
+
+PRE(times)
+{
+   /* clock_t times(struct tms *buf); */
+   MAYBE_PRINTF("times ( %p )\n",arg1);
+   SYSCALL_TRACK( pre_mem_write, tid, "times(buf)", 
+		  arg1, sizeof(struct tms) );
+}
+
+POST(times)
+{
+   if (arg1 != (UInt)NULL) {
+      VG_TRACK( post_mem_write, arg1, sizeof(struct tms) );
+   }
+}
+
+PRE(truncate)
+{
+   /* int truncate(const char *path, size_t length); */
+   MAYBE_PRINTF("truncate ( %p, %d )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "truncate(path)", arg1 );
+}
+
+PRE(umask)
+{
+   /* mode_t umask(mode_t mask); -- integer arg only, nothing to check. */
+   MAYBE_PRINTF("umask ( %d )\n", arg1);
+}
+
+PRE(unlink)
+{
+   /* int unlink(const char *pathname) */
+   /* Fixed trace-string typo: was "ulink". */
+   MAYBE_PRINTF("unlink ( %p )\n",arg1);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "unlink(pathname)", arg1 );
+}
+
+PRE(uname)
+{
+   /* int uname(struct utsname *buf); */
+   MAYBE_PRINTF("uname ( %p )\n",arg1);
+   SYSCALL_TRACK( pre_mem_write, tid, "uname(buf)", 
+		  arg1, sizeof(struct utsname) );
+}
+
+POST(uname)
+{
+   if (arg1 != (UInt)NULL) {
+      VG_TRACK( post_mem_write, arg1, sizeof(struct utsname) );
+   }
+}
+
+PRE(utime)
+{
+   /* int utime(const char *filename, struct utimbuf *buf); */
+   MAYBE_PRINTF("utime ( %p, %p )\n", arg1,arg2);
+   SYSCALL_TRACK( pre_mem_read_asciiz, tid, "utime(filename)", arg1 );
+   if (arg2 == (UInt)NULL)
+      return;
+   /* A NULL buf means "use the current time"; otherwise the
+      timestamps are read from it. */
+   SYSCALL_TRACK( pre_mem_read, tid, "utime(buf)", arg2,
+		  sizeof(struct utimbuf) );
+}
+
+PRE(waitpid)
+{
+   /* pid_t waitpid(pid_t pid, int *status, int options); */
+
+   MAYBE_PRINTF("waitpid ( %d, %p, %d )\n",
+                arg1,arg2,arg3);
+   if (arg2 != (Addr)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "waitpid(status)",
+                     arg2, sizeof(int) );
+}
+
+POST(waitpid)
+{
+   if (arg2 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg2, sizeof(int) );
+}
+
+PRE(wait4)
+{
+   /* pid_t wait4(pid_t pid, int *status, int options,
+      struct rusage *rusage) */
+   MAYBE_PRINTF("wait4 ( %d, %p, %d, %p )\n",
+		arg1,arg2,arg3,arg4);
+   /* NOTE(review): the clone-related wait flags are stripped before
+      the kernel sees them — presumably because the proxy-LWP scheme
+      handles clone children itself; confirm against vg_proxylwp.c. */
+   arg3 &= ~(VKI__WCLONE | VKI__WALL);
+
+   if (arg2 != (Addr)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "wait4(status)", 
+		     arg2, sizeof(int) );
+   if (arg4 != (Addr)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "wait4(rusage)", arg4, 
+		     sizeof(struct rusage) );
+}
+
+POST(wait4)
+{
+   if (arg2 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg2, sizeof(int) );
+   if (arg4 != (Addr)NULL)
+      VG_TRACK( post_mem_write, arg4, sizeof(struct rusage) );
+}
+
+PRE(writev)
+{
+   /* int writev(int fd, const struct iovec * vector, size_t count); */
+   Int i;
+   struct iovec *iov;
+   MAYBE_PRINTF("writev ( %d, %p, %d )\n",arg1,arg2,arg3);
+   /* Reject reserved fds up front. */
+   if (!fd_allowed(arg1, "writev", tid)) {
+      res = -VKI_EBADF;
+      return;
+   }
+   /* The iovec array and every buffer it describes are read. */
+   SYSCALL_TRACK( pre_mem_read, tid, "writev(vector)",
+		  arg2, arg3 * sizeof(struct iovec) );
+   /* ToDo: don't do any of the following if the vector is invalid */
+   iov = (struct iovec *)arg2;
+   for (i = 0; i < (Int)arg3; i++)
+      SYSCALL_TRACK( pre_mem_read, tid, "writev(vector[...])",
+		     (UInt)iov[i].iov_base, iov[i].iov_len );
+}
+
+PRE(prctl)
+{
+   /* int prctl(int option, unsigned long arg2, unsigned long arg3,
+      unsigned long arg4, unsigned long arg5); */
+   /* All-integer args for the options handled here; nothing checked. */
+   MAYBE_PRINTF( "prctl ( %d, %d, %d, %d, %d )\n", arg1, arg2, arg3,
+		 arg4, arg5 );
+}
+
+PRE(adjtimex)
+{
+   struct timex *tx = (struct timex *)arg1;
+   MAYBE_PRINTF("adjtimex ( %p )\n", arg1);
+
+   /* modes selects which input fields the kernel reads, so check it
+      first, then each mode-selected field individually. */
+   SYSCALL_TRACK(pre_mem_read, tid, "adjtimex(timex->modes)", arg1, sizeof(tx->modes));
+
+#define ADJX(bit,field) 				\
+   if (tx->modes & bit)					\
+      SYSCALL_TRACK(pre_mem_read, tid,			\
+		    "adjtimex(timex->"#field")",	\
+		    (UInt)&tx->field, sizeof(tx->field))
+   ADJX(ADJ_FREQUENCY, freq);
+   ADJX(ADJ_MAXERROR, maxerror);
+   ADJX(ADJ_ESTERROR, esterror);
+   ADJX(ADJ_STATUS, status);
+   ADJX(ADJ_TIMECONST, constant);
+   ADJX(ADJ_TICK, tick);
+#undef ADJX
+   
+   /* The whole struct is written back on return. */
+   SYSCALL_TRACK(pre_mem_write, tid, "adjtimex(timex)", arg1, sizeof(struct timex));
+}
+
+POST(adjtimex)
+{
+   VG_TRACK(post_mem_write, arg1, sizeof(struct timex));
+}
+
+/* Non-zero: route signal-related syscalls through Valgrind's own
+   signal simulation (see the special_sys table below). */
+#define SIGNAL_SIMULATION	1
+
+PRE(pause)
+{
+   /* int pause(void); -- no arguments, nothing to check. */
+   MAYBE_PRINTF("pause ( )\n");
+}
+
+PRE(rt_sigsuspend)
+{
+   /* int sigsuspend(const sigset_t *mask); */
+   MAYBE_PRINTF("sigsuspend ( %p )\n", arg1 );
+   if (arg1 != (Addr)NULL) {
+      /* above NULL test is paranoia */
+      SYSCALL_TRACK( pre_mem_read, tid, "sigsuspend(mask)", arg1, 
+		     sizeof(vki_ksigset_t) );
+   }
+}
+
+PREALIAS(sigsuspend, rt_sigsuspend);
+
+PRE(rt_sigtimedwait)
+{
+   /* int sigtimedwait(const  sigset_t  *set,  siginfo_t  *info,
+      const struct timespec *timeout); */
+   /* Added trace output and a check of the set argument, which the
+      kernel reads but which was previously unchecked. */
+   MAYBE_PRINTF("sigtimedwait ( %p, %p, %p )\n", arg1, arg2, arg3);
+   if (arg1 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid, "sigtimedwait(set)", arg1,
+		     sizeof(vki_ksigset_t) );
+   if (arg2 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "sigtimedwait(info)", arg2,
+		     sizeof(siginfo_t) );
+}
+
+POST(rt_sigtimedwait)
+{
+   /* info was filled in with details of the accepted signal. */
+   if (arg2 != (UInt)NULL)
+      VG_TRACK( post_mem_write, arg2, sizeof(siginfo_t) );
+}
+
+PRE(rt_sigqueueinfo)
+{
+   /*  long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo) */
+   MAYBE_PRINTF("rt_sigqueueinfo(%d, %d, %p)\n", arg1, arg2, arg3);
+   /* Bug fix: the NULL guard previously tested arg2 (the signal
+      number) instead of arg3, the uinfo pointer actually read. */
+   if (arg3 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid, "sigqueueinfo(uinfo)", arg3, 
+		     sizeof(siginfo_t) );
+}
+
+POST(rt_sigqueueinfo)
+{
+   /* If we successfully queued a signal to ourselves and it is
+      neither ignored nor blocked, wait for the proxy LWP to deliver
+      it so the handler runs synchronously with this syscall. */
+   if (res >= 0 && 
+       arg2 != 0 &&
+       !VG_(is_sig_ign)(arg2) &&
+       !VG_(ksigismember)(&tst->eff_sig_mask, arg2) &&
+       arg1 == VG_(getpid)()) {
+      VG_(proxy_waitsig)();
+   }
+}
+
+PRE(sigaltstack)
+{
+   /* int sigaltstack(const stack_t *ss, stack_t *oss); */
+   MAYBE_PRINTF("sigaltstack ( %p, %p )\n",arg1,arg2);
+   if (arg1 != (UInt)NULL) {
+      SYSCALL_TRACK( pre_mem_read, tid, "sigaltstack(ss)", 
+		     arg1, sizeof(vki_kstack_t) );
+   }
+   if (arg2 != (UInt)NULL) {
+      SYSCALL_TRACK( pre_mem_write, tid, "sigaltstack(oss)", 
+		     arg2, sizeof(vki_kstack_t) );
+   }
+
+   /* Handled internally by Valgrind's signal simulation rather than
+      being passed to the kernel. */
+   if (SIGNAL_SIMULATION)
+      VG_(do__NR_sigaltstack) (tid);
+}
+
+POST(sigaltstack)
+{
+   if (res == 0 && arg2 != (UInt)NULL)
+      VG_TRACK( post_mem_write, arg2, sizeof(vki_kstack_t));
+}
+
+PRE(sigaction)
+{
+   /* int sigaction(int signum, struct k_sigaction *act, 
+      struct k_sigaction *oldact); */
+   MAYBE_PRINTF("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3);
+   if (arg2 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid, "sigaction(act)", 
+		     arg2, sizeof(vki_ksigaction));
+   if (arg3 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "sigaction(oldact)", 
+		     arg3, sizeof(vki_ksigaction));
+
+   /* Handler bookkeeping is done by Valgrind itself, not the kernel. */
+   if (SIGNAL_SIMULATION)
+      VG_(do__NR_sigaction)(tid);
+}
+
+POST(sigaction)
+{
+   if (res == 0 && arg3 != (UInt)NULL)
+      VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigaction));
+}
+
+PREALIAS(rt_sigaction, sigaction);
+POSTALIAS(rt_sigaction, sigaction);
+
+PRE(sigprocmask)
+{
+   /* int sigprocmask(int how, k_sigset_t *set, 
+      k_sigset_t *oldset); */
+   MAYBE_PRINTF("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3);
+   if (arg2 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_read, tid, "sigprocmask(set)", 
+		     arg2, sizeof(vki_ksigset_t));
+   if (arg3 != (UInt)NULL)
+      SYSCALL_TRACK( pre_mem_write, tid, "sigprocmask(oldset)", 
+		     arg3, sizeof(vki_ksigset_t));
+
+   /* The mask is maintained by Valgrind's signal simulation. */
+   if (SIGNAL_SIMULATION)
+      VG_(do__NR_sigprocmask) ( tid, 
+				arg1 /*how*/, 
+				(vki_ksigset_t*) arg2,
+				(vki_ksigset_t*) arg3 );
+}
+
+POST(sigprocmask)
+{
+   if (res == 0 && arg3 != (UInt)NULL)
+      VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigset_t));
+}
+
+PREALIAS(rt_sigprocmask, sigprocmask);
+POSTALIAS(rt_sigprocmask, sigprocmask);
+
+PRE(sigpending)
+{
+   /* int sigpending( sigset_t *set ) ; */
+   MAYBE_PRINTF( "sigpending ( %p )\n", arg1 );
+   SYSCALL_TRACK( pre_mem_write, tid, "sigpending(set)", 
+		  arg1, sizeof(vki_ksigset_t));
+}
+
+POST(sigpending)
+{
+   if ( !VG_( is_kerror )( res ) && res == 0 )
+      VG_TRACK( post_mem_write, arg1, sizeof( vki_ksigset_t ) ) ;
+}
+
+PREALIAS(rt_sigpending, sigpending);
+POSTALIAS(rt_sigpending, sigpending);
+
+
+/* End of the region where SYSNO/res/argN are usable shorthands. */
+#undef SYSNO
+#undef res
+#undef arg1
+#undef arg2
+#undef arg3
+#undef arg4
+#undef arg5
+#undef arg6
+
+/* Per-syscall dispatch record: whether the call may block (and must
+   therefore be run by the proxy LWP), plus optional pre/post hooks. */
+struct sys_info {
+   Bool	may_block;		/* is a potentially blocking syscall */
+   void	(*before)(ThreadId tid, ThreadState *tst);
+   void	(*after)(ThreadId tid, ThreadState *tst);
+};
+/* Table entry helpers: SYSB_ registers only a before hook, SYSBA
+   registers both before and after hooks, indexed by syscall number. */
+#define SYSB_(name, blk)	[__NR_##name] = { blk, before_##name, NULL }
+#define SYSBA(name, blk)	[__NR_##name] = { blk, before_##name, after_##name }
+
+/* Fallback 'before' hook for syscalls absent from the tables: warn
+   the user, then fail the call with ENOSYS. */
+static void bad_before(ThreadId tid, ThreadState *tst)
+{
+   VG_(message)(Vg_DebugMsg,"WARNING: unhandled syscall: %d", tst->m_eax);
+   VG_(message)(Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
+   VG_(message)(Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
+
+   tst->m_eax = -VKI_ENOSYS;
+}
+
+/* Matching no-op 'after' hook for unhandled syscalls. */
+static void bad_after(ThreadId tid, ThreadState *tst)
+{
+}
+
+/* Dispatch record used when a syscall number has no table entry. */
+static const struct sys_info bad_sys = { False, bad_before, bad_after };
+
+/* Syscalls that must be executed by the scheduler thread itself
+   (never handed to a proxy LWP), e.g. because they alter the thread
+   set or Valgrind's simulated signal state. */
+static const struct sys_info special_sys[] = {
+   /* special */
+   SYSB_(exit_group,		False),
+   SYSB_(exit,			False),
+   SYSB_(clone,			False),
+
+   SYSB_(modify_ldt,		False),
+
+#if SIGNAL_SIMULATION
+   SYSBA(sigaltstack,		False),
+   SYSBA(rt_sigaction,		False),
+   SYSBA(sigaction,		False),
+   SYSBA(rt_sigprocmask,	False),
+   SYSBA(sigprocmask,		False),
+#endif /* SIGNAL_SIMULATION */
+};
+#define MAX_SPECIAL_SYS		(sizeof(special_sys)/sizeof(special_sys[0]))
+
+static const struct sys_info sys_info[] = {
+   SYSBA(ptrace,		False),
+   SYSB_(mount,			True),
+   SYSB_(umount,		False),
+
+   SYSB_(setresgid,		False),
+   SYSB_(vhangup,		False),
+   SYSB_(iopl,			False),
+
+   SYSB_(setxattr,		True),
+   SYSB_(lsetxattr,		True),
+   SYSB_(fsetxattr,		True),
+   SYSBA(getxattr,		True),
+   SYSBA(lgetxattr,		True),
+   SYSBA(fgetxattr,		True),
+   SYSBA(listxattr,		True),
+   SYSBA(llistxattr,		True),
+   SYSBA(flistxattr,		True),
+   SYSB_(removexattr,		True),
+   SYSB_(lremovexattr,		True),
+   SYSB_(fremovexattr,		True),
+
+   SYSB_(quotactl,		False),
+   SYSBA(lookup_dcookie,	False),
+
+   SYSB_(truncate64,		True),
+   SYSB_(fdatasync,		True),
+   SYSB_(msync,			True),
+
+   SYSBA(getpmsg,		True),
+   SYSB_(putpmsg,		True),
+
+   SYSBA(getitimer,		False),
+   SYSBA(syslog,		True),
+   SYSB_(personality,		False),
+   SYSB_(chroot,		False),
+   SYSB_(madvise,		True),
+   SYSBA(mremap,		False),
+   SYSB_(nice,			False),
+   SYSB_(setresgid32,		False),
+   SYSB_(setfsuid32,		False),
+   SYSBA(_sysctl,		False),
+
+   SYSB_(sched_getscheduler,	False),	/* ??? */
+   SYSB_(sched_setscheduler,	False),	/* ??? */
+
+   SYSB_(mlock,			True),
+   SYSB_(munlock,		True),
+   SYSB_(mlockall,		True),
+   SYSB_(munlockall,		True),
+
+   SYSB_(sched_get_priority_max,	False),	/* ??? */
+   SYSB_(sched_get_priority_min,	False),	/* ??? */
+
+   SYSB_(setpriority,		False),
+   SYSB_(getpriority,		False),
+
+   SYSB_(setfsgid,		False),
+   SYSB_(setregid,		False),
+   SYSB_(setresuid,		False),
+   SYSB_(setfsuid,		False),
+
+   SYSBA(sendfile,		True),
+   SYSBA(sendfile64,		True),
+   SYSB_(pwrite64,		True),
+   SYSB_(sync,			True),
+   SYSBA(fstatfs,		False),
+   SYSB_(getsid,		False),
+   SYSBA(pread64,		True),
+   SYSB_(mknod,			False),
+   SYSB_(flock,			True),
+   SYSB_(init_module,		True),
+   SYSB_(ioperm,		False),
+   SYSBA(capget,		False),
+   SYSB_(capset,		False),
+   SYSBA(execve,		False),
+   SYSB_(access,		False),
+   SYSBA(brk,			False),
+   SYSB_(chdir,			False),
+   SYSB_(chmod,			False),
+   SYSB_(chown32,		False),
+   SYSB_(lchown32,		False),
+   SYSB_(chown,			False),
+   SYSB_(close,			False),
+   SYSBA(dup,			False),
+   SYSBA(dup2,			False),
+   SYSB_(fcntl,			True),
+   SYSB_(fchdir,		False),
+   SYSB_(fchown32,		False),
+   SYSB_(fchown,		False),
+   SYSB_(fchmod,		False),
+   SYSB_(fcntl64,		True),
+   SYSBA(fstat,			False),
+   SYSBA(fork,			False),
+   SYSB_(fsync,			True),
+   SYSB_(ftruncate,		True),
+   SYSB_(ftruncate64,		True),
+   SYSBA(getdents,		True),
+   SYSBA(getdents64,		True),
+   SYSBA(getgroups32,		True),
+   SYSBA(getgroups,		False),
+   SYSBA(getcwd,		False),
+   SYSB_(geteuid,		False),
+   SYSB_(geteuid32,		False),
+   SYSB_(getegid,		False),
+   SYSB_(getegid32,		False),
+   SYSB_(getgid,		False),
+   SYSB_(getgid32,		False),
+   SYSB_(getpid,		False),
+   SYSB_(getpgid,		False),
+   SYSB_(getpgrp,		False),
+   SYSB_(getppid,		False),
+   SYSBA(getresgid,		False),
+   SYSBA(getresgid32,		False),
+   SYSBA(getresuid,		False),
+   SYSBA(getresuid32,		False),
+   SYSBA(ugetrlimit,		False),
+   SYSBA(getrlimit,		False),
+   SYSBA(getrusage,		False),
+   SYSBA(gettimeofday,		False),
+   SYSB_(getuid,		False),
+   SYSB_(getuid32,		False),
+   SYSBA(ipc,			True),
+   SYSBA(ioctl,			True),
+   SYSBA(kill,			False),
+   SYSB_(link,			True),
+   SYSB_(lseek,			False),
+   SYSBA(_llseek,		False),
+   SYSBA(lstat,			False),
+   SYSBA(lstat64,		False),
+   SYSB_(mkdir,			True),
+   SYSBA(mmap2,			False),
+   SYSBA(mmap,			False),
+   SYSBA(mprotect,		False),
+   SYSBA(munmap,		False),
+   SYSBA(nanosleep,		True),
+   SYSB_(_newselect,		True),
+   SYSBA(open,			True),
+   SYSBA(read,			True),
+   SYSB_(write,			True),
+   SYSBA(creat,			True),
+   SYSBA(pipe,			False),
+   SYSBA(poll,			True),
+   SYSBA(readlink,		False),
+   SYSBA(readv,			True),
+   SYSB_(rename,		False),
+   SYSB_(rmdir,			True),
+   SYSBA(sched_setparam,	False),	/* ??? */
+   SYSBA(sched_getparam,	False),	/* ??? */
+   SYSB_(sched_yield,		False),	/* ??? */
+   SYSB_(select,		True),
+   SYSBA(setitimer,		False),
+   SYSB_(setfsgid32,		False),
+   SYSB_(setgid32,		False),
+   SYSB_(setgid,		False),
+   SYSB_(setsid,		False),
+   SYSB_(setgroups32,		False),
+   SYSB_(setgroups,		False),
+   SYSBA(setpgid,		False),
+   SYSB_(setregid32,		False),
+   SYSB_(setresuid32,		False),
+   SYSB_(setreuid32,		False),
+   SYSB_(setreuid,		False),
+   SYSB_(setrlimit,		False),
+   SYSB_(setuid32,		False),
+   SYSB_(setuid,		False),
+   SYSBA(socketcall,		True),
+   SYSBA(stat,			False),
+   SYSBA(statfs,		False),
+   SYSB_(symlink,		True),
+   SYSBA(stat64,		False),
+   SYSBA(fstat64,		False),
+   SYSBA(sysinfo,		False),
+   SYSBA(time,			False),
+   SYSBA(times,			False),
+   SYSB_(truncate,		True),
+   SYSB_(umask,			False),
+   SYSB_(unlink,		True),
+   SYSBA(uname,			False),
+   SYSB_(utime,			True),
+   SYSBA(waitpid,		True),
+   SYSBA(wait4,			True),
+   SYSB_(writev,		True),
+   SYSB_(prctl,			True),
+   SYSBA(adjtimex,		False),
+
+   /* new signal handling makes these normal blocking syscalls */
+   SYSB_(pause,			True),
+   SYSB_(sigsuspend,		True),
+   SYSB_(rt_sigsuspend,		True),
+   SYSBA(rt_sigtimedwait,	True),
+   SYSBA(rt_sigqueueinfo,	False),
+   SYSBA(sigpending,		True), /* not blocking, but must run in LWP context */
+   SYSBA(rt_sigpending,		True), /* not blocking, but must run in LWP context */
+   SYSB_(alarm,			True), /* not blocking, but must run in LWP context */
+
+#if !SIGNAL_SIMULATION
+   SYSBA(sigaltstack,		False),
+   SYSBA(rt_sigaction,		False),
+   SYSBA(sigaction,		False),
+   SYSBA(rt_sigprocmask,	False),
+   SYSBA(sigprocmask,		False),
+#endif /* !SIGNAL_SIMULATION */
+};
+#define MAX_SYS_INFO		(sizeof(sys_info)/sizeof(sys_info[0]))
+
+#undef SYSB_
+#undef SYSBA
+
+Bool VG_(pre_syscall) ( ThreadId tid )
 {
    ThreadState* tst;
-   UInt         syscallno, arg1, arg2, arg3, arg4, arg5;
-   /* Do not make this unsigned! */
-   Int res;
-   void* pre_res = 0;   /* shut gcc up */
+   UInt         syscallno;
+   const struct sys_info *sys;
+   Bool special = False;
+   Bool syscall_done = False;	/* we actually ran the syscall */
 
    VGP_PUSHCC(VgpCoreSysWrap);
 
-   vg_assert(VG_(is_valid_tid)(tid));
-   tst              = & VG_(threads)[tid];
-   syscallno        = tst->m_eax;
-   arg1             = tst->m_ebx;
-   arg2             = tst->m_ecx;
-   arg3             = tst->m_edx;
-   arg4             = tst->m_esi;
-   arg5             = tst->m_edi;
+   tst = VG_(get_ThreadState)(tid);
 
-   /* Do any pre-syscall actions */
-   if (VG_(needs).syscall_wrapper) {
-      VGP_PUSHCC(VgpSkinSysWrap);
-      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/False);
-      VGP_POPCC(VgpSkinSysWrap);
-   }
+   /* Convert vfork to fork, since we can't handle it otherwise. */
+   if (tst->m_eax == __NR_vfork)
+      tst->m_eax = __NR_fork;
 
-   /* the syscall no is in %eax.  For syscalls with <= 5 args,
-      args 1 .. 5 to the syscall are in %ebx %ecx %edx %esi %edi.
-      For calls with > 5 args, %ebx points to a lump of memory
+   syscallno = tst->m_eax;
+
+   if (tst->syscallno != -1)
+      VG_(printf)("tid %d has syscall %d\n", tst->tid, tst->syscallno);
+
+   vg_assert(tst->syscallno == -1);		/* should be no current syscall */
+   vg_assert(tst->status == VgTs_Runnable);	/* should be runnable */
+
+   /* the syscall no is in %eax.  For syscalls with <= 6 args,
+      args 1 .. 6 to the syscall are in %ebx %ecx %edx %esi %edi %ebp.
+      For calls with > 6 args, %ebx points to a lump of memory
       containing the args.
 
       The result is returned in %eax.  If this value >= 0, the call
@@ -459,3194 +4359,116 @@
       comes from.
    */
 
-   MAYBE_PRINTF("SYSCALL[%d,%d](%3d): ", 
-                  VG_(getpid)(), tid, syscallno);
+   /* post_syscall expects us to be "waiting" even if we don't
+      block */
+   tst->syscallno = syscallno;
+   tst->status = VgTs_WaitSys;
 
-   switch (syscallno) {
-
-#     if defined(__NR_exit_group)
-      case __NR_exit_group:
-         VG_(core_panic)("syscall exit_group() not caught by the scheduler?!");
-         break;
-#     endif
-
-      case __NR_exit:
-         VG_(core_panic)("syscall exit() not caught by the scheduler?!");
-         break;
-
-      case __NR_clone:
-         VG_(unimplemented)
-            ("clone(): not supported by Valgrind.\n   "
-             "We do now support programs linked against\n   "
-             "libpthread.so, though.  Re-run with -v and ensure that\n   "
-             "you are picking up Valgrind's implementation of libpthread.so.");
-         break;
-
-      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
-  
-#     if defined(__NR_ptrace)
-      case __NR_ptrace: { /* syscall 26 */
-         /* long ptrace (enum __ptrace_request request, pid_t pid, 
-                         void *addr, void *data); ... sort of. */
-         /* Sigh ... the /usr/include/sys/user.h on R H 6.2 doesn't 
-            define struct user_fpxregs_struct.  On the basis that it 
-            is defined as follows on my R H 7.2 (glibc-2.2.4) box, 
-            I kludge it.
-
-            struct user_fpxregs_struct
-            {
-               unsigned short int cwd;
-               unsigned short int swd;
-               unsigned short int twd;
-               unsigned short int fop;
-               long int fip;
-               long int fcs;
-               long int foo;
-               long int fos;
-               long int mxcsr;
-               long int reserved;
-               long int st_space[32];  8*16 bytes for each FP-reg = 128 bytes
-               long int xmm_space[32]; 8*16 bytes for each XMM-reg = 128 bytes
-               long int padding[56];
-            };
-         */
-         const Int sizeof_struct_user_fpxregs_struct
-            = sizeof(unsigned short) * (1 + 1 + 1 + 1) 
-              + sizeof(long int) * (1 + 1 + 1 + 1 + 1 + 1 + 32 + 32 + 56);
-
-         MAYBE_PRINTF("ptrace ( %d, %d, %p, %p )\n", arg1,arg2,arg3,arg4);
-         switch (arg1) {
-             case 12:   /* PTRACE_GETREGS */
-                 SYSCALL_TRACK( pre_mem_write, tid, "ptrace(getregs)", arg4, 
-                                   sizeof (struct user_regs_struct));
-                 break;
-             case 14:   /* PTRACE_GETFPREGS */
-                 SYSCALL_TRACK( pre_mem_write, tid, "ptrace(getfpregs)", arg4, 
-                                   sizeof (struct user_fpregs_struct));
-                 break;
-             case 18:   /* PTRACE_GETFPXREGS */
-                 SYSCALL_TRACK( pre_mem_write, tid, "ptrace(getfpxregs)", arg4, 
-                                   sizeof_struct_user_fpxregs_struct);
-                 break;
-            case 1: case 2: case 3:    /* PTRACE_PEEK{TEXT,DATA,USER} */
-                 SYSCALL_TRACK( pre_mem_write, tid, "ptrace(peek)", arg4, 
-                                   sizeof (long));
-                 break;
-             case 13:   /* PTRACE_SETREGS */
-                 SYSCALL_TRACK( pre_mem_read, tid, "ptrace(setregs)", arg4, 
-                                   sizeof (struct user_regs_struct));
-                 break;
-             case 15:   /* PTRACE_SETFPREGS */
-                 SYSCALL_TRACK( pre_mem_read, tid, "ptrace(setfpregs)", arg4, 
-                                   sizeof (struct user_fpregs_struct));
-                 break;
-             case 19:   /* PTRACE_SETFPXREGS */
-                 SYSCALL_TRACK( pre_mem_read, tid, "ptrace(setfpxregs)", arg4, 
-                                   sizeof_struct_user_fpxregs_struct);
-                 break;
-             default:
-                 break;
-         }
-         KERNEL_DO_SYSCALL(tid, res);
-         if (!VG_(is_kerror)(res)) {
-             switch (arg1) {
-                 case 12:  /* PTRACE_GETREGS */
-                     VG_TRACK( post_mem_write, arg4, 
-                               sizeof (struct user_regs_struct));
-                     break;
-                 case 14:  /* PTRACE_GETFPREGS */
-                     VG_TRACK( post_mem_write, arg4, 
-                               sizeof (struct user_fpregs_struct));
-                     break;
-                 case 18:  /* PTRACE_GETFPXREGS */
-                     VG_TRACK( post_mem_write, arg4, 
-                              sizeof_struct_user_fpxregs_struct);
-                     break;
-                case 1: case 2: case 3:    /* PTRACE_PEEK{TEXT,DATA,USER} */
-                     VG_TRACK( post_mem_write, arg4, sizeof (long));
-                     break;
-                 default:
-                     break;
-             }
-         }
-         }
-         break;
-#     endif
-
-#     if defined(__NR_mount)
-      case __NR_mount: /* syscall 21 */
-         /* int mount(const char *specialfile, const char *dir,
-            const char *filesystemtype, unsigned long rwflag,
-            const void *data); */
-         MAYBE_PRINTF( "mount( %p, %p, %p )\n" ,arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid,"mount(specialfile)",arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid,"mount(dir)",arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid,"mount(filesystemtype)",arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_umount)
-      case __NR_umount: /* syscall 22 */
-         /* int umount(const char *path) */
-         MAYBE_PRINTF("umount( %p )\n", arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid,"umount(path)",arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_modify_ldt)
-      case __NR_modify_ldt: /* syscall 123 */
-         /* int modify_ldt(int func, void *ptr, 
-                           unsigned long bytecount); */
-         MAYBE_PRINTF("modify_ldt ( %d, %p, %d )\n", arg1,arg2,arg3);
-         if (arg1 == 0) {
-            /* read the LDT into ptr */
-            SYSCALL_TRACK( pre_mem_write, tid, 
-                           "modify_ldt(ptr)(func=0)", arg2, arg3 );
-         }
-         if (arg1 == 1 || arg1 == 0x11) {
-            /* write the LDT with the entry pointed at by ptr */
-            SYSCALL_TRACK( pre_mem_read, tid, 
-                           "modify_ldt(ptr)(func=1 or 0x11)", arg2, 
-                           sizeof(struct vki_modify_ldt_ldt_s) );
-         }
-         /* "do" the syscall ourselves; the kernel never sees it */
-         res = VG_(sys_modify_ldt)( tid, arg1, (void*)arg2, arg3 );
-         SET_SYSCALL_RETVAL(tid, res);
-         if (arg1 == 0 && !VG_(is_kerror)(res) && res > 0) {
-            VG_TRACK( post_mem_write, arg2, res );
-         }
-         break;
-#     endif
-  
-#     if defined(__NR_setresgid)
-      case __NR_setresgid: /* syscall 170 */
-         /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
-         MAYBE_PRINTF("setresgid ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_vhangup)
-      case __NR_vhangup: /* syscall 111 */
-         /* int vhangup(void); */
-         MAYBE_PRINTF("vhangup()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_iopl)
-      case __NR_iopl: /* syscall 110 */
-         /* int iopl(int level); */
-         MAYBE_PRINTF("iopl ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setxattr)
-      case __NR_setxattr: /* syscall 226 */
-         /* int setxattr (const char *path, const char *name,
-                          const void *value, size_t size, int flags); */
-      case __NR_lsetxattr: /* syscall 227 */
-         /* int lsetxattr (const char *path, const char *name,
-                           const void *value, size_t size, int flags); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("setxattr ( %p, %p, %p, %d, %d )\n",
-                         arg1, arg2, arg3, arg4, arg5);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "setxattr(path)", arg1 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "setxattr(name)", arg2 );
-         SYSCALL_TRACK( pre_mem_read, tid, "setxattr(value)", arg3, arg4 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_fsetxattr)
-      case __NR_fsetxattr: /* syscall 228 */
-         /* int fsetxattr (int filedes, const char *name,
-                           const void *value, size_t size, int flags); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("fsetxattr ( %d, %p, %p, %d, %d )\n",
-                         arg1, arg2, arg3, arg4, arg5);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "fsetxattr(name)", arg2 );
-         SYSCALL_TRACK( pre_mem_read, tid, "fsetxattr(value)", arg3, arg4 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_getxattr)
-      case __NR_getxattr: /* syscall 229 */
-         /* ssize_t getxattr (const char *path, const char* name,
-                              void* value, size_t size); */
-      case __NR_lgetxattr: /* syscall 230 */
-         /* ssize_t lgetxattr (const char *path, const char *name,
-                               void *value, size_t size); */
-         MAYBE_PRINTF("getxattr ( %p, %p, %p, %d )\n", 
-                        arg1,arg2,arg3, arg4);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "getxattr(path)", arg1 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "getxattr(name)", arg2 );
-         SYSCALL_TRACK( pre_mem_write, tid, "getxattr(value)", arg3, arg4 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0 
-                                  && arg3 != (Addr)NULL) {
-            VG_TRACK( post_mem_write, arg3, res );
-         }
-         break;
-#     endif
-
-#     if defined(__NR_fgetxattr)
-      case __NR_fgetxattr: /* syscall 231 */
-         /* ssize_t fgetxattr (int filedes, const char *name,
-                               void *value, size_t size); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("fgetxattr ( %d, %p, %p, %d )\n",
-                         arg1, arg2, arg3, arg4);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "fgetxattr(name)", arg2 );
-         SYSCALL_TRACK( pre_mem_write, tid, "fgetxattr(value)", arg3, arg4 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0 && arg3 != (Addr)NULL)
-             VG_TRACK( post_mem_write, arg3, res );
-         break;
-#     endif
-
-#     if defined(__NR_listxattr)
-      case __NR_listxattr: /* syscall 232 */
-         /* ssize_t listxattr (const char *path, char *list, size_t size); */
-      case __NR_llistxattr: /* syscall 233 */
-         /* ssize_t llistxattr (const char *path, char *list, size_t size); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("listxattr ( %p, %p, %d )\n", arg1, arg2, arg3);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(path)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "listxattr(list)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0 && arg2 != (Addr)NULL)
-             VG_TRACK( post_mem_write, arg2, res );
-         break;
-#     endif
-
-#     if defined(__NR_flistxattr)
-      case __NR_flistxattr: /* syscall 234 */
-         /* ssize_t flistxattr (int filedes, char *list, size_t size); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("flistxattr ( %d, %p, %d )\n", arg1, arg2, arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "listxattr(list)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0 && arg2 != (Addr)NULL)
-             VG_TRACK( post_mem_write, arg2, res );
-         break;
-#     endif
-
-#     if defined(__NR_removexattr)
-      case __NR_removexattr: /* syscall 235 */
-         /* int removexattr (const char *path, const char *name); */
-      case __NR_lremovexattr: /* syscall 236 */
-         /* int lremovexattr (const char *path, const char *name); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("removexattr ( %p, %p )\n", arg1, arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(path)", arg1 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(name)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_fremovexattr)
-      case __NR_fremovexattr: /* syscall 237 */
-         /* int fremovexattr (int filedes, const char *name); */
-         if (VG_(clo_trace_syscalls))
-             VG_(printf)("removexattr ( %d, %p )\n", arg1, arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "listxattr(name)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_quotactl)
-      case __NR_quotactl: /* syscall 131 */
-         /* int quotactl(int cmd, char *special, int uid, caddr_t addr); */
-         MAYBE_PRINTF("quotactl (0x%x, %p, 0x%x, 0x%x )\n", 
-                        arg1,arg2,arg3, arg4);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "quotactl(special)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_lookup_dcookie)
-      case __NR_lookup_dcookie: /* syscall 253 */
-         /* int lookup_dcookie (uint64_t cookie, char *buf, size_t sz); */
-         MAYBE_PRINTF("lookup_dcookie (0x%llx, %p, %d)\n",
-                     arg1 | ((long long) arg2 << 32), arg3, arg4);
-         SYSCALL_TRACK( pre_mem_write, tid, "lookup_dcookie(buf)", arg3, arg4);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL)
-             VG_TRACK( post_mem_write, arg3, res);
-         break;
-#     endif
-
-#     if defined(__NR_truncate64)
-      case __NR_truncate64: /* syscall 193 */
-         /* int truncate64(const char *path, off64_t length); */
-         MAYBE_PRINTF("truncate64 ( %p, %lld )\n",
-                        arg1, ((ULong)arg2) | (((ULong) arg3) << 32));
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "truncate64(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_fdatasync)
-      case __NR_fdatasync: /* syscall 148 */
-         /* int fdatasync(int fd); */
-         MAYBE_PRINTF("fdatasync ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_msync) /* syscall 144 */
-      case __NR_msync:
-         /* int msync(const void *start, size_t length, int flags); */
-         MAYBE_PRINTF("msync ( %p, %d, %d )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_read, tid, "msync(start)", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);  
-         break;
-#     endif
-
-#     if defined(__NR_getpmsg) /* syscall 188 */
-      case __NR_getpmsg: 
-      {
-      /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
-      /* int getpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
-                             int *bandp, int *flagsp); */
-      struct strbuf {
-         int     maxlen;         /* no. of bytes in buffer */
-         int     len;            /* no. of bytes returned */
-         caddr_t buf;            /* pointer to data */
-      };
-      struct strbuf *ctrl;
-      struct strbuf *data;
-      MAYBE_PRINTF("getpmsg ( %d, %p, %p, %p, %p )\n",
-                      arg1,arg2,arg3,arg4,arg5);
-      ctrl = (struct strbuf *)arg2;
-      data = (struct strbuf *)arg3;
-      if (ctrl && ctrl->maxlen > 0)
-          SYSCALL_TRACK( pre_mem_write, tid, "getpmsg(ctrl)", 
-                                (UInt)ctrl->buf, ctrl->maxlen);
-      if (data && data->maxlen > 0)
-          SYSCALL_TRACK( pre_mem_write, tid, "getpmsg(data)", 
-                                 (UInt)data->buf, data->maxlen);
-      if (arg4)
-          SYSCALL_TRACK( pre_mem_write, tid, "getpmsg(bandp)", 
-                                (UInt)arg4, sizeof(int));
-      if (arg5)
-          SYSCALL_TRACK( pre_mem_write, tid, "getpmsg(flagsp)", 
-                                (UInt)arg5, sizeof(int));
-      KERNEL_DO_SYSCALL(tid,res);
-      if (!VG_(is_kerror)(res) && res == 0 && ctrl && ctrl->len > 0) {
-         VG_TRACK( post_mem_write, (UInt)ctrl->buf, ctrl->len);
-      }
-      if (!VG_(is_kerror)(res) && res == 0 && data && data->len > 0) {
-         VG_TRACK( post_mem_write, (UInt)data->buf, data->len);
-      }
-      }
-      break;
-#     endif
-
-
-#     if defined(__NR_putpmsg) /* syscall 189 */
-      case __NR_putpmsg: 
-      {
-      /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
-      /* int putpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
-                             int band, int flags); */
-      struct strbuf {
-         int     maxlen;         /* no. of bytes in buffer */
-         int     len;            /* no. of bytes returned */
-         caddr_t buf;            /* pointer to data */
-      };
-      struct strbuf *ctrl;
-      struct strbuf *data;
-      MAYBE_PRINTF("putpmsg ( %d, %p, %p, %d, %d )\n",
-                     arg1,arg2,arg3,arg4,arg5);
-      ctrl = (struct strbuf *)arg2;
-      data = (struct strbuf *)arg3;
-      if (ctrl && ctrl->len > 0)
-          SYSCALL_TRACK( pre_mem_read, tid, "putpmsg(ctrl)",
-                                (UInt)ctrl->buf, ctrl->len);
-      if (data && data->len > 0)
-          SYSCALL_TRACK( pre_mem_read, tid, "putpmsg(data)",
-                                (UInt)data->buf, data->len);
-      KERNEL_DO_SYSCALL(tid,res);
-      }
-      break;
-#     endif
-
-      case __NR_getitimer: /* syscall 105 */
-         /* int getitimer(int which, struct itimerval *value); */
-         MAYBE_PRINTF("getitimer ( %d, %p )\n", arg1, arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "getitimer(timer)", arg2, 
-                           sizeof(struct itimerval) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL) {
-            VG_TRACK( post_mem_write,arg2, sizeof(struct itimerval));
-         }
-         break;
-
-#     if defined(__NR_syslog)
-      case __NR_syslog: /* syscall 103 */
-         /* int syslog(int type, char *bufp, int len); */
-         MAYBE_PRINTF("syslog (%d, %p, %d)\n",arg1,arg2,arg3);
-         switch(arg1) {
-            case 2: case 3: case 4:
-               SYSCALL_TRACK( pre_mem_write, tid, "syslog(buf)", arg2, arg3);
-	       break;
-            default: 
-               break;
-         }
-         KERNEL_DO_SYSCALL(tid, res);
-         if (!VG_(is_kerror)(res)) {
-            switch (arg1) {
-               case 2: case 3: case 4:
-                  VG_TRACK( post_mem_write, arg2, arg3 );
-                  break;
-               default:
-                  break;
-            }
-         }
-         break;
-#     endif
-
-      case __NR_personality: /* syscall 136 */
-         /* int personality(unsigned long persona); */
-         MAYBE_PRINTF("personality ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_chroot: /* syscall 61 */
-         /* int chroot(const char *path); */
-         MAYBE_PRINTF("chroot ( %p )\n", arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chroot(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_madvise)
-      case __NR_madvise: /* syscall 219 */
-         /* int madvise(void *start, size_t length, int advice ); */
-         MAYBE_PRINTF("madvise ( %p, %d, %d )\n", arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_mremap)
-      /* Treating it like an munmap() followed by a mmap() */
-      case __NR_mremap: /* syscall 163 */
-         /* void* mremap(void * old_address, size_t old_size, 
-                         size_t new_size, unsigned long flags); */
-         MAYBE_PRINTF("mremap ( %p, %d, %d, 0x%x )\n", 
-                        arg1, arg2, arg3, arg4);
-         SYSCALL_TRACK( pre_mem_write, tid, "mremap(old_address)", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            mremap_segment( arg1, arg2, (Addr)res, arg3 );
-         }
-         break;         
-#     endif
-
-      case __NR_nice: /* syscall 34 */
-         /* int nice(int inc); */
-         MAYBE_PRINTF("nice ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_adjtimex)
-      case __NR_adjtimex: /* syscall 124 */
-        /* int adjtimex(struct timex *buf) */
-         MAYBE_PRINTF("adjtimex ( %p )\n",arg1);
-         SYSCALL_TRACK( pre_mem_write, tid, "adjtimex(buf)",
-                        arg1, sizeof(struct timex) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg1, sizeof(struct timex) );
-        break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 14 Mar 02 !!!!!!!!!! */
-
-#     if defined(__NR_setresgid32)
-      case __NR_setresgid32: /* syscall 210 */
-         /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
-         MAYBE_PRINTF("setresgid32 ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setfsuid32)
-      case __NR_setfsuid32: /* syscall 215 */
-         /* int setfsuid(uid_t fsuid); */
-          MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
-          KERNEL_DO_SYSCALL(tid,res);
-          break;
-#     endif
-
-#     if defined(__NR__sysctl)
-      case __NR__sysctl:
-      /* int _sysctl(struct __sysctl_args *args); */
-         MAYBE_PRINTF("_sysctl ( %p )\n", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "_sysctl(args)", arg1, 
-                            sizeof(struct __sysctl_args) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg1, sizeof(struct __sysctl_args) );
-         break;
-#     endif
-
-#     if defined(__NR_sched_getscheduler)
-      case __NR_sched_getscheduler: /* syscall 157 */
-         /* int sched_getscheduler(pid_t pid); */
-         MAYBE_PRINTF("sched_getscheduler ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_sched_setscheduler)
-      case __NR_sched_setscheduler: /* syscall 156 */
-         /* int sched_setscheduler(pid_t pid, int policy, 
-                const struct sched_param *p); */
-         MAYBE_PRINTF("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3);
-         if (arg3 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_read, tid,
-                              "sched_setscheduler(struct sched_param *p)", 
-                              arg3, sizeof(struct sched_param));
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_mlock)
-      case __NR_mlock: /* syscall 150 */
-         /* int mlock(const void * addr, size_t len) */
-         MAYBE_PRINTF("mlock ( %p, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_munlock)
-      case __NR_munlock: /* syscall 151 */
-         /* int munlock(const void * addr, size_t len) */
-         MAYBE_PRINTF("munlock ( %p, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_mlockall)
-      case __NR_mlockall: /* syscall 152 */
-         /* int mlockall(int flags); */
-         MAYBE_PRINTF("mlockall ( %x )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_munlockall)
-      case __NR_munlockall: /* syscall 153 */
-         /* int munlockall(void); */
-         MAYBE_PRINTF("munlockall ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_sched_get_priority_max)
-      case __NR_sched_get_priority_max: /* syscall 159 */
-         /* int sched_get_priority_max(int policy); */
-         MAYBE_PRINTF("sched_get_priority_max ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_sched_get_priority_min)
-      case __NR_sched_get_priority_min: /* syscall 160 */
-         /* int sched_get_priority_min(int policy); */
-         MAYBE_PRINTF("sched_get_priority_min ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_setpriority)
-      case __NR_setpriority: /* syscall 97 */
-         /* int setpriority(int which, int who, int prio); */
-         MAYBE_PRINTF("setpriority ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_getpriority)
-      case __NR_getpriority: /* syscall 96 */
-         /* int getpriority(int which, int who); */
-         MAYBE_PRINTF("getpriority ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setfsgid)
-      case __NR_setfsgid: /* syscall 139 */
-         /* int setfsgid(gid_t gid); */
-         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setregid)
-      case __NR_setregid: /* syscall 71 */
-         /* int setregid(gid_t rgid, gid_t egid); */
-         MAYBE_PRINTF("setregid ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setresuid)
-      case __NR_setresuid: /* syscall 164 */
-         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
-         MAYBE_PRINTF("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setfsuid)
-      case __NR_setfsuid: /* syscall 138 */
-         /* int setfsuid(uid_t uid); */
-         MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 8 Mar 02 !!!!!!!!!!! */
-
-#     if defined(__NR_sendfile)
-      case __NR_sendfile: /* syscall 187 */
-         /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, 
-                             size_t count) */
-         MAYBE_PRINTF("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
-         if (arg3 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "sendfile(offset)",
-                           arg3, sizeof(off_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg3 != (UInt)NULL) {
-            VG_TRACK( post_mem_write, arg3, sizeof( off_t ) );
-         }
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscall, 3 Jun 03 !!!!!!!!!!! */
-#     if defined(__NR_sendfile64)
-      case __NR_sendfile64: /* syscall 239 */
-          /* ssize_t sendfile64(int out_df, int in_fd, loff_t *offset,
-                                size_t count); */
-         MAYBE_PRINTF("sendfile64 ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
-         if (arg3 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "sendfile64(offset)",
-                           arg3, sizeof(loff_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg3 != (UInt)NULL ) {
-            VG_TRACK( post_mem_write, arg3, sizeof(loff_t) );
-         }
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 7 Mar 02 !!!!!!!!!!! */
-
-#     if defined(__NR_pwrite)
-      case __NR_pwrite: /* syscall 181 */
-         /* ssize_t pwrite (int fd, const void *buf, size_t nbytes,
-                            off_t offset); */
-         MAYBE_PRINTF("pwrite ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4);
-         SYSCALL_TRACK( pre_mem_read, tid, "pwrite(buf)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 6 Mar 02 !!!!!!!!!!! */
-
-      case __NR_sync: /* syscall 36 */
-         /* int sync(); */
-         MAYBE_PRINTF("sync ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break; 
- 
-      case __NR_fstatfs: /* syscall 100 */
-         /* int fstatfs(int fd, struct statfs *buf); */
-         MAYBE_PRINTF("fstatfs ( %d, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "stat(buf)", 
-                        arg2, sizeof(struct statfs) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
-         break;
-
-      /* !!!!!!!!!! New, untested syscalls, 4 Mar 02 !!!!!!!!!!! */
-
-      case __NR_pause: /* syscall 29 */
-         /* int pause(void); */
-         MAYBE_PRINTF("pause ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getsid: /* syscall 147 */
-         /* pid_t getsid(pid_t pid); */
-         MAYBE_PRINTF("getsid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_pread)
-      case __NR_pread: /* syscall 180 */
-         /* ssize_t pread(int fd, void *buf, size_t count, off_t offset); */
-         MAYBE_PRINTF("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4);
-         SYSCALL_TRACK( pre_mem_write, tid, "pread(buf)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         MAYBE_PRINTF("SYSCALL[%d]       pread ( %d, %p, %d, %d ) --> %d\n",
-                        VG_(getpid)(),
-                        arg1, arg2, arg3, arg4, res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            VG_TRACK( post_mem_write, arg2, res );
-         }
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 27 Feb 02 !!!!!!!!!! */
-
-      case __NR_mknod: /* syscall 14 */
-         /* int mknod(const char *pathname, mode_t mode, dev_t dev); */
-         MAYBE_PRINTF("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "mknod(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_flock: /* syscall 143 */
-         /* int flock(int fd, int operation); */
-         MAYBE_PRINTF("flock ( %d, %d )\n", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_rt_sigsuspend)
-      /* Viewed with great suspicion by me, but, hey, let's do it
-         anyway ... */
-      case __NR_rt_sigsuspend: /* syscall 179 */
-         /* int sigsuspend(const sigset_t *mask); */
-         MAYBE_PRINTF("sigsuspend ( %p )\n", arg1 );
-         if (arg1 != (Addr)NULL) {
-            /* above NULL test is paranoia */
-            SYSCALL_TRACK( pre_mem_read, tid, "sigsuspend(mask)", arg1, 
-                              sizeof(vki_ksigset_t) );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_rt_sigtimedwait)
-      case __NR_rt_sigtimedwait: /* syscall 177 */
-          /* int sigtimedwait(const  sigset_t  *set,  siginfo_t  *info,
-                              const struct timespec timeout); */
-         if (arg2 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "sigtimedwait(info)", arg2,
-                           sizeof(siginfo_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL)
-            VG_TRACK( post_mem_write, arg2, sizeof(siginfo_t) );
-         break;
-#     endif
-                
-      case __NR_init_module: /* syscall 128 */
-         /* int init_module(const char *name, struct module *image); */
-         MAYBE_PRINTF("init_module ( %p, %p )\n", arg1, arg2 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "init_module(name)", arg1 );
-         SYSCALL_TRACK( pre_mem_read, tid, "init_module(image)", arg2, 
-                           VKI_SIZEOF_STRUCT_MODULE );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_ioperm: /* syscall 101 */
-         /* int ioperm(unsigned long from, unsigned long num, int turn_on); */
-         MAYBE_PRINTF("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_capget: /* syscall 184 */
-         /* int capget(cap_user_header_t header, cap_user_data_t data); */
-         MAYBE_PRINTF("capget ( %p, %p )\n", arg1, arg2 );
-         SYSCALL_TRACK( pre_mem_read, tid, "capget(header)", arg1, 
-                                             sizeof(vki_cap_user_header_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "capget(data)", arg2, 
-                                           sizeof( vki_cap_user_data_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL)
-            VG_TRACK( post_mem_write, arg2, sizeof( vki_cap_user_data_t) );
-         break;
- 
-      /* Added by Gerald Carter <jerry@samba.org> 2002-12-17 */
-#     if defined(__NR_capset)
-      /* int capset(cap_user_header_t header, const cap_user_data_t data); */
-      case __NR_capset: /* syscall 185 */ 
-         SYSCALL_TRACK( pre_mem_read, tid, "capset(header)", 
-                        arg1, sizeof(vki_cap_user_header_t) );
-         SYSCALL_TRACK( pre_mem_read, tid, "capset(data)", 
-                        arg2, sizeof( vki_cap_user_data_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-      /* end modifications by <jerry@samba.org> */
-
-      /* !!!!!!!!!!!!!!!!!!!!! mutant ones !!!!!!!!!!!!!!!!!!!!! */
-
-      case __NR_execve: /* syscall 11 */
-         /* int execve (const char *filename, 
-                        char *const argv [], 
-                        char *const envp[]); */
-         MAYBE_PRINTF("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
-                        arg1, arg1, arg2, arg3);
-         /* Resistance is futile.  Nuke all other threads.  POSIX
-            mandates this. */
-            VG_(nuke_all_threads_except)( tid );
-         /* Make any binding for LD_PRELOAD disappear, so that child
-            processes don't get traced into. */
-         if (!VG_(clo_trace_children)) {
-            Int i;
-            Char** envp = (Char**)arg3;
-            Char*  ld_preload_str = NULL;
-            Char*  ld_library_path_str = NULL;
-            for (i = 0; envp[i] != NULL; i++) {
-               if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
-                  ld_preload_str = &envp[i][11];
-               if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
-                  ld_library_path_str = &envp[i][16];
-            }
-            VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
-	       ld_preload_str, ld_library_path_str );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         /* Should we still be alive here?  Don't think so. */
-         /* Actually, above comment is wrong.  execve can fail, just
-            like any other syscall -- typically the file to exec does
-            not exist.  Hence: */
-         vg_assert(VG_(is_kerror)(res));
-         break;
-
-      /* !!!!!!!!!!!!!!!!!!!!!     end     !!!!!!!!!!!!!!!!!!!!! */
-
-      case __NR_access: /* syscall 33 */
-         /* int access(const char *pathname, int mode); */
-         MAYBE_PRINTF("access ( %p, %d )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "access(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_alarm: /* syscall 27 */
-         /* unsigned int alarm(unsigned int seconds); */
-         MAYBE_PRINTF("alarm ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_brk: /* syscall 45 */
-         /* libc   says: int   brk(void *end_data_segment);
-            kernel says: void* brk(void* end_data_segment);  (more or less)
-
-            libc returns 0 on success, and -1 (and sets errno) on failure.
-            Nb: if you ask to shrink the dataseg end below what it
-            currently is, that always succeeds, even if the dataseg end
-            doesn't actually change (eg. brk(0)).  Unless it seg faults.
-
-            Kernel returns the new dataseg end.  If the brk() failed, this
-            will be unchanged from the old one.  That's why calling (kernel)
-            brk(0) gives the current dataseg end (libc brk() just returns
-            zero in that case).
-
-            Both will seg fault if you shrink it back into a text segment.
-         */
-         MAYBE_PRINTF("brk ( %p ) --> ",arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         MAYBE_PRINTF("0x%x\n", res);
-
-         if (res == arg1) {
-            /* brk() succeeded */
-            if (res < curr_dataseg_end) {
-               /* successfully shrunk the data segment. */
-               VG_TRACK( die_mem_brk, (Addr)arg1,
-                                      curr_dataseg_end-arg1 );
-            } else
-            if (res > curr_dataseg_end && res != 0) {
-               /* successfully grew the data segment */
-               VG_TRACK( new_mem_brk, curr_dataseg_end,
-                                      arg1-curr_dataseg_end );
-            }
-            curr_dataseg_end = res;
-
-         } else {
-            /* brk() failed */
-            vg_assert(curr_dataseg_end == res);
-         }
-         break;
-
-      case __NR_chdir: /* syscall 12 */
-         /* int chdir(const char *path); */
-         MAYBE_PRINTF("chdir ( %p )\n", arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chdir(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_chmod: /* syscall 15 */
-         /* int chmod(const char *path, mode_t mode); */
-         MAYBE_PRINTF("chmod ( %p, %d )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chmod(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_chown32)
-      case __NR_chown32: /* syscall 212 */
-#     endif
-#     if defined(__NR_lchown32)
-      case __NR_lchown32: /* syscall 198 */
-#     endif
-      case __NR_chown: /* syscall 16 */
-         /* int chown(const char *path, uid_t owner, gid_t group); */
-         MAYBE_PRINTF("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "chown(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_close: /* syscall 6 */
-         /* int close(int fd); */
-         MAYBE_PRINTF("close ( %d )\n",arg1);
-         /* Detect and negate attempts by the client to close Valgrind's
-            logfile fd ... */
-         if (arg1 == (UInt)VG_(clo_logfile_fd)) {
-            VG_(message)(Vg_UserMsg, 
-              "Warning: client attempted to close "
-               "Valgrind's logfile fd (%d).", 
-               VG_(clo_logfile_fd));
-            VG_(message)(Vg_UserMsg, 
-              "   Use --logfile-fd=<number> to select an "
-              "alternative logfile fd." );
-            /* Pretend the close succeeded, regardless.  (0 == success) */
-            res = 0;
-            SET_SYSCALL_RETVAL(tid, res);
-         } else {
-            KERNEL_DO_SYSCALL(tid,res);
-         }
-         break;
-
-      case __NR_dup: /* syscall 41 */
-         /* int dup(int oldfd); */
-         MAYBE_PRINTF("dup ( %d ) --> ", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         MAYBE_PRINTF("%d\n", res);
-         break;
-
-      case __NR_dup2: /* syscall 63 */
-         /* int dup2(int oldfd, int newfd); */
-         MAYBE_PRINTF("dup2 ( %d, %d ) ...\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         MAYBE_PRINTF("SYSCALL[%d]       dup2 ( %d, %d ) = %d\n", 
-                        VG_(getpid)(), 
-                        arg1, arg2, res);
-         break;
-
-      case __NR_fcntl: /* syscall 55 */
-         /* int fcntl(int fd, int cmd, int arg); */
-         MAYBE_PRINTF("fcntl ( %d, %d, %d )\n",arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_fchdir: /* syscall 133 */
-         /* int fchdir(int fd); */
-         MAYBE_PRINTF("fchdir ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_fchown32)
-      case __NR_fchown32: /* syscall 207 */
-#     endif
-      case __NR_fchown: /* syscall 95 */
-         /* int fchown(int filedes, uid_t owner, gid_t group); */
-         MAYBE_PRINTF("fchown ( %d, %d, %d )\n", arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_fchmod: /* syscall 94 */
-         /* int fchmod(int fildes, mode_t mode); */
-         MAYBE_PRINTF("fchmod ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_fcntl64)
-      case __NR_fcntl64: /* syscall 221 */
-         /* I don't know what the prototype for this is supposed to be. */
-         /* ??? int fcntl(int fd, int cmd); */
-         MAYBE_PRINTF("fcntl64 (?!) ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_fstat: /* syscall 108 */
-         /* int fstat(int filedes, struct stat *buf); */
-         MAYBE_PRINTF("fstat ( %d, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "fstat", arg2, sizeof(struct stat) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
-         break;
-
-      case __NR_vfork: /* syscall 190 */
-         /* pid_t vfork(void); */
-         MAYBE_PRINTF("vfork ( ) ... becomes ... ");
-         /* KLUDGE: we prefer to do a fork rather than vfork. 
-            vfork gives a SIGSEGV, and the stated semantics looks
-            pretty much impossible for us. */
-         tst->m_eax = __NR_fork;
-         /* fall through ... */
-      case __NR_fork: /* syscall 2 */
-         /* pid_t fork(void); */
-         MAYBE_PRINTF("fork ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         if (res == 0) {
-            /* I am the child.  Nuke all other threads which I might
-               have inherited from my parent.  POSIX mandates this. */
-            VG_(nuke_all_threads_except)( tid );
-         }
-         break;
-
-      case __NR_fsync: /* syscall 118 */
-         /* int fsync(int fd); */
-         MAYBE_PRINTF("fsync ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_ftruncate: /* syscall 93 */
-         /* int ftruncate(int fd, size_t length); */
-         MAYBE_PRINTF("ftruncate ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_ftruncate64)
-      case __NR_ftruncate64: /* syscall 194 */
-         /* int ftruncate64(int fd, off64_t length); */
-         MAYBE_PRINTF("ftruncate64 ( %d, %lld )\n", 
-                        arg1,arg2|((long long) arg3 << 32));
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getdents: /* syscall 141 */
-         /* int getdents(unsigned int fd, struct dirent *dirp, 
-                         unsigned int count); */
-         MAYBE_PRINTF("getdents ( %d, %p, %d )\n",arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "getdents(dirp)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0)
-            VG_TRACK( post_mem_write, arg2, res );
-         break;
-
-#     if defined(__NR_getdents64)
-      case __NR_getdents64: /* syscall 220 */
-         /* int getdents(unsigned int fd, struct dirent64 *dirp, 
-                         unsigned int count); */
-         MAYBE_PRINTF("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "getdents64(dirp)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0)
-            VG_TRACK( post_mem_write, arg2, res );
-         break;
-#     endif
-
-#     if defined(__NR_getgroups32)
-      case __NR_getgroups32: /* syscall 205 */
-#     endif
-      case __NR_getgroups: /* syscall 80 */
-         /* int getgroups(int size, gid_t list[]); */
-         MAYBE_PRINTF("getgroups ( %d, %p )\n", arg1, arg2);
-         if (arg1 > 0)
-            SYSCALL_TRACK( pre_mem_write, tid, "getgroups(list)", arg2, 
-                               arg1 * sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (arg1 > 0 && !VG_(is_kerror)(res) && res > 0)
-            VG_TRACK( post_mem_write, arg2, res * sizeof(gid_t) );
-         break;
-
-      case __NR_getcwd: /* syscall 183 */
-         /* char *getcwd(char *buf, size_t size);  (but see comment below) */
-         MAYBE_PRINTF("getcwd ( %p, %d )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "getcwd(buf)", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-
-         /* From linux/fs/dcache.c:
-          *  NOTE! The user-level library version returns a character
-          *  pointer. The kernel system call just returns the length of the
-          *  buffer filled (which includes the ending '\0' character), or a
-          *  negative error value. 
-          */
-         if (!VG_(is_kerror)(res) && res != (Addr)NULL)
-            VG_TRACK( post_mem_write, arg1, res );
-         break;
-
-      case __NR_geteuid: /* syscall 49 */
-         /* uid_t geteuid(void); */
-         MAYBE_PRINTF("geteuid ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_geteuid32)
-      case __NR_geteuid32: /* syscall 201 */
-         /* ?? uid_t geteuid32(void); */
-         MAYBE_PRINTF("geteuid32(?) ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getegid: /* syscall 50 */
-         /* gid_t getegid(void); */
-         MAYBE_PRINTF("getegid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_getegid32)
-      case __NR_getegid32: /* syscall 202 */
-         /* gid_t getegid32(void); */
-         MAYBE_PRINTF("getegid32 ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getgid: /* syscall 47 */
-         /* gid_t getgid(void); */
-         MAYBE_PRINTF("getgid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_getgid32)
-      case __NR_getgid32: /* syscall 200 */
-         /* gid_t getgid32(void); */
-         MAYBE_PRINTF("getgid32 ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getpid: /* syscall 20 */
-         /* pid_t getpid(void); */
-         MAYBE_PRINTF("getpid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getpgid: /* syscall 132 */
-         /* pid_t getpgid(pid_t pid); */
-         MAYBE_PRINTF("getpgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getpgrp: /* syscall 65 */
-         /* pid_t getpgrp(void); */
-         MAYBE_PRINTF("getpgrp ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getppid: /* syscall 64 */
-         /* pid_t getppid(void); */
-         MAYBE_PRINTF("getppid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getresgid: /* syscall 171 */
-         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
-         MAYBE_PRINTF("getresgid ( %p, %p, %p )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "getresgid(rgid)", 
-                                       arg1, sizeof(gid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresgid(egid)", 
-                                       arg2, sizeof(gid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresgid(sgid)", 
-                                       arg3, sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
-            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
-            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
-         }
-         break;
-
-#     if defined(__NR_getresgid32)
-      case __NR_getresgid32: /* syscall 211 */
-         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
-         MAYBE_PRINTF("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "getresgid32(rgid)", 
-                                       arg1, sizeof(gid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresgid32(egid)", 
-                                       arg2, sizeof(gid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresgid32(sgid)", 
-                                       arg3, sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
-            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
-            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
-         }
-         break;
-#     endif
-
-      case __NR_getresuid: /* syscall 165 */
-         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
-         MAYBE_PRINTF("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "getresuid(ruid)", 
-                                       arg1, sizeof(uid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresuid(euid)", 
-                                       arg2, sizeof(uid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresuid(suid)", 
-                                       arg3, sizeof(uid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
-            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
-            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
-         }
-         break;
-
-#     if defined(__NR_getresuid32)
-      case __NR_getresuid32: /* syscall 209 */
-         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
-         MAYBE_PRINTF("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "getresuid32(ruid)", 
-                                       arg1, sizeof(uid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresuid32(euid)", 
-                                       arg2, sizeof(uid_t) );
-         SYSCALL_TRACK( pre_mem_write, tid, "getresuid32(suid)", 
-                                       arg3, sizeof(uid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
-            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
-            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
-         }
-         break;
-#     endif
-
-#     if defined(__NR_ugetrlimit)
-      case __NR_ugetrlimit: /* syscall 191 */
-#     endif
-      case __NR_getrlimit: /* syscall 76 */
-         /* int getrlimit (int resource, struct rlimit *rlim); */
-         MAYBE_PRINTF("getrlimit ( %d, %p )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "getrlimit(rlim)", arg2, 
-                           sizeof(struct rlimit) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0)
-            VG_TRACK( post_mem_write, arg2, sizeof(struct rlimit) );
-         break;
-
-      case __NR_getrusage: /* syscall 77 */
-         /* int getrusage (int who, struct rusage *usage); */
-         MAYBE_PRINTF("getrusage ( %d, %p )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "getrusage(usage)", arg2, 
-                           sizeof(struct rusage) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0)
-            VG_TRACK( post_mem_write,arg2, sizeof(struct rusage) );
-         break;
-
-      case __NR_gettimeofday: /* syscall 78 */
-         /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
-         MAYBE_PRINTF("gettimeofday ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "gettimeofday(tv)", arg1, 
-                           sizeof(struct timeval) );
-         if (arg2 != 0)
-            SYSCALL_TRACK( pre_mem_write, tid, "gettimeofday(tz)", arg2, 
-                              sizeof(struct timezone) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg1, sizeof(struct timeval) );
-            if (arg2 != 0)
-               VG_TRACK( post_mem_write, arg2, sizeof(struct timezone) );
-         }
-         break;
-
-      case __NR_getuid: /* syscall 24 */
-         /* uid_t getuid(void); */
-         MAYBE_PRINTF("getuid ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_getuid32)
-      case __NR_getuid32: /* syscall 199 */
-         /* ???uid_t getuid32(void); */
-         MAYBE_PRINTF("getuid32 ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_ipc: /* syscall 117 */
-         /* int ipc ( unsigned int call, int first, int second, 
-                      int third, void *ptr, long fifth); */
-         {
-         UInt arg6 = tst->m_ebp;
-
-         MAYBE_PRINTF("ipc ( %d, %d, %d, %d, %p, %d )\n",
-                        arg1,arg2,arg3,arg4,arg5,arg6);
-         switch (arg1 /* call */) {
-            case 1: /* IPCOP_semop */
-               SYSCALL_TRACK( pre_mem_read, tid, "semop(sops)", arg5, 
-                                  arg3 * sizeof(struct sembuf) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case 2: /* IPCOP_semget */
-            case 3: /* IPCOP_semctl */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case 11: /* IPCOP_msgsnd */
-               {
-                  struct msgbuf *msgp = (struct msgbuf *)arg5;
-                  Int msgsz = arg3;
-
-                  SYSCALL_TRACK( pre_mem_read, tid, "msgsnd(msgp->mtype)", 
-                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
-                  SYSCALL_TRACK( pre_mem_read, tid, "msgsnd(msgp->mtext)", 
-                                     (UInt)msgp->mtext, msgsz );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-                  break;
-               }
-            case 12: /* IPCOP_msgrcv */
-               {
-                  struct msgbuf *msgp;
-                  Int msgsz = arg3;
- 
-                  msgp = (struct msgbuf *)deref_Addr( tid,
-                            (Addr) (&((struct ipc_kludge *)arg5)->msgp),
-                            "msgrcv(msgp)" );
-
-                  SYSCALL_TRACK( pre_mem_write, tid, "msgrcv(msgp->mtype)", 
-                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
-                  SYSCALL_TRACK( pre_mem_write, tid, "msgrcv(msgp->mtext)", 
-                                     (UInt)msgp->mtext, msgsz );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-
-                  if ( !VG_(is_kerror)(res) && res > 0 ) {
-                     VG_TRACK( post_mem_write, (UInt)&msgp->mtype, 
-                                               sizeof(msgp->mtype) );
-                     VG_TRACK( post_mem_write, (UInt)msgp->mtext, res );
-                  }
-                  break;
-               }
-            case 13: /* IPCOP_msgget */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case 14: /* IPCOP_msgctl */
-               {
-                  switch (arg3 /* cmd */) {
-                     case IPC_STAT:
-                        SYSCALL_TRACK( pre_mem_write, tid, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        if ( !VG_(is_kerror)(res) && res > 0 ) {
-                           VG_TRACK( post_mem_write, arg5, 
-                                                     sizeof(struct msqid_ds) );
-                        }
-                        break;
-                     case IPC_SET:
-                        SYSCALL_TRACK( pre_mem_read, tid, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        break;
-#                    if defined(IPC_64)
-                     case IPC_STAT|IPC_64:
-                        SYSCALL_TRACK( pre_mem_write, tid, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid64_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        if ( !VG_(is_kerror)(res) && res > 0 ) {
-                           VG_TRACK( post_mem_write, arg5, 
-                                                     sizeof(struct msqid64_ds) );
-                        }
-                        break;
-#                    endif
-#                    if defined(IPC_64)
-                     case IPC_SET|IPC_64:
-                        SYSCALL_TRACK( pre_mem_read, tid, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid64_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        break;
-#                    endif
-                     default:
-                        KERNEL_DO_SYSCALL(tid,res);
-                        break;
-                  }
-                  break;
-               }
-            case 21: /* IPCOP_shmat */
-               {
-                  Int shmid = arg2;
-                  /*Int shmflag = arg3;*/
-                  Addr addr;
-
-                  KERNEL_DO_SYSCALL(tid,res);
-
-                  if ( VG_(is_kerror) ( res ) )
-                     break;
-                  
-                  /* force readability. before the syscall it is
-                   * indeed uninitialized, as can be seen in
-                   * glibc/sysdeps/unix/sysv/linux/shmat.c */
-                  VG_TRACK( post_mem_write, arg4, sizeof( ULong ) );
-
-                  addr = deref_Addr ( tid, arg4, "shmat(addr)" );
-                  if ( addr > 0 ) { 
-                     UInt segmentSize = get_shm_size ( shmid );
-                     if ( segmentSize > 0 ) {
-                        /* we don't distinguish whether it's read-only or
-                         * read-write -- it doesn't matter really. */
-                        VG_TRACK( new_mem_mmap, addr, segmentSize, 
-                                                True, True, False );
-                     }
-                  }
-                  break;
-               }
-            case 22: /* IPCOP_shmdt */
-                  KERNEL_DO_SYSCALL(tid,res);
-                  /* ### FIXME: this should call make_noaccess on the
-                   * area passed to shmdt. But there's no way to
-                   * figure out the size of the shared memory segment
-                   * just from the address...  Maybe we want to keep a
-                   * copy of the exiting mappings inside valgrind? */
-                  break;
-            case 23: /* IPCOP_shmget */
-                KERNEL_DO_SYSCALL(tid,res);
-                break;
-            case 24: /* IPCOP_shmctl */
-	      /* Subject: shmctl: The True Story
-                    Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
-                    From: Reuben Thomas <rrt@mupsych.org>
-                      To: Julian Seward <jseward@acm.org>
-
-                 1. As you suggested, the syscall subop is in arg1.
-
-                 2. There are a couple more twists, so the arg order
-                    is actually:
-
-                 arg1 syscall subop
-                 arg2 file desc
-                 arg3 shm operation code (can have IPC_64 set)
-                 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
-                        is defined?
-                 arg5 pointer to buffer
-
-                 3. With this in mind, I've amended the case as below:
-	      */
-               {
-                  UInt cmd = arg3;
-                  Bool out_arg = False;
-                  if ( arg5 ) {
-#                    if defined(IPC_64)
-                     cmd = cmd & (~IPC_64);
-#                    endif
-                     out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
-                     if ( out_arg )
-                        SYSCALL_TRACK( pre_mem_write, tid, 
-                           "shmctl(SHM_STAT or IPC_STAT,buf)", 
-                           arg5, sizeof(struct shmid_ds) );
-                     else
-                        SYSCALL_TRACK( pre_mem_read, tid, 
-                           "shmctl(SHM_XXXX,buf)", 
-                           arg5, sizeof(struct shmid_ds) );
-                  }
-                  KERNEL_DO_SYSCALL(tid,res);
-                  if ( arg5 && !VG_(is_kerror)(res) && res == 0 && out_arg )
-                          VG_TRACK( post_mem_write, arg5, 
-                                                    sizeof(struct shmid_ds) );
-               }
-               break;
-            default:
-               VG_(message)(Vg_DebugMsg,
-                            "FATAL: unhandled syscall(ipc) %d",
-                            arg1 );
-               VG_(core_panic)("... bye!\n");
-               break; /*NOTREACHED*/
-         }
-         }
-         break;
-
-      case __NR_ioctl: /* syscall 54 */
-         /* int ioctl(int d, int request, ...)
-            [The  "third"  argument  is traditionally char *argp, 
-             and will be so named for this discussion.]
-         */
-         /*
-         VG_(message)(
-            Vg_DebugMsg, 
-            "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
-            arg2,arg1,arg3);
-         */
-         MAYBE_PRINTF("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
-         switch (arg2 /* request */) {
-            case TCSETS:
-            case TCSETSW:
-            case TCSETSF:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TCSET{S,SW,SF})", arg3, 
-                                 VKI_SIZEOF_STRUCT_TERMIOS );
-               KERNEL_DO_SYSCALL(tid,res);
-               break; 
-            case TCGETS:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TCGETS)", arg3, 
-                                 VKI_SIZEOF_STRUCT_TERMIOS );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIOS );
-               break;
-            case TCSETA:
-            case TCSETAW:
-            case TCSETAF:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TCSET{A,AW,AF})", arg3,
-                                 VKI_SIZEOF_STRUCT_TERMIO );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TCGETA:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TCGETA)", arg3,
-                                 VKI_SIZEOF_STRUCT_TERMIO );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIO );
-               break;
-            case TCSBRK:
-            case TCXONC:
-            case TCSBRKP:
-            case TCFLSH:
-               /* These just take an int by value */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TIOCGWINSZ:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGWINSZ)", arg3, 
-                                 sizeof(struct winsize) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, sizeof(struct winsize) );
-               break;
-            case TIOCSWINSZ:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCSWINSZ)", arg3, 
-                                 sizeof(struct winsize) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TIOCLINUX:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCLINUX)", arg3, 
-                                 sizeof(char *) );
-               if (*(char *)arg3 == 11) {
-                  SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCLINUX, 11)", 
-                                    arg3, 2 * sizeof(char *) );
-               }
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, sizeof(char *) );
-               break;
-            case TIOCGPGRP:
-               /* Get process group ID for foreground processing group. */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGPGRP)", arg3,
-                                 sizeof(pid_t) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, sizeof(pid_t) );
-               break;
-            case TIOCSPGRP:
-               /* Set a process group ID? */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGPGRP)", arg3,
-                                 sizeof(pid_t) );
-               KERNEL_DO_SYSCALL(tid,res); 
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, sizeof(pid_t) );
-               break;
-            case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(TIOCGPTN)", 
-                                             arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                   VG_TRACK( post_mem_write, arg3, sizeof(int));
-               break;
-            case TIOCSCTTY:
-               /* Just takes an int value.  */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TIOCSPTLCK: /* Lock/unlock Pty */
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(TIOCSPTLCK)", 
-                                            arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case FIONBIO:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(FIONBIO)", 
-                                            arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case FIOASYNC:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(FIOASYNC)", 
-                                            arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case FIONREAD:                /* identical to SIOCINQ */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(FIONREAD)", 
-                                             arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, sizeof(int) );
-               break;
-
-            /* If you get compilation problems here, change the #if
-               1 to #if 0 and get rid of <scsi/sg.h> in
-               vg_unsafe.h. */
-#       if 1
-            case SG_SET_COMMAND_Q:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_SET_COMMAND_Q)", 
-                                 arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-#           if defined(SG_IO)
-            case SG_IO:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SG_IO)", arg3, 
-                                 sizeof(struct sg_io_hdr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_io_hdr));
-               break;
-#           endif /* SG_IO */
-            case SG_GET_SCSI_ID:
-               /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SG_GET_SCSI_ID)", arg3, 
-                                 sizeof(struct sg_scsi_id) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_scsi_id));
-               break;
-            case SG_SET_RESERVED_SIZE:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_SET_RESERVED_SIZE)", 
-                                 arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case SG_SET_TIMEOUT:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_SET_TIMEOUT)", arg3, 
-                                 sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case SG_GET_RESERVED_SIZE:
-               SYSCALL_TRACK( pre_mem_write, tid, 
-                                             "ioctl(SG_GET_RESERVED_SIZE)", arg3, 
-                                 sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(int));
-               break;
-            case SG_GET_TIMEOUT:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SG_GET_TIMEOUT)", arg3, 
-                                 sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(int));
-               break;
-            case SG_GET_VERSION_NUM:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SG_GET_VERSION_NUM)", 
-                                 arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-#       endif
-
-            case IIOCGETCPS:
-               /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
-                * when KERNEL was. I never saw a larger value than 64 though */
-#              ifndef ISDN_MAX_CHANNELS
-#              define ISDN_MAX_CHANNELS 64
-#              endif
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(IIOCGETCPS)", arg3,
-                                 ISDN_MAX_CHANNELS 
-                                 * 2 * sizeof(unsigned long) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, ISDN_MAX_CHANNELS 
-                                        * 2 * sizeof(unsigned long) );
-               break;
-            case IIOCNETGPN:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(IIOCNETGPN)",
-                                 (UInt)&((isdn_net_ioctl_phone *)arg3)->name,
-                                 sizeof(((isdn_net_ioctl_phone *)arg3)->name) );
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(IIOCNETGPN)", arg3,
-                                 sizeof(isdn_net_ioctl_phone) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write, arg3, sizeof(isdn_net_ioctl_phone) );
-               break;
-
-            /* These all use struct ifreq AFAIK */
-            case SIOCGIFINDEX:
-            case SIOCGIFFLAGS:        /* get flags                    */
-            case SIOCGIFHWADDR:       /* Get hardware address         */
-            case SIOCGIFMTU:          /* get MTU size                 */
-            case SIOCGIFADDR:         /* get PA address               */
-            case SIOCGIFNETMASK:      /* get network PA mask          */
-            case SIOCGIFMETRIC:       /* get metric                   */
-            case SIOCGIFMAP:          /* Get device parameters        */
-            case SIOCGIFTXQLEN:       /* Get the tx queue length      */
-            case SIOCGIFDSTADDR:      /* get remote PA address        */
-            case SIOCGIFBRDADDR:      /* get broadcast PA address     */
-            case SIOCGIFNAME:         /* get iface name               */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SIOCGIFINDEX)", arg3, 
-                                sizeof(struct ifreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifreq));
-               break;
-            case SIOCGIFCONF:         /* get iface list               */
-               /* WAS:
-               SYSCALL_TRACK( pre_mem_write,"ioctl(SIOCGIFCONF)", arg3, 
-                                sizeof(struct ifconf));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifconf));
-               */
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SIOCGIFCONF)", arg3, 
-                                sizeof(struct ifconf));
-               if ( arg3 ) {
-                  // TODO len must be readable and writable
-                  // buf pointer only needs to be readable
-                  struct ifconf *ifc = (struct ifconf *) arg3;
-                  SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SIOCGIFCONF).ifc_buf",
-                                   (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) );
-               }
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0 && arg3 ) {
-                  struct ifconf *ifc = (struct ifconf *) arg3;
-                  if (ifc->ifc_buf != NULL)
-                     VG_TRACK( post_mem_write, (Addr)(ifc->ifc_buf), 
-                                     (UInt)(ifc->ifc_len) );
-               }
-               break;
-            case SIOCGSTAMP:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SIOCGSTAMP)", arg3, 
-                                sizeof(struct timeval));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct timeval));
-               break;
-            /* SIOCOUTQ is an ioctl that, when called on a socket, returns
-               the number of bytes currently in that socket's send buffer.
-               It writes this value as an int to the memory location
-               indicated by the third argument of ioctl(2). */
-            case SIOCOUTQ:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SIOCOUTQ)", arg3, 
-                                sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(int));
-               break;
-            case SIOCGRARP:           /* get RARP table entry         */
-            case SIOCGARP:            /* get ARP table entry          */
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SIOCGARP)", arg3, 
-                                sizeof(struct arpreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct arpreq));
-               break;
-                    
-            case SIOCSIFFLAGS:        /* set flags                    */
-            case SIOCSIFMAP:          /* Set device parameters        */
-            case SIOCSIFTXQLEN:       /* Set the tx queue length      */
-            case SIOCSIFDSTADDR:      /* set remote PA address        */
-            case SIOCSIFBRDADDR:      /* set broadcast PA address     */
-            case SIOCSIFNETMASK:      /* set network PA mask          */
-            case SIOCSIFMETRIC:       /* set metric                   */
-            case SIOCSIFADDR:         /* set PA address               */
-            case SIOCSIFMTU:          /* set MTU size                 */
-            case SIOCSIFHWADDR:       /* set hardware address         */
-               SYSCALL_TRACK( pre_mem_read, tid,"ioctl(SIOCSIFFLAGS)", arg3, 
-                                sizeof(struct ifreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            /* Routing table calls.  */
-            case SIOCADDRT:           /* add routing table entry      */
-            case SIOCDELRT:           /* delete routing table entry   */
-               SYSCALL_TRACK( pre_mem_read, tid,"ioctl(SIOCADDRT/DELRT)", arg3, 
-                                sizeof(struct rtentry));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* RARP cache control calls. */
-            case SIOCDRARP:           /* delete RARP table entry      */
-            case SIOCSRARP:           /* set RARP table entry         */
-            /* ARP cache control calls. */
-            case SIOCSARP:            /* set ARP table entry          */
-            case SIOCDARP:            /* delete ARP table entry       */
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SIOCSIFFLAGS)", arg3, 
-                                sizeof(struct ifreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SIOCSPGRP:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SIOCSPGRP)", arg3, 
-                                            sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* linux/soundcard interface (OSS) */
-            case SNDCTL_SEQ_GETOUTCOUNT:
-            case SNDCTL_SEQ_GETINCOUNT:
-            case SNDCTL_SEQ_PERCMODE:
-            case SNDCTL_SEQ_TESTMIDI:
-            case SNDCTL_SEQ_RESETSAMPLES:
-            case SNDCTL_SEQ_NRSYNTHS:
-            case SNDCTL_SEQ_NRMIDIS:
-            case SNDCTL_SEQ_GETTIME:
-            case SNDCTL_DSP_GETFMTS:
-            case SNDCTL_DSP_GETTRIGGER:
-            case SNDCTL_DSP_GETODELAY:
-#           if defined(SNDCTL_DSP_GETSPDIF)
-            case SNDCTL_DSP_GETSPDIF:
-#           endif
-            case SNDCTL_DSP_GETCAPS:
-            case SOUND_PCM_READ_RATE:
-            case SOUND_PCM_READ_CHANNELS:
-            case SOUND_PCM_READ_BITS:
-            case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
-            case SOUND_PCM_READ_FILTER:
-               SYSCALL_TRACK( pre_mem_write, tid,
-                                 "ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", 
-                                arg3,
-                                sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(int));
-               break;
-            case SNDCTL_SEQ_CTRLRATE:
-            case SNDCTL_DSP_SPEED:
-            case SNDCTL_DSP_STEREO:
-            case SNDCTL_DSP_GETBLKSIZE: 
-            case SNDCTL_DSP_CHANNELS:
-            case SOUND_PCM_WRITE_FILTER:
-            case SNDCTL_DSP_SUBDIVIDE:
-            case SNDCTL_DSP_SETFRAGMENT:
-#           if defined(SNDCTL_DSP_GETCHANNELMASK)
-            case SNDCTL_DSP_GETCHANNELMASK:
-#           endif
-#           if defined(SNDCTL_DSP_BIND_CHANNEL)
-            case SNDCTL_DSP_BIND_CHANNEL:
-#           endif
-            case SNDCTL_TMR_TIMEBASE:
-            case SNDCTL_TMR_TEMPO:
-            case SNDCTL_TMR_SOURCE:
-            case SNDCTL_MIDI_PRETIME:
-            case SNDCTL_MIDI_MPUMODE:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(SNDCTL_XXX|SOUND_XXX "
-                                     "(SIOWR, int))", 
-                                arg3, sizeof(int));
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(SNDCTL_XXX|SOUND_XXX "
-                                     "(SIOWR, int))", 
-                                arg3, sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case SNDCTL_DSP_GETOSPACE:
-            case SNDCTL_DSP_GETISPACE:
-               SYSCALL_TRACK( pre_mem_write, tid, 
-                                "ioctl(SNDCTL_XXX|SOUND_XXX "
-                                "(SIOR, audio_buf_info))", arg3,
-                                sizeof(audio_buf_info));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(audio_buf_info));
-               break;
-            case SNDCTL_DSP_SETTRIGGER:
-               SYSCALL_TRACK( pre_mem_read, tid, 
-                                "ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", 
-                                arg3, sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SNDCTL_DSP_POST:
-            case SNDCTL_DSP_RESET:
-            case SNDCTL_DSP_SYNC:
-            case SNDCTL_DSP_SETSYNCRO:
-            case SNDCTL_DSP_SETDUPLEX:
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* Real Time Clock (/dev/rtc) ioctls */
-#           ifndef GLIBC_2_1
-            case RTC_UIE_ON:
-            case RTC_UIE_OFF:
-            case RTC_AIE_ON:
-            case RTC_AIE_OFF:
-            case RTC_PIE_ON:
-            case RTC_PIE_OFF:
-            case RTC_IRQP_SET:
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case RTC_RD_TIME:
-            case RTC_ALM_READ:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(RTC_RD_TIME/ALM_READ)", 
-                              arg3, sizeof(struct rtc_time));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(struct rtc_time));
-               break;
-            case RTC_ALM_SET:
-               SYSCALL_TRACK( pre_mem_read, tid, "ioctl(RTC_ALM_SET)", arg3,
-                                sizeof(struct rtc_time));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case RTC_IRQP_READ:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(RTC_IRQP_READ)", arg3,
-                                sizeof(unsigned long));
-               KERNEL_DO_SYSCALL(tid,res);
-               if(!VG_(is_kerror) && res == 0)
-                   VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
-               break;
-#           endif /* GLIBC_2_1 */
-
-#           ifdef BLKGETSIZE
-            case BLKGETSIZE:
-               SYSCALL_TRACK( pre_mem_write, tid, "ioctl(BLKGETSIZE)", arg3,
-                                sizeof(unsigned long));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
-               break;
-#           endif /* BLKGETSIZE */
-
-            /* CD ROM stuff (??)  */
-            case CDROMSUBCHNL:
-                SYSCALL_TRACK( pre_mem_read, tid, 
-                   "ioctl(CDROMSUBCHNL (cdsc_format, char))",
-                   (int) &(((struct cdrom_subchnl *) arg3)->cdsc_format), 
-                   sizeof(((struct cdrom_subchnl *) arg3)->cdsc_format));
-                SYSCALL_TRACK( pre_mem_write, tid, 
-                   "ioctl(CDROMSUBCHNL)", arg3, 
-                   sizeof(struct cdrom_subchnl));
-                KERNEL_DO_SYSCALL(tid,res);
-                if (!VG_(is_kerror)(res) && res == 0)
-                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_subchnl));
-                break;
-            case CDROMREADTOCHDR:
-                SYSCALL_TRACK( pre_mem_write, tid, 
-                   "ioctl(CDROMREADTOCHDR)", arg3, 
-                   sizeof(struct cdrom_tochdr));
-                KERNEL_DO_SYSCALL(tid,res);
-                if (!VG_(is_kerror)(res) && res == 0)
-                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tochdr));
-                break;
-            case CDROMREADTOCENTRY:
-                 SYSCALL_TRACK( pre_mem_read, tid, 
-                    "ioctl(CDROMREADTOCENTRY (cdte_format, char))",
-                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_format), 
-                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_format));
-                 SYSCALL_TRACK( pre_mem_read, tid, 
-                    "ioctl(CDROMREADTOCENTRY (cdte_track, char))",
-                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_track), 
-                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_track));
-                 SYSCALL_TRACK( pre_mem_write, tid, 
-                    "ioctl(CDROMREADTOCENTRY)", arg3, 
-                    sizeof(struct cdrom_tocentry));
-                 KERNEL_DO_SYSCALL(tid,res);
-                 if (!VG_(is_kerror)(res) && res == 0)
-                    VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tochdr));
-                 break;
-            case CDROMPLAYMSF:
-                 SYSCALL_TRACK( pre_mem_read, tid, "ioctl(CDROMPLAYMSF)", arg3, 
-                    sizeof(struct cdrom_msf));
-                 KERNEL_DO_SYSCALL(tid,res);
-                 break;
-            /* The following two are probably bogus (should check args
-               for readability).  JRS 20021117 */
-            case CDROM_DRIVE_STATUS: /* 0x5326 */
-            case CDROM_CLEAR_OPTIONS: /* 0x5321 */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* We don't have any specific information on it, so
-               try to do something reasonable based on direction and
-               size bits.  The encoding scheme is described in
-               /usr/include/asm/ioctl.h.  
-
-               According to Simon Hausmann, _IOC_READ means the kernel
-               writes a value to the ioctl value passed from the user
-               space and the other way around with _IOC_WRITE. */
-            default: {
-               UInt dir  = _IOC_DIR(arg2);
-               UInt size = _IOC_SIZE(arg2);
-               if (VG_(strstr)(VG_(clo_weird_hacks), "lax-ioctls") != NULL) {
-                   /* 
-                    * Be very lax about ioctl handling; the only
-                    * assumption is that the size is correct. Doesn't
-                    * require the full buffer to be initialized when
-                    * writing.  Without this, using some device
-                    * drivers with a large number of strange ioctl
-                    * commands becomes very tiresome.
-                    */
-               } 
-               else if (/* size == 0 || */ dir == _IOC_NONE) {
-                  static Int moans = 3;
-                  if (moans > 0) {
-                     moans--;
-                     VG_(message)(Vg_UserMsg, 
-                        "Warning: noted but unhandled ioctl 0x%x"
-                        " with no size/direction hints",
-                        arg2); 
-                     VG_(message)(Vg_UserMsg, 
-                        "   This could cause spurious value errors"
-                        " to appear.");
-                     VG_(message)(Vg_UserMsg, 
-                        "   See README_MISSING_SYSCALL_OR_IOCTL for "
-                        "guidance on writing a proper wrapper." );
-                  }
-               } else {
-                  if ((dir & _IOC_WRITE) && size > 0)
-                     SYSCALL_TRACK( pre_mem_read, tid, "ioctl(generic)", 
-                                                      arg3, size);
-                  if ((dir & _IOC_READ) && size > 0)
-                     SYSCALL_TRACK( pre_mem_write, tid, "ioctl(generic)", 
-                                                       arg3, size);
-               }
-               KERNEL_DO_SYSCALL(tid,res);
-               if (size > 0 && (dir & _IOC_READ)
-                   && !VG_(is_kerror)(res) && res == 0
-                   && arg3 != (Addr)NULL)
-                  VG_TRACK( post_mem_write,arg3, size);
-               break;
-            }
-         }
-         break;
-
-      case __NR_kill: /* syscall 37 */
-         /* int kill(pid_t pid, int sig); */
-         MAYBE_PRINTF("kill ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_link: /* syscall 9 */
-         /* int link(const char *oldpath, const char *newpath); */
-         MAYBE_PRINTF("link ( %p, %p)\n", arg1, arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "link(oldpath)", arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "link(newpath)", arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_lseek: /* syscall 19 */
-         /* off_t lseek(int fildes, off_t offset, int whence); */
-         MAYBE_PRINTF("lseek ( %d, %d, %d )\n",arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR__llseek: /* syscall 140 */
-         /* int _llseek(unsigned int fd, unsigned long offset_high,       
-                        unsigned long  offset_low, 
-                        loff_t * result, unsigned int whence); */
-         MAYBE_PRINTF("llseek ( %d, 0x%x, 0x%x, %p, %d )\n",
-                        arg1,arg2,arg3,arg4,arg5);
-         SYSCALL_TRACK( pre_mem_write, tid, "llseek(result)", arg4, 
-                                       sizeof(loff_t));
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0)
-            VG_TRACK( post_mem_write, arg4, sizeof(loff_t) );
-         break;
-
-      case __NR_lstat: /* syscall 107 */
-         /* int lstat(const char *file_name, struct stat *buf); */
-         MAYBE_PRINTF("lstat ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "lstat(file_name)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "lstat(buf)", arg2, 
-                                       sizeof(struct stat) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
-         }
-         break;
-
-#     if defined(__NR_lstat64)
-      case __NR_lstat64: /* syscall 196 */
-         /* int lstat64(const char *file_name, struct stat64 *buf); */
-         MAYBE_PRINTF("lstat64 ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "lstat64(file_name)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "lstat64(buf)", arg2, 
-                                            sizeof(struct stat64) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
-         }
-         break;
-#     endif
-
-      case __NR_mkdir: /* syscall 39 */
-         /* int mkdir(const char *pathname, mode_t mode); */
-         MAYBE_PRINTF("mkdir ( %p, %d )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "mkdir(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_mmap2)
-      case __NR_mmap2: /* syscall 192 */
-         /* My impression is that this is exactly like __NR_mmap 
-            except that all 6 args are passed in regs, rather than in 
-            a memory-block. */
-         /* void* mmap(void *start, size_t length, int prot, 
-                       int flags, int fd, off_t offset); 
-         */
-         if (VG_(clo_trace_syscalls)) {
-            UInt arg6 = tst->m_ebp;
-            VG_(printf)("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
-                        arg1, arg2, arg3, arg4, arg5, arg6 );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            mmap_segment( (Addr)res, arg2, arg3, arg5 );
-         }
-         break;
-#     endif
-
-      case __NR_mmap: /* syscall 90 */
-         /* void* mmap(void *start, size_t length, int prot, 
-                       int flags, int fd, off_t offset); 
-         */
-         SYSCALL_TRACK( pre_mem_read, tid, "mmap(args)", arg1, 6*sizeof(UInt) );
-         {
-            UInt* arg_block = (UInt*)arg1;
-            UInt arg6;
-            arg1 = arg_block[0];
-            arg2 = arg_block[1];
-            arg3 = arg_block[2];
-            arg4 = arg_block[3];
-            arg5 = arg_block[4];
-            arg6 = arg_block[5];
-            MAYBE_PRINTF("mmap ( %p, %d, %d, %d, %d, %d )\n",
-                        arg1, arg2, arg3, arg4, arg5, arg6 );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            mmap_segment( (Addr)res, arg2, arg3, arg5 );
-         }
-         break;
-
-      case __NR_mprotect: /* syscall 125 */
-         /* int mprotect(const void *addr, size_t len, int prot); */
-         /* should addr .. addr+len-1 be checked before the call? */
-         MAYBE_PRINTF("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            mprotect_segment( arg1, arg2, arg3 );
-         }
-         break;
-
-      case __NR_munmap: /* syscall 91 */
-         /* int munmap(void *start, size_t length); */
-         /* should start .. start+length-1 be checked before the call? */
-         MAYBE_PRINTF("munmap ( %p, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            munmap_segment( arg1, arg2 );
-         }
-         break;
-
-      case __NR_nanosleep: /* syscall 162 */
-         /* int nanosleep(const struct timespec *req, struct timespec *rem); */
-         MAYBE_PRINTF("nanosleep ( %p, %p )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read, tid, "nanosleep(req)", arg1, 
-                                              sizeof(struct timespec) );
-         if (arg2 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "nanosleep(rem)", arg2, 
-                               sizeof(struct timespec) );
-         KERNEL_DO_SYSCALL(tid,res);
-         /* Somewhat bogus ... is only written by the kernel if
-            res == -1 && errno == EINTR. */
-         if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL)
-            VG_TRACK( post_mem_write, arg2, sizeof(struct timespec) );
-         break;
-
-      case __NR__newselect: /* syscall 142 */
-         /* int select(int n,  
-                       fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
-                       struct timeval *timeout);
-         */
-         MAYBE_PRINTF("newselect ( %d, %p, %p, %p, %p )\n",
-                        arg1,arg2,arg3,arg4,arg5);
-         if (arg2 != 0)
-            SYSCALL_TRACK( pre_mem_read, tid, "newselect(readfds)",   
-                              arg2, arg1/8 /* __FD_SETSIZE/8 */ );
-         if (arg3 != 0)
-            SYSCALL_TRACK( pre_mem_read, tid, "newselect(writefds)",  
-                              arg3, arg1/8 /* __FD_SETSIZE/8 */ );
-         if (arg4 != 0)
-            SYSCALL_TRACK( pre_mem_read, tid, "newselect(exceptfds)", 
-                              arg4, arg1/8 /* __FD_SETSIZE/8 */ );
-         if (arg5 != 0)
-            SYSCALL_TRACK( pre_mem_read, tid, "newselect(timeout)", arg5, 
-                              sizeof(struct timeval) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-         
-      case __NR_open: /* syscall 5 */
-         /* int open(const char *pathname, int flags); */
-         MAYBE_PRINTF("open ( %p(%s), %d ) --> ",arg1,arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "open(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         MAYBE_PRINTF("%d\n",res);
-         break;
-
-      case __NR_creat: /* syscall 8 */
-         /* int creat(const char *pathname, mode_t mode); */
-         MAYBE_PRINTF("creat ( %p(%s), %d ) --> ",arg1,arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "creat(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         MAYBE_PRINTF("%d\n",res);
-         break;
-
-      case __NR_pipe: /* syscall 42 */
-         /* int pipe(int filedes[2]); */
-         MAYBE_PRINTF("pipe ( %p ) ...\n", arg1);
-         SYSCALL_TRACK( pre_mem_write, tid, "pipe(filedes)", 
-                                            arg1, 2*sizeof(int) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg1, 2*sizeof(int) );
-         if (VG_(clo_trace_syscalls) && !VG_(is_kerror)(res))
-            VG_(printf)("SYSCALL[%d]       pipe --> (rd %d, wr %d)\n", 
-                        VG_(getpid)(), 
-                        ((UInt*)arg1)[0], ((UInt*)arg1)[1] );
-         break;
-
-      case __NR_poll: /* syscall 168 */
-         /* struct pollfd {
-               int fd;           -- file descriptor
-               short events;     -- requested events
-               short revents;    -- returned events
-            };
-           int poll(struct pollfd *ufds, unsigned int nfds, 
-                                         int timeout) 
-         */
-         MAYBE_PRINTF("poll ( %p, %d, %d )\n",arg1,arg2,arg3);
-         /* In fact some parts of this struct should be readable too.
-            This should be fixed properly. */
-         SYSCALL_TRACK( pre_mem_write, tid, "poll(ufds)", 
-                           arg1, arg2 * sizeof(struct pollfd) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            UInt i;
-            struct pollfd * arr = (struct pollfd *)arg1;
-            for (i = 0; i < arg2; i++)
-               VG_TRACK( post_mem_write, (Addr)(&arr[i].revents), 
-                                         sizeof(Short) );
-         }
-         /* For some unknown reason, %ebx sometimes gets changed by poll...
-            let the skin know (using the `post_reg_write_syscall_return'
-            event isn't ideal... 
-          
-            Update from Lennert Buytenhek <buytenh@gnu.org>: 
-            This came up a while ago on linux-kernel, search for a posting
-            by dvorak <dvorak@xs4all.nl>.
-
-            Basically, the linux syscall handler pushes the arguments to the
-            stack, and inside sys_poll (or another routine), one of the
-            parameters to the function is changed.  gcc uses the stack space
-            the parameter came in as the storage location for that variable,
-            and will sometimes spill that variable back to stack.  The linux
-            syscall handler pops all registers on return, and that is how
-            %ebx can get changed sometimes.
-          */
-         if (arg1 != tst->m_ebx) {
-            VG_TRACK( post_reg_write_syscall_return, tid, R_EBX );
-         }
-         break;
- 
-      case __NR_readlink: /* syscall 85 */
-         /* int readlink(const char *path, char *buf, size_t bufsiz); */
-         MAYBE_PRINTF("readlink ( %p, %p, %d )\n", arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "readlink(path)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "readlink(buf)", arg2,arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            VG_TRACK( post_mem_write, arg2, res );
-         }
-         break;
-
-      case __NR_readv: { /* syscall 145 */
-         /* int readv(int fd, const struct iovec * vector, size_t count); */
-         Int i;
-         struct iovec * vec;
-         MAYBE_PRINTF("readv ( %d, %p, %d )\n",arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_read, tid, "readv(vector)", 
-                           arg2, arg3 * sizeof(struct iovec) );
-         /* ToDo: don't do any of the following if the vector is invalid */
-         vec = (struct iovec *)arg2;
-         for (i = 0; i < (Int)arg3; i++)
-            SYSCALL_TRACK( pre_mem_write, tid, "readv(vector[...])",
-                              (UInt)vec[i].iov_base,vec[i].iov_len );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            /* res holds the number of bytes read. */
-            for (i = 0; i < arg3; i++) {
-               Int nReadThisBuf = vec[i].iov_len;
-               if (nReadThisBuf > res) nReadThisBuf = res;
-               VG_TRACK( post_mem_write, (UInt)vec[i].iov_base, nReadThisBuf );
-               res -= nReadThisBuf;
-               if (res < 0) VG_(core_panic)("readv: res < 0");
-            }
-         }
-         break;
-      }
-
-      case __NR_rename: /* syscall 38 */
-         /* int rename(const char *oldpath, const char *newpath); */
-         MAYBE_PRINTF("rename ( %p, %p )\n", arg1, arg2 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "rename(oldpath)", arg1 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "rename(newpath)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_rmdir: /* syscall 40 */
-         /* int rmdir(const char *pathname); */
-         MAYBE_PRINTF("rmdir ( %p )\n", arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "rmdir(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_sched_setparam: /* syscall 154 */
-         /* int sched_setparam(pid_t pid, const struct sched_param *p); */
-         MAYBE_PRINTF("sched_setparam ( %d, %p )\n", arg1, arg2 );
-         SYSCALL_TRACK( pre_mem_read, tid, "sched_setparam(ptr)",
-                           arg2, sizeof(struct sched_param) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
-         break;
-
-      case __NR_sched_getparam: /* syscall 155 */
-         /* int sched_getparam(pid_t pid, struct sched_param *p); */
-         MAYBE_PRINTF("sched_getparam ( %d, %p )\n", arg1, arg2 );
-         SYSCALL_TRACK( pre_mem_write, tid, "sched_getparam(ptr)",
-                           arg2, sizeof(struct sched_param) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
-         break;
-
-      case __NR_sched_yield: /* syscall 158 */
-         /* int sched_yield(void); */
-         MAYBE_PRINTF("sched_yield ()\n" );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_select: /* syscall 82 */
-         /* struct sel_arg_struct {
-              unsigned long n;
-              fd_set *inp, *outp, *exp;
-              struct timeval *tvp;
-            };
-            int old_select(struct sel_arg_struct *arg);
-         */
-         SYSCALL_TRACK( pre_mem_read, tid, "select(args)", arg1, 5*sizeof(UInt) );
-         {
-            UInt* arg_struct = (UInt*)arg1;
-            arg1 = arg_struct[0];
-            arg2 = arg_struct[1];
-            arg3 = arg_struct[2];
-            arg4 = arg_struct[3];
-            arg5 = arg_struct[4];
-
-            MAYBE_PRINTF("select ( %d, %p, %p, %p, %p )\n", 
-                         arg1,arg2,arg3,arg4,arg5);
-            if (arg2 != (Addr)NULL)
-               SYSCALL_TRACK( pre_mem_read, tid, "select(readfds)", arg2, 
-                                          arg1/8 /* __FD_SETSIZE/8 */ );
-            if (arg3 != (Addr)NULL)
-               SYSCALL_TRACK( pre_mem_read, tid, "select(writefds)", arg3, 
-                                          arg1/8 /* __FD_SETSIZE/8 */ );
-            if (arg4 != (Addr)NULL)
-               SYSCALL_TRACK( pre_mem_read, tid, "select(exceptfds)", arg4, 
-                                          arg1/8 /* __FD_SETSIZE/8 */ );
-            if (arg5 != (Addr)NULL)
-               SYSCALL_TRACK( pre_mem_read, tid, "select(timeout)", arg5, 
-                                          sizeof(struct timeval) );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setitimer: /* syscall 104 */
-         /* setitimer(int which, const struct itimerval *value,
-                                 struct itimerval *ovalue); */
-         MAYBE_PRINTF("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3);
-         if (arg2 != (Addr)NULL)
-            SYSCALL_TRACK( pre_mem_read, tid, "setitimer(value)", 
-                             arg2, sizeof(struct itimerval) );
-         if (arg3 != (Addr)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "setitimer(ovalue)", 
-                             arg3, sizeof(struct itimerval));
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL) {
-            VG_TRACK( post_mem_write,arg3, sizeof(struct itimerval));
-         }
-         break;
-
-#     if defined(__NR_setfsgid32)
-      case __NR_setfsgid32: /* syscall 216 */
-         /* int setfsgid(uid_t fsgid); */
-         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setgid32)
-      case __NR_setgid32: /* syscall 214 */
-#     endif
-      case __NR_setgid: /* syscall 46 */
-         /* int setgid(gid_t gid); */
-         MAYBE_PRINTF("setgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setsid: /* syscall 66 */
-         /* pid_t setsid(void); */
-         MAYBE_PRINTF("setsid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_setgroups32)
-      case __NR_setgroups32: /* syscall 206 */
-#     endif
-      case __NR_setgroups: /* syscall 81 */
-         /* int setgroups(size_t size, const gid_t *list); */
-         MAYBE_PRINTF("setgroups ( %d, %p )\n", arg1, arg2);
-         if (arg1 > 0)
-            SYSCALL_TRACK( pre_mem_read, tid, "setgroups(list)", arg2, 
-                               arg1 * sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setpgid: /* syscall 57 */
-         /* int setpgid(pid_t pid, pid_t pgid); */
-         MAYBE_PRINTF("setpgid ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_setregid32)
-      case __NR_setregid32: /* syscall 204 */
-         /* int setregid(gid_t rgid, gid_t egid); */
-         MAYBE_PRINTF("setregid32(?) ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setresuid32)
-      case __NR_setresuid32: /* syscall 208 */
-         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
-         MAYBE_PRINTF("setresuid32(?) ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setreuid32)
-      case __NR_setreuid32: /* syscall 203 */
-#     endif
-      case __NR_setreuid: /* syscall 70 */
-         /* int setreuid(uid_t ruid, uid_t euid); */
-         MAYBE_PRINTF("setreuid ( 0x%x, 0x%x )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setrlimit: /* syscall 75 */
-         /* int setrlimit (int resource, const struct rlimit *rlim); */
-         MAYBE_PRINTF("setrlimit ( %d, %p )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read, tid, "setrlimit(rlim)", 
-                                      arg2, sizeof(struct rlimit) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_setuid32)
-      case __NR_setuid32: /* syscall 213 */
-#     endif
-      case __NR_setuid: /* syscall 23 */
-         /* int setuid(uid_t uid); */
-         MAYBE_PRINTF("setuid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_socketcall: /* syscall 102 */
-         /* int socketcall(int call, unsigned long *args); */
-         MAYBE_PRINTF("socketcall ( %d, %p )\n",arg1,arg2);
-         switch (arg1 /* request */) {
-
-            case SYS_SOCKETPAIR:
-               /* int socketpair(int d, int type, int protocol, int sv[2]); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.socketpair(args)", 
-                                 arg2, 4*sizeof(Addr) );
-               SYSCALL_TRACK( pre_mem_write, tid, "socketcall.socketpair(sv)", 
-                                 ((UInt*)arg2)[3], 2*sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res))
-                  VG_TRACK( post_mem_write, ((UInt*)arg2)[3], 2*sizeof(int) );
-               break;
-
-            case SYS_SOCKET:
-               /* int socket(int domain, int type, int protocol); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.socket(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_BIND:
-               /* int bind(int sockfd, struct sockaddr *my_addr, 
-                           int addrlen); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.bind(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               pre_mem_read_sockaddr( tid, "socketcall.bind(my_addr.%s)",
-                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-               
-            case SYS_LISTEN:
-               /* int listen(int s, int backlog); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.listen(args)", 
-                                 arg2, 2*sizeof(Addr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_ACCEPT: {
-               /* int accept(int s, struct sockaddr *addr, int *addrlen); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.accept(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               {
-               Addr addr_p     = ((UInt*)arg2)[1];
-               Addr addrlen_p  = ((UInt*)arg2)[2];
-               if (addr_p != (Addr)NULL) 
-                  buf_and_len_pre_check ( tid, addr_p, addrlen_p,
-                                          "socketcall.accept(addr)",
-                                          "socketcall.accept(addrlen_in)" );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (addr_p != (Addr)NULL) 
-                  buf_and_len_post_check ( tid, res, addr_p, addrlen_p,
-                                           "socketcall.accept(addrlen_out)" );
-               }
-               break;
-            }
-
-            case SYS_SENDTO:
-               /* int sendto(int s, const void *msg, int len, 
-                             unsigned int flags, 
-                             const struct sockaddr *to, int tolen); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.sendto(args)", arg2, 
-                                 6*sizeof(Addr) );
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.sendto(msg)",
-                                 ((UInt*)arg2)[1], /* msg */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               pre_mem_read_sockaddr( tid, "socketcall.sendto(to.%s)",
-                  (struct sockaddr *) (((UInt*)arg2)[4]), ((UInt*)arg2)[5]);
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_SEND:
-               /* int send(int s, const void *msg, size_t len, int flags); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.send(args)", arg2,
-                                 4*sizeof(Addr) );
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.send(msg)",
-                                 ((UInt*)arg2)[1], /* msg */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_RECVFROM:
-               /* int recvfrom(int s, void *buf, int len, unsigned int flags,
-                               struct sockaddr *from, int *fromlen); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.recvfrom(args)", 
-                                 arg2, 6*sizeof(Addr) );
-               {
-               Addr buf_p      = ((UInt*)arg2)[1];
-               Int  len        = ((UInt*)arg2)[2];
-               Addr from_p     = ((UInt*)arg2)[4];
-               Addr fromlen_p  = ((UInt*)arg2)[5];
-
-               SYSCALL_TRACK( pre_mem_write, tid, "socketcall.recvfrom(buf)", 
-                                             buf_p, len );
-               if (from_p != (Addr)NULL) 
-                  buf_and_len_pre_check ( tid, from_p, fromlen_p, 
-                                          "socketcall.recvfrom(from)",
-                                          "socketcall.recvfrom(fromlen_in)" );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (from_p != (Addr)NULL) 
-                  buf_and_len_post_check ( tid, res, from_p, fromlen_p,
-                                           "socketcall.recvfrom(fromlen_out)" );
-               if (!VG_(is_kerror)(res))
-                  VG_TRACK( post_mem_write, buf_p, len );
-               }
-               break;
-
-            case SYS_RECV:
-               /* int recv(int s, void *buf, int len, unsigned int flags); */
-               /* man 2 recv says:
-               The  recv call is normally used only on a connected socket
-               (see connect(2)) and is identical to recvfrom with a  NULL
-               from parameter.
-               */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.recv(args)", 
-                                 arg2, 4*sizeof(Addr) );
-               SYSCALL_TRACK( pre_mem_write, tid, "socketcall.recv(buf)", 
-                                 ((UInt*)arg2)[1], /* buf */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res >= 0 
-                                   && ((UInt*)arg2)[1] != (UInt)NULL) {
-                  VG_TRACK( post_mem_write, ((UInt*)arg2)[1], /* buf */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               }
-               break;
-
-            case SYS_CONNECT:
-               /* int connect(int sockfd, 
-                              struct sockaddr *serv_addr, int addrlen ); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.connect(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               SYSCALL_TRACK( pre_mem_read, tid, 
-                                 "socketcall.connect(serv_addr.sa_family)",
-                                 ((UInt*)arg2)[1], /* serv_addr */
-                                 sizeof (sa_family_t));
-               pre_mem_read_sockaddr( tid,
-                  "socketcall.connect(serv_addr.%s)",
-                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_SETSOCKOPT:
-               /* int setsockopt(int s, int level, int optname, 
-                                 const void *optval, int optlen); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.setsockopt(args)", 
-                                 arg2, 5*sizeof(Addr) );
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.setsockopt(optval)",
-                                 ((UInt*)arg2)[3], /* optval */
-                                 ((UInt*)arg2)[4]  /* optlen */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_GETSOCKOPT:
-               /* int setsockopt(int s, int level, int optname, 
-                                 void *optval, socklen_t *optlen); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.getsockopt(args)", 
-                                 arg2, 5*sizeof(Addr) );
-               {
-               Addr optval_p  = ((UInt*)arg2)[3];
-               Addr optlen_p  = ((UInt*)arg2)[4];
-               /* vg_assert(sizeof(socklen_t) == sizeof(UInt)); */
-               if (optval_p != (Addr)NULL) 
-                  buf_and_len_pre_check ( tid, optval_p, optlen_p,
-                                          "socketcall.getsockopt(optval)",
-                                          "socketcall.getsockopt(optlen)" );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (optval_p != (Addr)NULL) 
-                  buf_and_len_post_check ( tid, res, optval_p, optlen_p,
-                                           "socketcall.getsockopt(optlen_out)" );
-               }
-               break;
-
-            case SYS_GETSOCKNAME:
-               /* int getsockname(int s, struct sockaddr* name, int* namelen) */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.getsockname(args)",
-                                            arg2, 3*sizeof(Addr) );
-               {
-               Addr name_p     = ((UInt*)arg2)[1];
-               Addr namelen_p  = ((UInt*)arg2)[2];
-
-               /* Nb: name_p cannot be NULL */
-               buf_and_len_pre_check ( tid, name_p, namelen_p,
-                                       "socketcall.getsockname(name)",
-                                       "socketcall.getsockname(namelen_in)" );
-               KERNEL_DO_SYSCALL(tid,res);
-               buf_and_len_post_check ( tid, res, name_p, namelen_p,
-                                        "socketcall.getsockname(namelen_out)" );
-               }
-               break;
-
-            case SYS_GETPEERNAME:
-               /* int getpeername(int s, struct sockaddr* name, int* namelen) */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.getpeername(args)",
-                                            arg2, 3*sizeof(Addr) );
-               {
-               Addr name_p     = ((UInt*)arg2)[1];
-               Addr namelen_p  = ((UInt*)arg2)[2];
-
-               /* Nb: name_p cannot be NULL */
-               buf_and_len_pre_check ( tid, name_p, namelen_p,
-                                       "socketcall.getpeername(name)",
-                                       "socketcall.getpeername(namelen_in)" );
-               KERNEL_DO_SYSCALL(tid,res);
-               buf_and_len_post_check ( tid, res, name_p, namelen_p,
-                                        "socketcall.getpeername(namelen_out)" );
-               }
-               break;
-
-            case SYS_SHUTDOWN:
-               /* int shutdown(int s, int how); */
-               SYSCALL_TRACK( pre_mem_read, tid, "socketcall.shutdown(args)", 
-                                            arg2, 2*sizeof(Addr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_SENDMSG:
-               {
-                  /* int sendmsg(int s, const struct msghdr *msg, int flags); */
-
-                  /* this causes warnings, and I don't get why. glibc bug?
-                   * (after all it's glibc providing the arguments array)
-                  SYSCALL_TRACK( pre_mem_read, "socketcall.sendmsg(args)", 
-                                     arg2, 3*sizeof(Addr) );
-                  */
-
-                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
-                  msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-                  break;
-               }
-
-            case SYS_RECVMSG:
-               {
-                  /* int recvmsg(int s, struct msghdr *msg, int flags); */
-
-                  /* this causes warnings, and I don't get why. glibc bug?
-                   * (after all it's glibc providing the arguments array)
-                  SYSCALL_TRACK( pre_mem_read, "socketcall.recvmsg(args)", 
-                                     arg2, 3*sizeof(Addr) );
-                  */
-
-                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
-                  msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-
-                  if ( !VG_(is_kerror)( res ) )
-                     msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
-
-                  break;
-               }
-
-            default:
-               VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
-               VG_(core_panic)("... bye!\n");
-               break; /*NOTREACHED*/
-         }
-         break;
-
-      case __NR_stat: /* syscall 106 */
-         /* int stat(const char *file_name, struct stat *buf); */
-         MAYBE_PRINTF("stat ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "stat(file_name)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "stat(buf)", 
-                                       arg2, sizeof(struct stat) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
-         break;
-
-      case __NR_statfs: /* syscall 99 */
-         /* int statfs(const char *path, struct statfs *buf); */
-         MAYBE_PRINTF("statfs ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "statfs(path)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "stat(buf)", 
-                                       arg2, sizeof(struct statfs) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
-         break;
-
-      case __NR_symlink: /* syscall 83 */
-         /* int symlink(const char *oldpath, const char *newpath); */
-         MAYBE_PRINTF("symlink ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "symlink(oldpath)", arg1 );
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "symlink(newpath)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break; 
-
-#     if defined(__NR_stat64)
-      case __NR_stat64: /* syscall 195 */
-         /* int stat64(const char *file_name, struct stat64 *buf); */
-         MAYBE_PRINTF("stat64 ( %p, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "stat64(file_name)", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "stat64(buf)", 
-                                       arg2, sizeof(struct stat64) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
-         break;
-#     endif
-
-#     if defined(__NR_fstat64)
-      case __NR_fstat64: /* syscall 197 */
-         /* int fstat64(int filedes, struct stat64 *buf); */
-         MAYBE_PRINTF("fstat64 ( %d, %p )\n",arg1,arg2);
-         SYSCALL_TRACK( pre_mem_write, tid, "fstat64(buf)", 
-                                       arg2, sizeof(struct stat64) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
-         break;
-#     endif
-
-      case __NR_sysinfo: /* syscall 116 */
-         /* int sysinfo(struct sysinfo *info); */
-         MAYBE_PRINTF("sysinfo ( %p )\n",arg1);
-         SYSCALL_TRACK( pre_mem_write, tid, "sysinfo(info)", 
-                                       arg1, sizeof(struct sysinfo) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            VG_TRACK( post_mem_write, arg1, sizeof(struct sysinfo) );
-         break;
-
-      case __NR_time: /* syscall 13 */
-         /* time_t time(time_t *t); */
-         MAYBE_PRINTF("time ( %p )\n",arg1);
-         if (arg1 != (UInt)NULL) {
-            SYSCALL_TRACK( pre_mem_write, tid, "time", arg1, sizeof(time_t) );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
-            VG_TRACK( post_mem_write, arg1, sizeof(time_t) );
-         }
-         break;
-
-      case __NR_times: /* syscall 43 */
-         /* clock_t times(struct tms *buf); */
-         MAYBE_PRINTF("times ( %p )\n",arg1);
-         SYSCALL_TRACK( pre_mem_write, tid, "times(buf)", 
-                                       arg1, sizeof(struct tms) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
-            VG_TRACK( post_mem_write, arg1, sizeof(struct tms) );
-         }
-         break;
-
-      case __NR_truncate: /* syscall 92 */
-         /* int truncate(const char *path, size_t length); */
-         MAYBE_PRINTF("truncate ( %p, %d )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "truncate(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_umask: /* syscall 60 */
-         /* mode_t umask(mode_t mask); */
-         MAYBE_PRINTF("umask ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_unlink: /* syscall 10 */
-         /* int unlink(const char *pathname) */
-         MAYBE_PRINTF("ulink ( %p )\n",arg1);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "unlink(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_uname: /* syscall 122 */
-         /* int uname(struct utsname *buf); */
-         MAYBE_PRINTF("uname ( %p )\n",arg1);
-         SYSCALL_TRACK( pre_mem_write, tid, "uname(buf)", 
-                                       arg1, sizeof(struct utsname) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
-            VG_TRACK( post_mem_write, arg1, sizeof(struct utsname) );
-         }
-         break;
-
-      case __NR_utime: /* syscall 30 */
-         /* int utime(const char *filename, struct utimbuf *buf); */
-         MAYBE_PRINTF("utime ( %p, %p )\n", arg1,arg2);
-         SYSCALL_TRACK( pre_mem_read_asciiz, tid, "utime(filename)", arg1 );
-         if (arg2 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_read, tid, "utime(buf)", arg2, 
-                                                 sizeof(struct utimbuf) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_wait4: /* syscall 114 */
-         /* pid_t wait4(pid_t pid, int *status, int options,
-                        struct rusage *rusage) */
-         MAYBE_PRINTF("wait4 ( %d, %p, %d, %p )\n",
-                      arg1,arg2,arg3,arg4);
-         if (arg2 != (Addr)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "wait4(status)", 
-                                          arg2, sizeof(int) );
-         if (arg4 != (Addr)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "wait4(rusage)", arg4, 
-                              sizeof(struct rusage) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            if (arg2 != (Addr)NULL)
-               VG_TRACK( post_mem_write, arg2, sizeof(int) );
-            if (arg4 != (Addr)NULL)
-               VG_TRACK( post_mem_write, arg4, sizeof(struct rusage) );
-         }
-         break;
-
-      case __NR_waitpid: /* syscall 7 */
-         /* pid_t waitpid(pid_t pid, int *status, int options); */
-         
-         MAYBE_PRINTF("waitpid ( %d, %p, %d )\n",
-                        arg1,arg2,arg3);
-         if (arg2 != (Addr)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "waitpid(status)",
-                                          arg2, sizeof(int) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            if (arg2 != (Addr)NULL)
-               VG_TRACK( post_mem_write, arg2, sizeof(int) );
-         }
-         break;
-
-      case __NR_writev: { /* syscall 146 */
-         /* int writev(int fd, const struct iovec * vector, size_t count); */
-         Int i;
-         struct iovec * vec;
-         MAYBE_PRINTF("writev ( %d, %p, %d )\n",arg1,arg2,arg3);
-         SYSCALL_TRACK( pre_mem_read, tid, "writev(vector)", 
-                           arg2, arg3 * sizeof(struct iovec) );
-         /* ToDo: don't do any of the following if the vector is invalid */
-         vec = (struct iovec *)arg2;
-         for (i = 0; i < (Int)arg3; i++)
-            SYSCALL_TRACK( pre_mem_read, tid, "writev(vector[...])",
-                              (UInt)vec[i].iov_base,vec[i].iov_len );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-      }
-
-#     if defined(__NR_prctl)
-      case __NR_prctl: /* syscall 172 */
-         /* int prctl(int option, unsigned long arg2, unsigned long arg3,
-                      unsigned long arg4, unsigned long arg5); */
-         MAYBE_PRINTF( "prctl ( %d, %d, %d, %d, %d )\n", arg1, arg2, arg3,
-			arg4, arg5 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      /*-------------------------- SIGNALS --------------------------*/
-
-      /* Normally set to 1, so that Valgrind's signal-simulation machinery
-         is engaged.  Sometimes useful to disable (set to 0), for
-         debugging purposes, to make clients more deterministic. */
-#     define SIGNAL_SIMULATION 1
-
-      case __NR_sigaltstack: /* syscall 186 */
-         /* int sigaltstack(const stack_t *ss, stack_t *oss); */
-         MAYBE_PRINTF("sigaltstack ( %p, %p )\n",arg1,arg2);
-         if (arg1 != (UInt)NULL) {
-            SYSCALL_TRACK( pre_mem_read, tid, "sigaltstack(ss)", 
-                              arg1, sizeof(vki_kstack_t) );
-         }
-         if (arg2 != (UInt)NULL) {
-            SYSCALL_TRACK( pre_mem_write, tid, "sigaltstack(oss)", 
-                              arg2, sizeof(vki_kstack_t) );
-         }
-#        if SIGNAL_SIMULATION
-         VG_(do__NR_sigaltstack) (tid);
-         res = tst->m_eax;
-#        else
-         KERNEL_DO_SYSCALL(tid,res);
-#        endif
-         if (!VG_(is_kerror)(res) && res == 0 && arg2 != (UInt)NULL)
-            VG_TRACK( post_mem_write, arg2, sizeof(vki_kstack_t));
-         break;
-
-      case __NR_rt_sigaction: /* syscall 174 */
-      case __NR_sigaction:    /* syscall 67  */
-         /* int sigaction(int signum, struct k_sigaction *act, 
-                                      struct k_sigaction *oldact); */
-         MAYBE_PRINTF("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3);
-         if (arg2 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_read, tid, "sigaction(act)", 
-                              arg2, sizeof(vki_ksigaction));
-         if (arg3 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "sigaction(oldact)", 
-                              arg3, sizeof(vki_ksigaction));
-         /* We do this one ourselves! */
-#        if SIGNAL_SIMULATION
-         VG_(do__NR_sigaction)(tid);
-         res = tst->m_eax;
-#        else
-         /* debugging signals; when we don't handle them. */
-         KERNEL_DO_SYSCALL(tid,res);
-#        endif
-         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
-            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigaction));
-         break;
-
-      case __NR_rt_sigprocmask: /* syscall 175 */
-      case __NR_sigprocmask:    /* syscall 126 */
-         /* int sigprocmask(int how, k_sigset_t *set, 
-                                     k_sigset_t *oldset); */
-         MAYBE_PRINTF("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3);
-         if (arg2 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_read, tid, "sigprocmask(set)", 
-                              arg2, sizeof(vki_ksigset_t));
-         if (arg3 != (UInt)NULL)
-            SYSCALL_TRACK( pre_mem_write, tid, "sigprocmask(oldset)", 
-                              arg3, sizeof(vki_ksigset_t));
-#        if SIGNAL_SIMULATION
-         VG_(do__NR_sigprocmask) ( tid, 
-                                   arg1 /*how*/, 
-                                   (vki_ksigset_t*) arg2,
-                                   (vki_ksigset_t*) arg3 );
-         res = tst->m_eax;
-#        else
-         KERNEL_DO_SYSCALL(tid,res);
-#        endif
-         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
-            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigset_t));
-         break;
-      case __NR_sigpending: /* syscall 73 */
-#     if defined(__NR_rt_sigpending)
-      case __NR_rt_sigpending: /* syscall 176 */
-#     endif
-         /* int sigpending( sigset_t *set ) ; */
-         MAYBE_PRINTF( "sigpending ( %p )\n", arg1 );
-         SYSCALL_TRACK( pre_mem_write, tid, "sigpending(set)", 
-                           arg1, sizeof(vki_ksigset_t));
-#        if SIGNAL_SIMULATION
-         VG_(do_sigpending)( tid, (vki_ksigset_t*)arg1 );
-         res = 0;
-	 SET_SYSCALL_RETVAL(tid, res);
-#        else
-         KERNEL_DO_SYSCALL(tid, res);
-#        endif
-         if ( !VG_( is_kerror )( res ) && res == 0 )
-            VG_TRACK( post_mem_write, arg1, sizeof( vki_ksigset_t ) ) ;
-         break ;
-
-      default:
-         VG_(message)
-            (Vg_DebugMsg,"FATAL: unhandled syscall: %d",syscallno);
-         VG_(message)
-            (Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
-         VG_(message)
-            (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
-         VG_(unimplemented)("no wrapper for the above system call");
-         vg_assert(3+3 == 7);
-         break; /*NOTREACHED*/
+   if (syscallno < MAX_SPECIAL_SYS && special_sys[syscallno].before != NULL) {
+      sys = &special_sys[syscallno];
+      special = True;
+   } else if (syscallno < MAX_SYS_INFO && sys_info[syscallno].before != NULL) {
+      sys = &sys_info[syscallno];
+   } else {
+      sys = &bad_sys;
+      special = True;
    }
 
-   /* { void zzzmemscan(void); zzzmemscan(); } */
+   /* Do any pre-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      tst->sys_pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/sys->may_block);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   MAYBE_PRINTF("SYSCALL[%d,%d](%3d)%s%s:", 
+		VG_(getpid)(), tid, syscallno, 
+		special ? " special" : "",
+		sys->may_block ? " blocking" : "");
+
+   if (special) {
+      /* "Special" syscalls are implemented by Valgrind internally,
+	 and do not generate real kernel calls.  The expectation,
+	 therefore, is that the "before" function not only does the
+	 appropriate tests, but also performs the syscall itself and
+	 sets the result.  Special syscalls cannot block. */
+      vg_assert(sys->may_block == False);
+
+      (sys->before)(tst->tid, tst);
+
+      syscall_done = True;
+   } else {
+      (sys->before)(tst->tid, tst);
+
+      if ((Int)tst->m_eax <= 0) {
+	 /* "before" decided the syscall wasn't viable, so don't do
+	    anything - just pretend the syscall happened. */
+	 syscall_done = True;
+      } else if (sys->may_block) {
+	 /* issue to worker */
+	 VG_(sys_issue)(tid);
+      } else {
+	 /* run the syscall directly */
+	 tst->m_eax = VG_(do_syscall)(syscallno, 
+				      tst->m_ebx,
+				      tst->m_ecx, 
+				      tst->m_edx,
+				      tst->m_esi,
+				      tst->m_edi,
+				      tst->m_ebp);
+	 syscall_done = True;
+      }
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+
+   return syscall_done;
+}
+
+
+void VG_(post_syscall) ( ThreadId tid )
+{
+   ThreadState* tst;
+   UInt syscallno;
+   const struct sys_info *sys;
+   Bool special = False;
+   void *pre_res;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   tst = VG_(get_ThreadState)(tid);
+
+   /* Tell the skin about the syscall return value */
+   SET_SYSCALL_RETVAL(tst->tid, tst->m_eax);
+
+   syscallno = tst->syscallno;
+   pre_res = tst->sys_pre_res;
+
+   vg_assert(syscallno != -1);			/* must be a current syscall */
+   vg_assert(tst->status == VgTs_WaitSys);	/* should be blocked waiting */
+
+   if (syscallno < MAX_SPECIAL_SYS && special_sys[syscallno].before != NULL) {
+      sys = &special_sys[syscallno];
+      special = True;
+   } else if (syscallno < MAX_SYS_INFO && sys_info[syscallno].before != NULL) {
+      sys = &sys_info[syscallno];
+   } else {
+      sys = &bad_sys;
+      special = True;
+   }
+   
+   if (!VG_(is_kerror)(tst->m_eax) && sys->after != NULL)
+      (sys->after)(tst->tid, tst);
 
    /* Do any post-syscall actions */
    if (VG_(needs).syscall_wrapper) {
       VGP_PUSHCC(VgpSkinSysWrap);
-      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/False);
+      SK_(post_syscall)(tid, syscallno, pre_res, tst->m_eax, /*isBlocking*/True); // did block
       VGP_POPCC(VgpSkinSysWrap);
    }
 
-   VGP_POPCC(VgpCoreSysWrap);
-}
-
-
-
-/* Perform pre-actions for a blocking syscall, but do not do the
-   syscall itself.
-
-   Because %eax is used both for the syscall number before the call
-   and the result value afterwards, we can't reliably use it to get
-   the syscall number.  So the caller has to pass it explicitly.  
-*/
-void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno )
-{
-   ThreadState* tst;
-   UInt         arg1, arg2, arg3;
-   void*        pre_res = 0;
-
-   VGP_PUSHCC(VgpCoreSysWrap);
-
-   vg_assert(VG_(is_valid_tid)(tid));
-   tst              = & VG_(threads)[tid];
-   arg1             = tst->m_ebx;
-   arg2             = tst->m_ecx;
-   arg3             = tst->m_edx;
-   /*
-   arg4             = tst->m_esi;
-   arg5             = tst->m_edi;
-   */
-
-   if (VG_(needs).syscall_wrapper) {
-      VGP_PUSHCC(VgpSkinSysWrap);
-      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/True);
-      VGP_POPCC(VgpSkinSysWrap);
-   }
-
-   switch (syscallno) {
-
-      case __NR_read: /* syscall 3 */
-         /* size_t read(int fd, void *buf, size_t count); */
-         MAYBE_PRINTF(
-               "SYSCALL--PRE[%d,%d]       read ( %d, %p, %d )\n", 
-               VG_(getpid)(), tid,
-               arg1, arg2, arg3);
-         SYSCALL_TRACK( pre_mem_write, tid, "read(buf)", arg2, arg3 );
-         break;
-
-      case __NR_write: /* syscall 4 */
-         /* size_t write(int fd, const void *buf, size_t count); */
-         MAYBE_PRINTF(
-               "SYSCALL--PRE[%d,%d]       write ( %d, %p, %d )\n", 
-               VG_(getpid)(), tid,
-               arg1, arg2, arg3);
-         SYSCALL_TRACK( pre_mem_read, tid, "write(buf)", arg2, arg3 );
-         break;
-
-      default:
-         VG_(printf)("pre_known_blocking_syscall: unexpected %d\n", syscallno);
-         VG_(core_panic)("pre_known_blocking_syscall");
-         /*NOTREACHED*/
-         break;
-   }
-   VGP_POPCC(VgpCoreSysWrap);
-
-   return pre_res;      /* 0 if SK_(pre_syscall)() not called */
-}
-
-
-/* Perform post-actions for a blocking syscall, but do not do the
-   syscall itself.  
-
-   Because %eax is used both for the syscall number before the call
-   and the result value afterwards, we can't reliably use it to get
-   the syscall number.  So the caller has to pass it explicitly.  
-*/
-void VG_(post_known_blocking_syscall) ( ThreadId tid,
-                                        Int syscallno,
-                                        void* pre_res,
-                                        Int res )
-{
-   ThreadState* tst;
-   UInt         arg1, arg2, arg3;
-
-   VGP_PUSHCC(VgpCoreSysWrap);
-
-   vg_assert(VG_(is_valid_tid)(tid));
-   tst              = & VG_(threads)[tid];
-   arg1             = tst->m_ebx;
-   arg2             = tst->m_ecx;
-   arg3             = tst->m_edx;
-   /*
-   arg4             = tst->m_esi;
-   arg5             = tst->m_edi;
-   */
-
-   switch (syscallno) {
-
-      case __NR_read: /* syscall 3 */
-         /* size_t read(int fd, void *buf, size_t count); */
-         MAYBE_PRINTF(
-               "SYSCALL-POST[%d,%d]       read ( %d, %p, %d ) --> %d\n", 
-               VG_(getpid)(), tid,
-               arg1, arg2, arg3, res);
-         if (!VG_(is_kerror)(res) && res > 0)
-            VG_TRACK( post_mem_write, arg2, res );
-         break;
-
-      case __NR_write: /* syscall 4 */
-         /* size_t write(int fd, const void *buf, size_t count); */
-         MAYBE_PRINTF(
-               "SYSCALL-POST[%d,%d]       write ( %d, %p, %d ) --> %d\n", 
-               VG_(getpid)(), tid,
-               arg1, arg2, arg3, res);
-         break;
-
-      default:
-         VG_(printf)("post_known_blocking_syscall: unexpected %d\n", 
-                     syscallno);
-         VG_(core_panic)("post_known_blocking_syscall");
-         /*NOTREACHED*/
-         break;
-   }
-
-   if (VG_(needs).syscall_wrapper) {
-      VGP_PUSHCC(VgpSkinSysWrap);
-      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/True);
-      VGP_POPCC(VgpSkinSysWrap);
-   }
+   tst->status = VgTs_Runnable;	/* runnable again */
+   tst->syscallno = -1;
 
    VGP_POPCC(VgpCoreSysWrap);
 }
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index da39131..bb63816 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -2377,6 +2377,18 @@
    if (!debugging_translation)
       VG_TRACK( pre_mem_read, Vg_CoreTranslate, tid, "", orig_addr, 1 );
 
+   if (!VG_(is_addressable)(orig_addr, 1)) {
+      /* Code address is bad - deliver a signal instead */
+      vki_ksiginfo_t info;
+
+      info.si_signo = VKI_SIGSEGV;
+      info.si_code = 1;		/* address not mapped to object */
+      info._sifields._sigfault._addr = orig_addr;
+
+      VG_(deliver_signal)(tid, &info, False);
+      return;
+   }
+
    cb = VG_(alloc_UCodeBlock)();
    cb->orig_eip = orig_addr;
 
diff --git a/coregrind/vg_unistd.h b/coregrind/vg_unistd.h
new file mode 100644
index 0000000..29e5b4e
--- /dev/null
+++ b/coregrind/vg_unistd.h
@@ -0,0 +1,285 @@
+#ifndef _VG_ASM_I386_UNISTD_H_
+#define _VG_ASM_I386_UNISTD_H_
+/* Taken from Linux 2.6.0-test1 include/asm-i386/unistd.h */
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_restart_syscall      0
+#define __NR_exit		  1
+#define __NR_fork		  2
+#define __NR_read		  3
+#define __NR_write		  4
+#define __NR_open		  5
+#define __NR_close		  6
+#define __NR_waitpid		  7
+#define __NR_creat		  8
+#define __NR_link		  9
+#define __NR_unlink		 10
+#define __NR_execve		 11
+#define __NR_chdir		 12
+#define __NR_time		 13
+#define __NR_mknod		 14
+#define __NR_chmod		 15
+#define __NR_lchown		 16
+#define __NR_break		 17
+#define __NR_oldstat		 18
+#define __NR_lseek		 19
+#define __NR_getpid		 20
+#define __NR_mount		 21
+#define __NR_umount		 22
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_ptrace		 26
+#define __NR_alarm		 27
+#define __NR_oldfstat		 28
+#define __NR_pause		 29
+#define __NR_utime		 30
+#define __NR_stty		 31
+#define __NR_gtty		 32
+#define __NR_access		 33
+#define __NR_nice		 34
+#define __NR_ftime		 35
+#define __NR_sync		 36
+#define __NR_kill		 37
+#define __NR_rename		 38
+#define __NR_mkdir		 39
+#define __NR_rmdir		 40
+#define __NR_dup		 41
+#define __NR_pipe		 42
+#define __NR_times		 43
+#define __NR_prof		 44
+#define __NR_brk		 45
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_signal		 48
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_acct		 51
+#define __NR_umount2		 52
+#define __NR_lock		 53
+#define __NR_ioctl		 54
+#define __NR_fcntl		 55
+#define __NR_mpx		 56
+#define __NR_setpgid		 57
+#define __NR_ulimit		 58
+#define __NR_oldolduname	 59
+#define __NR_umask		 60
+#define __NR_chroot		 61
+#define __NR_ustat		 62
+#define __NR_dup2		 63
+#define __NR_getppid		 64
+#define __NR_getpgrp		 65
+#define __NR_setsid		 66
+#define __NR_sigaction		 67
+#define __NR_sgetmask		 68
+#define __NR_ssetmask		 69
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_sigsuspend		 72
+#define __NR_sigpending		 73
+#define __NR_sethostname	 74
+#define __NR_setrlimit		 75
+#define __NR_getrlimit		 76	/* Back compatible 2Gig limited rlimit */
+#define __NR_getrusage		 77
+#define __NR_gettimeofday	 78
+#define __NR_settimeofday	 79
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_select		 82
+#define __NR_symlink		 83
+#define __NR_oldlstat		 84
+#define __NR_readlink		 85
+#define __NR_uselib		 86
+#define __NR_swapon		 87
+#define __NR_reboot		 88
+#define __NR_readdir		 89
+#define __NR_mmap		 90
+#define __NR_munmap		 91
+#define __NR_truncate		 92
+#define __NR_ftruncate		 93
+#define __NR_fchmod		 94
+#define __NR_fchown		 95
+#define __NR_getpriority	 96
+#define __NR_setpriority	 97
+#define __NR_profil		 98
+#define __NR_statfs		 99
+#define __NR_fstatfs		100
+#define __NR_ioperm		101
+#define __NR_socketcall		102
+#define __NR_syslog		103
+#define __NR_setitimer		104
+#define __NR_getitimer		105
+#define __NR_stat		106
+#define __NR_lstat		107
+#define __NR_fstat		108
+#define __NR_olduname		109
+#define __NR_iopl		110
+#define __NR_vhangup		111
+#define __NR_idle		112
+#define __NR_vm86old		113
+#define __NR_wait4		114
+#define __NR_swapoff		115
+#define __NR_sysinfo		116
+#define __NR_ipc		117
+#define __NR_fsync		118
+#define __NR_sigreturn		119
+#define __NR_clone		120
+#define __NR_setdomainname	121
+#define __NR_uname		122
+#define __NR_modify_ldt		123
+#define __NR_adjtimex		124
+#define __NR_mprotect		125
+#define __NR_sigprocmask	126
+#define __NR_create_module	127
+#define __NR_init_module	128
+#define __NR_delete_module	129
+#define __NR_get_kernel_syms	130
+#define __NR_quotactl		131
+#define __NR_getpgid		132
+#define __NR_fchdir		133
+#define __NR_bdflush		134
+#define __NR_sysfs		135
+#define __NR_personality	136
+#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR_getdents		141
+#define __NR__newselect		142
+#define __NR_flock		143
+#define __NR_msync		144
+#define __NR_readv		145
+#define __NR_writev		146
+#define __NR_getsid		147
+#define __NR_fdatasync		148
+#define __NR__sysctl		149
+#define __NR_mlock		150
+#define __NR_munlock		151
+#define __NR_mlockall		152
+#define __NR_munlockall		153
+#define __NR_sched_setparam		154
+#define __NR_sched_getparam		155
+#define __NR_sched_setscheduler		156
+#define __NR_sched_getscheduler		157
+#define __NR_sched_yield		158
+#define __NR_sched_get_priority_max	159
+#define __NR_sched_get_priority_min	160
+#define __NR_sched_rr_get_interval	161
+#define __NR_nanosleep		162
+#define __NR_mremap		163
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+#define __NR_vm86		166
+#define __NR_query_module	167
+#define __NR_poll		168
+#define __NR_nfsservctl		169
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_prctl              172
+#define __NR_rt_sigreturn	173
+#define __NR_rt_sigaction	174
+#define __NR_rt_sigprocmask	175
+#define __NR_rt_sigpending	176
+#define __NR_rt_sigtimedwait	177
+#define __NR_rt_sigqueueinfo	178
+#define __NR_rt_sigsuspend	179
+#define __NR_pread64		180
+#define __NR_pwrite64		181
+#define __NR_chown		182
+#define __NR_getcwd		183
+#define __NR_capget		184
+#define __NR_capset		185
+#define __NR_sigaltstack	186
+#define __NR_sendfile		187
+#define __NR_getpmsg		188	/* some people actually want streams */
+#define __NR_putpmsg		189	/* some people actually want streams */
+#define __NR_vfork		190
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_pivot_root		217
+#define __NR_mincore		218
+#define __NR_madvise		219
+#define __NR_madvise1		219	/* delete when C lib stub is removed */
+#define __NR_getdents64		220
+#define __NR_fcntl64		221
+/* 223 is unused */
+#define __NR_gettid		224
+#define __NR_readahead		225
+#define __NR_setxattr		226
+#define __NR_lsetxattr		227
+#define __NR_fsetxattr		228
+#define __NR_getxattr		229
+#define __NR_lgetxattr		230
+#define __NR_fgetxattr		231
+#define __NR_listxattr		232
+#define __NR_llistxattr		233
+#define __NR_flistxattr		234
+#define __NR_removexattr	235
+#define __NR_lremovexattr	236
+#define __NR_fremovexattr	237
+#define __NR_tkill		238
+#define __NR_sendfile64		239
+#define __NR_futex		240
+#define __NR_sched_setaffinity	241
+#define __NR_sched_getaffinity	242
+#define __NR_set_thread_area	243
+#define __NR_get_thread_area	244
+#define __NR_io_setup		245
+#define __NR_io_destroy		246
+#define __NR_io_getevents	247
+#define __NR_io_submit		248
+#define __NR_io_cancel		249
+#define __NR_fadvise64		250
+
+#define __NR_exit_group		252
+#define __NR_lookup_dcookie	253
+#define __NR_epoll_create	254
+#define __NR_epoll_ctl		255
+#define __NR_epoll_wait		256
+#define __NR_remap_file_pages	257
+#define __NR_set_tid_address	258
+#define __NR_timer_create	259
+#define __NR_timer_settime	(__NR_timer_create+1)
+#define __NR_timer_gettime	(__NR_timer_create+2)
+#define __NR_timer_getoverrun	(__NR_timer_create+3)
+#define __NR_timer_delete	(__NR_timer_create+4)
+#define __NR_clock_settime	(__NR_timer_create+5)
+#define __NR_clock_gettime	(__NR_timer_create+6)
+#define __NR_clock_getres	(__NR_timer_create+7)
+#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_statfs64		268
+#define __NR_fstatfs64		269
+#define __NR_tgkill		270
+#define __NR_utimes		271
+#define __NR_mknod64		272
+
+#define NR_syscalls 273
+
+#endif /* _VG_ASM_I386_UNISTD_H_ */
diff --git a/coregrind/vg_unsafe.h b/coregrind/vg_unsafe.h
index a0e5937..76afb1c 100644
--- a/coregrind/vg_unsafe.h
+++ b/coregrind/vg_unsafe.h
@@ -33,7 +33,7 @@
 
 /* These includes are only used for making sense of the args for
    system calls. */
-#include <asm/unistd.h>   /* for system call numbers */
+#include "vg_unistd.h"    /* for system call numbers */
 #include <sys/mman.h>     /* for PROT_* */
 #include <sys/utsname.h>  /* for uname */
 #include <sys/time.h>     /* for struct timeval & struct timezone */
@@ -57,7 +57,7 @@
 #include <linux/cdrom.h>  /* for cd-rom ioctls */
 #include <sys/user.h>     /* for struct user_regs_struct et al */
 #include <signal.h>       /* for siginfo_t */
-#include <sys/timex.h>    /* for struct timex */
+#include <linux/timex.h>  /* for adjtimex */
 
 #define __USE_LARGEFILE64
 #include <sys/stat.h>     /* for struct stat */