DO NOT UPDATE!  COMPILES BUT DOESN'T WORK.

Major overhaul to the way thread startup and exit are done.  Removes some
ugly gunk in the scheduler, and adds support for thread detaching and
cancellation.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@326 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c
index 8d30fcf..5211106 100644
--- a/coregrind/arch/x86-linux/vg_libpthread.c
+++ b/coregrind/arch/x86-linux/vg_libpthread.c
@@ -268,6 +268,92 @@
    return 0;
 }
 
+/* --------------------------------------------------- 
+   Helper functions for running a thread 
+   and for clearing up afterwards.
+   ------------------------------------------------ */
+
+/* All exiting threads eventually pass through here, bearing the
+   return value, or PTHREAD_CANCELED, in ret_val. */
+static
+__attribute__((noreturn))
+void thread_exit_wrapper ( void* ret_val )
+{
+   int detached, res;
+   /* Run this thread's cleanup handlers. */
+   /* Run this thread's key finalizers. */
+
+   /* Decide on my final disposition. */
+   VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */,
+                           VG_USERREQ__SET_OR_GET_DETACH, 
+                           2 /* get */, 0, 0, 0);
+   assert(detached == 0 || detached == 1);
+
+   if (detached) {
+      /* Detached; I just quit right now. */
+      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
+                              VG_USERREQ__QUIT, 0, 0, 0, 0);
+   } else {
+      /* Not detached; so I wait for a joiner. */
+      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
+                              VG_USERREQ__WAIT_JOINER, ret_val, 0, 0, 0);
+   }
+   /* NOTREACHED */
+   barf("thread_exit_wrapper: still alive?!");
+}
+
+
+/* This function is a wrapper function for running a thread.  It runs
+   the root function specified in pthread_create, and then, should the
+   root function return a value, it arranges to run the thread's
+   cleanup handlers and exit correctly. */
+
+/* Struct used to convey info from pthread_create to
+   thread_wrapper. */
+typedef
+   struct {
+      pthread_attr_t* attr;
+      void* (*root_fn) ( void* );
+      void* arg;
+   }
+   NewThreadInfo;
+
+
+/* This is passed to the VG_USERREQ__APPLY_IN_NEW_THREAD and so must
+   not return.  Note that this runs in the new thread, not the
+   parent. */
+static
+__attribute__((noreturn))
+void thread_wrapper ( NewThreadInfo* info )
+{
+   int res;
+   pthread_attr_t* attr;
+   void* (*root_fn) ( void* );
+   void* arg;
+   void* ret_val;
+
+   attr    = info->attr;
+   root_fn = info->root_fn;
+   arg     = info->arg;
+
+   if (attr)
+      kludged("pthread_create -- ignoring attributes");
+
+   /* Free up the arg block that pthread_create malloced. */
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__FREE, info, 0, 0, 0);
+   assert(res == 0);
+
+   /* The root function might not return.  But if it does we simply
+      move along to thread_exit_wrapper.  All other ways out for the
+      thread (cancellation, or calling pthread_exit) lead there
+      too. */
+   ret_val = root_fn(arg);
+   thread_exit_wrapper(ret_val);
+   /* NOTREACHED */
+}
+
+
 /* ---------------------------------------------------
    THREADs
    ------------------------------------------------ */
@@ -289,20 +375,38 @@
 }
 
 
+/* Bundle up the args into a malloc'd block and create a new thread
+   consisting of thread_wrapper() applied to said malloc'd block. */
 int
 pthread_create (pthread_t *__restrict __thread,
                 __const pthread_attr_t *__restrict __attr,
                 void *(*__start_routine) (void *),
                 void *__restrict __arg)
 {
-   int res;
-   ensure_valgrind("pthread_create");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_CREATE,
-                           __thread, __attr, __start_routine, __arg);
-   return res;
-}
+   int            tid_child;
+   NewThreadInfo* info;
 
+   ensure_valgrind("pthread_create");
+
+   /* Allocate space for the arg block.  thread_wrapper will free
+      it. */
+   VALGRIND_MAGIC_SEQUENCE(info, NULL /* default */,
+                           VG_USERREQ__MALLOC, 
+                           sizeof(NewThreadInfo), 0, 0, 0);
+   assert(info != NULL);
+
+   info->attr    = (pthread_attr_t*)__attr;
+   info->root_fn = __start_routine;
+   info->arg     = __arg;
+   VALGRIND_MAGIC_SEQUENCE(tid_child, VG_INVALID_THREADID /* default */,
+                           VG_USERREQ__APPLY_IN_NEW_THREAD,
+                           &thread_wrapper, info, 0, 0);
+   assert(tid_child != VG_INVALID_THREADID);
+
+   if (__thread)
+      *__thread = tid_child;
+   return 0; /* success */
+}
 
 
 int 
@@ -319,14 +423,9 @@
 
 void pthread_exit(void *retval)
 {
-   int res;
    ensure_valgrind("pthread_exit");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_EXIT,
-                           retval, 0, 0, 0);
-   /* Doesn't return! */
-   /* However, we have to fool gcc into knowing that. */
-   barf("pthread_exit: still alive after request?!");
+   /* Simple! */
+   thread_exit_wrapper(retval);
 }
 
 
@@ -345,9 +444,12 @@
 
 int pthread_detach(pthread_t th)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_detach");
+   int res;
+   ensure_valgrind("pthread_detach");
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_OR_GET_DETACH,
+                           1 /* set */, 0, 0, 0);
+   assert(res == 0);
    return 0;
 }
 
@@ -601,17 +703,37 @@
 
 int pthread_setcancelstate(int state, int *oldstate)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_setcancelstate");
+   int res;
+   ensure_valgrind("pthread_setcancelstate");
+   if (state != PTHREAD_CANCEL_ENABLE
+       && state != PTHREAD_CANCEL_DISABLE) 
+      return EINVAL;
+   assert(-1 != PTHREAD_CANCEL_ENABLE);
+   assert(-1 != PTHREAD_CANCEL_DISABLE);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELSTATE,
+                           state, 0, 0, 0);
+   assert(res != -1);
+   if (oldstate) 
+      *oldstate = res;
    return 0;
 }
 
 int pthread_setcanceltype(int type, int *oldtype)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_setcanceltype");
+   int res;
+   ensure_valgrind("pthread_setcanceltype");
+   if (type != PTHREAD_CANCEL_DEFERRED
+       && type != PTHREAD_CANCEL_ASYNCHRONOUS) 
+      return EINVAL;
+   assert(-1 != PTHREAD_CANCEL_DEFERRED);
+   assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELTYPE,
+                           type, 0, 0, 0);
+   assert(res != -1);
+   if (oldtype) 
+      *oldtype = res;
    return 0;
 }
 
@@ -619,16 +741,24 @@
 {
    int res;
    ensure_valgrind("pthread_cancel");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_CANCEL,
-                           thread, 0, 0, 0);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELPEND,
+                           thread, &thread_exit_wrapper, 0, 0);
+   assert(res != -1);
    return res;
 }
 
+__inline__
 void pthread_testcancel(void)
 {
+   int res;
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__TESTCANCEL,
+                           0, 0, 0, 0);
+   assert(res == 0);
 }
 
+
 /*-------------------*/
 static pthread_mutex_t massacre_mx = PTHREAD_MUTEX_INITIALIZER;
 
@@ -1598,7 +1728,6 @@
 
 /* This is a terrible way to do the remapping.  Plan is to import an
    AVL tree at some point. */
-#define VG_N_SEMAPHORES 50
 
 typedef
    struct {
@@ -1772,8 +1901,6 @@
  * initialize/create and destroy/free the reader/writer lock.
  */
 
-#define VG_N_RWLOCKS 50
-
 /*
  * Structure describing a read-write lock.
  */
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index 252353c..d3da14b 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -90,9 +90,8 @@
 /* Constants for the fast original-code-write check cache. */
 
 
-/* Assembly code stubs make these requests ... */
+/* Assembly code stubs make this request */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
-#define VG_USERREQ__PTHREAD_RETURNS         0x4002
 
 #endif /* ndef __VG_INCLUDE_H */
 
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 2968922..b2654bb 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -32,7 +32,7 @@
 #include "vg_constants.h"
 
 /* ------------------ SIMULATED CPU HELPERS ------------------ */
-/* A couple of stubs for returns which we want to catch: signal
+/* A stub for a return which we want to catch: a signal return.
    returns and pthread returns.  In the latter case, the thread's
    return value is in %EAX, so we pass this as the first argument
    to the request.  In both cases we use the user request mechanism.
@@ -68,36 +68,6 @@
 	
 
 
-.global VG_(pthreadreturn_bogusRA)
-VG_(pthreadreturn_bogusRA):
-	subl	$20, %esp	# allocate arg block
-	movl	%esp, %edx	# %edx == &_zzq_args[0]
-	movl	$VG_USERREQ__PTHREAD_RETURNS, 0(%edx)	# request
-	movl	%eax, 4(%edx)	# arg1 == thread return value
-	movl	$0, 8(%edx)	# arg2
-	movl	$0, 12(%edx)	# arg3
-	movl	$0, 16(%edx)	# arg4
-	movl	%edx, %eax
-	# and now the magic sequence itself:
-	roll $29, %eax
-	roll $3, %eax
-	rorl $27, %eax
-	rorl $5, %eax
-	roll $13, %eax
-	roll $19, %eax
-	# should never get here
-	pushl	$pthreadreturn_bogusRA_panic_msg
-	call	VG_(panic)
-	
-.data
-pthreadreturn_bogusRA_panic_msg:
-.ascii	"vg_pthreadreturn_bogusRA: VG_USERREQ__PTHREAD_RETURNS was missed"
-.byte	0
-.text	
-	
-
-
-
 	
 /* ------------------ REAL CPU HELPERS ------------------ */
 /* The rest of this lot run on the real CPU. */
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 9431c8e..e81fe77 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -146,6 +146,12 @@
    beyond it. */
 #define VG_PTHREAD_STACK_SIZE 65536
 
+/* Number of entries in the semaphore-remapping table. */
+#define VG_N_SEMAPHORES 50
+
+/* Number of entries in the rwlock-remapping table. */
+#define VG_N_RWLOCKS 50
+
 
 /* ---------------------------------------------------------------------
    Basic types
@@ -416,27 +422,54 @@
 #define VG_USERREQ__MEMALIGN            0x2009
 
 
-#define VG_USERREQ__PTHREAD_CREATE          0x3001
-#define VG_USERREQ__PTHREAD_JOIN            0x3002
-#define VG_USERREQ__PTHREAD_GET_THREADID    0x3003
-#define VG_USERREQ__PTHREAD_MUTEX_LOCK      0x3004
-#define VG_USERREQ__PTHREAD_MUTEX_TRYLOCK   0x3005
-#define VG_USERREQ__PTHREAD_MUTEX_UNLOCK    0x3006
-#define VG_USERREQ__PTHREAD_CANCEL          0x3007
-#define VG_USERREQ__PTHREAD_EXIT            0x3008
-#define VG_USERREQ__PTHREAD_COND_WAIT       0x3009
-#define VG_USERREQ__PTHREAD_COND_TIMEDWAIT  0x300A
-#define VG_USERREQ__PTHREAD_COND_SIGNAL     0x300B
-#define VG_USERREQ__PTHREAD_COND_BROADCAST  0x300C
-#define VG_USERREQ__PTHREAD_KEY_CREATE      0x300D
-#define VG_USERREQ__PTHREAD_KEY_DELETE      0x300E
-#define VG_USERREQ__PTHREAD_SETSPECIFIC     0x300F
-#define VG_USERREQ__PTHREAD_GETSPECIFIC     0x3010
-#define VG_USERREQ__READ_MILLISECOND_TIMER  0x3011
-#define VG_USERREQ__PTHREAD_SIGMASK         0x3012
-#define VG_USERREQ__SIGWAIT                 0x3013
-#define VG_USERREQ__PTHREAD_KILL            0x3014
-#define VG_USERREQ__PTHREAD_YIELD           0x3015
+/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it.  Fn
+   MUST NOT return -- ever.  Eventually it will do either __QUIT or
+   __WAIT_JOINER.  */
+#define VG_USERREQ__APPLY_IN_NEW_THREAD     0x3001
+
+/* ( no-args ): calling thread disappears from the system forever.
+   Reclaim resources. */
+#define VG_USERREQ__QUIT                    0x3002
+
+/* ( void* ): calling thread waits for joiner and returns the void* to
+   it. */
+#define VG_USERREQ__WAIT_JOINER             0x3003
+
+/* ( ThreadId, void** ): wait to join a thread. */
+#define VG_USERREQ__PTHREAD_JOIN            0x3004
+
+/* Set cancellation state and type for this thread. */
+#define VG_USERREQ__SET_CANCELSTATE         0x3005
+#define VG_USERREQ__SET_CANCELTYPE          0x3006
+
+/* ( no-args ): Test if we are at a cancellation point. */
+#define VG_USERREQ__TESTCANCEL              0x3007
+
+/* ( ThreadId, &thread_exit_wrapper is the only allowable arg ): call
+   with this arg to indicate that a cancel is now pending for the
+   specified thread. */
+#define VG_USERREQ__SET_CANCELPEND          0x3008
+
+/* Set/get detach state for this thread. */
+#define VG_USERREQ__SET_OR_GET_DETACH       0x3009
+
+#define VG_USERREQ__PTHREAD_GET_THREADID    0x300B
+#define VG_USERREQ__PTHREAD_MUTEX_LOCK      0x300C
+#define VG_USERREQ__PTHREAD_MUTEX_TRYLOCK   0x300D
+#define VG_USERREQ__PTHREAD_MUTEX_UNLOCK    0x300E
+#define VG_USERREQ__PTHREAD_COND_WAIT       0x300F
+#define VG_USERREQ__PTHREAD_COND_TIMEDWAIT  0x3010
+#define VG_USERREQ__PTHREAD_COND_SIGNAL     0x3011
+#define VG_USERREQ__PTHREAD_COND_BROADCAST  0x3012
+#define VG_USERREQ__PTHREAD_KEY_CREATE      0x3013
+#define VG_USERREQ__PTHREAD_KEY_DELETE      0x3014
+#define VG_USERREQ__PTHREAD_SETSPECIFIC     0x3015
+#define VG_USERREQ__PTHREAD_GETSPECIFIC     0x3016
+#define VG_USERREQ__READ_MILLISECOND_TIMER  0x3017
+#define VG_USERREQ__PTHREAD_SIGMASK         0x3018
+#define VG_USERREQ__SIGWAIT                 0x3019
+#define VG_USERREQ__PTHREAD_KILL            0x301A
+#define VG_USERREQ__PTHREAD_YIELD           0x301B
 
 /* Cosmetic ... */
 #define VG_USERREQ__GET_PTHREAD_TRACE_LEVEL 0x3101
@@ -444,7 +477,6 @@
 /* 
 In vg_constants.h:
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
-#define VG_USERREQ__PTHREAD_RETURNS         0x4002
 */
 
 
@@ -506,10 +538,6 @@
          the mutex finally gets unblocked. */
       ThreadStatus status;
 
-      /* Identity of joiner (thread who called join on me), or
-         VG_INVALID_THREADID if no one asked to join yet. */
-      ThreadId joiner;
-
       /* When .status == WaitMX, points to the mutex I am waiting for.
          When .status == WaitCV, points to the mutex associated with
          the condition variable indicated by the .associated_cv field.
@@ -529,8 +557,26 @@
          pthread_cond_wait. */
       UInt awaken_at;
 
-      /* return value */
-      void* retval;
+      /* If VgTs_WaitJoiner, return value, as generated by joinees. */
+      void* joinee_retval;
+
+      /* If VgTs_WaitJoinee, place to copy the return value to, and
+         the identity of the thread we're waiting for. */
+      void**   joiner_thread_return;
+      ThreadId joiner_jee_tid;      
+
+      /* Cancelability state and type. */
+      Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
+      Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
+     
+      /* Pointer to fn to call to do cancellation.  Indicates whether
+         or not cancellation is pending.  If NULL, not pending.  Else
+         should be &thread_exit_wrapper(), indicating that
+         cancellation is pending. */
+      void (*cancel_pend)(void*);
+
+      /* Whether or not detached. */
+      Bool detached;
 
       /* thread-specific data */
       void* specifics[VG_N_THREAD_KEYS];
@@ -1694,9 +1740,9 @@
 extern void VG_(helper_value_check1_fail);
 extern void VG_(helper_value_check0_fail);
 
-/* NOT FUNCTIONS; these are bogus RETURN ADDRESS. */
+/* NOT A FUNCTION; this is a bogus RETURN ADDRESS. */
 extern void VG_(signalreturn_bogusRA)( void );
-extern void VG_(pthreadreturn_bogusRA)( void );
+
 
 /* ---------------------------------------------------------------------
    Exports of vg_cachesim.c
diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c
index 8d30fcf..5211106 100644
--- a/coregrind/vg_libpthread.c
+++ b/coregrind/vg_libpthread.c
@@ -268,6 +268,92 @@
    return 0;
 }
 
+/* --------------------------------------------------- 
+   Helper functions for running a thread 
+   and for clearing up afterwards.
+   ------------------------------------------------ */
+
+/* All exiting threads eventually pass through here, bearing the
+   return value, or PTHREAD_CANCELED, in ret_val. */
+static
+__attribute__((noreturn))
+void thread_exit_wrapper ( void* ret_val )
+{
+   int detached, res;
+   /* Run this thread's cleanup handlers. */
+   /* Run this thread's key finalizers. */
+
+   /* Decide on my final disposition. */
+   VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */,
+                           VG_USERREQ__SET_OR_GET_DETACH, 
+                           2 /* get */, 0, 0, 0);
+   assert(detached == 0 || detached == 1);
+
+   if (detached) {
+      /* Detached; I just quit right now. */
+      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
+                              VG_USERREQ__QUIT, 0, 0, 0, 0);
+   } else {
+      /* Not detached; so I wait for a joiner. */
+      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
+                              VG_USERREQ__WAIT_JOINER, ret_val, 0, 0, 0);
+   }
+   /* NOTREACHED */
+   barf("thread_exit_wrapper: still alive?!");
+}
+
+
+/* This function is a wrapper function for running a thread.  It runs
+   the root function specified in pthread_create, and then, should the
+   root function return a value, it arranges to run the thread's
+   cleanup handlers and exit correctly. */
+
+/* Struct used to convey info from pthread_create to
+   thread_wrapper. */
+typedef
+   struct {
+      pthread_attr_t* attr;
+      void* (*root_fn) ( void* );
+      void* arg;
+   }
+   NewThreadInfo;
+
+
+/* This is passed to the VG_USERREQ__APPLY_IN_NEW_THREAD and so must
+   not return.  Note that this runs in the new thread, not the
+   parent. */
+static
+__attribute__((noreturn))
+void thread_wrapper ( NewThreadInfo* info )
+{
+   int res;
+   pthread_attr_t* attr;
+   void* (*root_fn) ( void* );
+   void* arg;
+   void* ret_val;
+
+   attr    = info->attr;
+   root_fn = info->root_fn;
+   arg     = info->arg;
+
+   if (attr)
+      kludged("pthread_create -- ignoring attributes");
+
+   /* Free up the arg block that pthread_create malloced. */
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__FREE, info, 0, 0, 0);
+   assert(res == 0);
+
+   /* The root function might not return.  But if it does we simply
+      move along to thread_exit_wrapper.  All other ways out for the
+      thread (cancellation, or calling pthread_exit) lead there
+      too. */
+   ret_val = root_fn(arg);
+   thread_exit_wrapper(ret_val);
+   /* NOTREACHED */
+}
+
+
 /* ---------------------------------------------------
    THREADs
    ------------------------------------------------ */
@@ -289,20 +375,38 @@
 }
 
 
+/* Bundle up the args into a malloc'd block and create a new thread
+   consisting of thread_wrapper() applied to said malloc'd block. */
 int
 pthread_create (pthread_t *__restrict __thread,
                 __const pthread_attr_t *__restrict __attr,
                 void *(*__start_routine) (void *),
                 void *__restrict __arg)
 {
-   int res;
-   ensure_valgrind("pthread_create");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_CREATE,
-                           __thread, __attr, __start_routine, __arg);
-   return res;
-}
+   int            tid_child;
+   NewThreadInfo* info;
 
+   ensure_valgrind("pthread_create");
+
+   /* Allocate space for the arg block.  thread_wrapper will free
+      it. */
+   VALGRIND_MAGIC_SEQUENCE(info, NULL /* default */,
+                           VG_USERREQ__MALLOC, 
+                           sizeof(NewThreadInfo), 0, 0, 0);
+   assert(info != NULL);
+
+   info->attr    = (pthread_attr_t*)__attr;
+   info->root_fn = __start_routine;
+   info->arg     = __arg;
+   VALGRIND_MAGIC_SEQUENCE(tid_child, VG_INVALID_THREADID /* default */,
+                           VG_USERREQ__APPLY_IN_NEW_THREAD,
+                           &thread_wrapper, info, 0, 0);
+   assert(tid_child != VG_INVALID_THREADID);
+
+   if (__thread)
+      *__thread = tid_child;
+   return 0; /* success */
+}
 
 
 int 
@@ -319,14 +423,9 @@
 
 void pthread_exit(void *retval)
 {
-   int res;
    ensure_valgrind("pthread_exit");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_EXIT,
-                           retval, 0, 0, 0);
-   /* Doesn't return! */
-   /* However, we have to fool gcc into knowing that. */
-   barf("pthread_exit: still alive after request?!");
+   /* Simple! */
+   thread_exit_wrapper(retval);
 }
 
 
@@ -345,9 +444,12 @@
 
 int pthread_detach(pthread_t th)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_detach");
+   int res;
+   ensure_valgrind("pthread_detach");
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_OR_GET_DETACH,
+                           1 /* set */, 0, 0, 0);
+   assert(res == 0);
    return 0;
 }
 
@@ -601,17 +703,37 @@
 
 int pthread_setcancelstate(int state, int *oldstate)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_setcancelstate");
+   int res;
+   ensure_valgrind("pthread_setcancelstate");
+   if (state != PTHREAD_CANCEL_ENABLE
+       && state != PTHREAD_CANCEL_DISABLE) 
+      return EINVAL;
+   assert(-1 != PTHREAD_CANCEL_ENABLE);
+   assert(-1 != PTHREAD_CANCEL_DISABLE);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELSTATE,
+                           state, 0, 0, 0);
+   assert(res != -1);
+   if (oldstate) 
+      *oldstate = res;
    return 0;
 }
 
 int pthread_setcanceltype(int type, int *oldtype)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_setcanceltype");
+   int res;
+   ensure_valgrind("pthread_setcanceltype");
+   if (type != PTHREAD_CANCEL_DEFERRED
+       && type != PTHREAD_CANCEL_ASYNCHRONOUS) 
+      return EINVAL;
+   assert(-1 != PTHREAD_CANCEL_DEFERRED);
+   assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELTYPE,
+                           type, 0, 0, 0);
+   assert(res != -1);
+   if (oldtype) 
+      *oldtype = res;
    return 0;
 }
 
@@ -619,16 +741,24 @@
 {
    int res;
    ensure_valgrind("pthread_cancel");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_CANCEL,
-                           thread, 0, 0, 0);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELPEND,
+                           thread, &thread_exit_wrapper, 0, 0);
+   assert(res != -1);
    return res;
 }
 
+__inline__
 void pthread_testcancel(void)
 {
+   int res;
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__TESTCANCEL,
+                           0, 0, 0, 0);
+   assert(res == 0);
 }
 
+
 /*-------------------*/
 static pthread_mutex_t massacre_mx = PTHREAD_MUTEX_INITIALIZER;
 
@@ -1598,7 +1728,6 @@
 
 /* This is a terrible way to do the remapping.  Plan is to import an
    AVL tree at some point. */
-#define VG_N_SEMAPHORES 50
 
 typedef
    struct {
@@ -1772,8 +1901,6 @@
  * initialize/create and destroy/free the reader/writer lock.
  */
 
-#define VG_N_RWLOCKS 50
-
 /*
  * Structure describing a read-write lock.
  */
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index c4876f8..7dbb89b 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -213,9 +213,10 @@
       switch (VG_(threads)[i].status) {
          case VgTs_Runnable:   VG_(printf)("Runnable"); break;
          case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
-         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)", 
-                                           VG_(threads)[i].joiner); break;
-         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
+         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee(%d)", 
+                                           VG_(threads)[i].joiner_jee_tid);
+                               break;
+         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner"); break;
          case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
          case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
          case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
@@ -506,6 +507,30 @@
 }
 
 
+static 
+void mostly_clear_thread_record ( ThreadId tid )
+{
+   Int j;
+   vg_assert(tid >= 0 && tid < VG_N_THREADS);
+   VG_(threads)[tid].tid                  = tid;
+   VG_(threads)[tid].status               = VgTs_Empty;
+   VG_(threads)[tid].associated_mx        = NULL;
+   VG_(threads)[tid].associated_cv        = NULL;
+   VG_(threads)[tid].awaken_at            = 0;
+   VG_(threads)[tid].joinee_retval        = NULL;
+   VG_(threads)[tid].joiner_thread_return = NULL;
+   VG_(threads)[tid].joiner_jee_tid       = VG_INVALID_THREADID;
+   VG_(threads)[tid].cancel_st   = True; /* PTHREAD_CANCEL_ENABLE */
+   VG_(threads)[tid].cancel_ty   = True; /* PTHREAD_CANCEL_DEFERRED */
+   VG_(threads)[tid].cancel_pend = NULL; /* not pending */
+   VG_(threads)[tid].detached             = False;
+   VG_(ksigemptyset)(&VG_(threads)[tid].sig_mask);
+   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
+   for (j = 0; j < VG_N_THREAD_KEYS; j++)
+      VG_(threads)[tid].specifics[j] = NULL;
+}
+
+
 /* Initialise the scheduler.  Create a single "main" thread ready to
    run, with special ThreadId of one.  This is called at startup; the
    caller takes care to park the client's state is parked in
@@ -531,12 +556,10 @@
    }
 
    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
-      VG_(threads)[i].status     = VgTs_Empty;
-      VG_(threads)[i].stack_size = 0;
-      VG_(threads)[i].stack_base = (Addr)NULL;
-      VG_(threads)[i].tid        = i;
-      VG_(ksigemptyset)(&VG_(threads)[i].sig_mask);
-      VG_(ksigemptyset)(&VG_(threads)[i].sigs_waited_for);
+      mostly_clear_thread_record(i);
+      VG_(threads)[i].stack_size           = 0;
+      VG_(threads)[i].stack_base           = (Addr)NULL;
+      VG_(threads)[i].stack_highest_word   = (Addr)NULL;
    }
 
    for (i = 0; i < VG_N_WAITING_FDS; i++)
@@ -551,14 +574,7 @@
       properties. */
    tid_main = vg_alloc_ThreadState();
    vg_assert(tid_main == 1); 
-
-   VG_(threads)[tid_main].status        = VgTs_Runnable;
-   VG_(threads)[tid_main].joiner        = VG_INVALID_THREADID;
-   VG_(threads)[tid_main].associated_mx = NULL;
-   VG_(threads)[tid_main].associated_cv = NULL;
-   VG_(threads)[tid_main].retval        = NULL; /* not important */
-   for (i = 0; i < VG_N_THREAD_KEYS; i++)
-      VG_(threads)[tid_main].specifics[i] = NULL;
+   VG_(threads)[tid_main].status = VgTs_Runnable;
 
    /* Copy VG_(baseBlock) state to tid_main's slot. */
    vg_tid_currently_in_baseBlock = tid_main;
@@ -1544,9 +1560,38 @@
 
 
 /* -----------------------------------------------------------
-   Thread CREATION, JOINAGE and CANCELLATION.
+   Thread CREATION, JOINAGE and CANCELLATION: HELPER FNS
    -------------------------------------------------------- */
 
+/* We've decided to action a cancellation on tid.  Make it jump to
+   thread_exit_wrapper() in vg_libpthread.c, passing PTHREAD_CANCELED
+   as the arg. */
+static
+void make_thread_jump_to_cancelhdlr ( ThreadId tid )
+{
+   Char msg_buf[100];
+   vg_assert(VG_(is_valid_tid)(tid));
+   /* Push PTHREAD_CANCELED on the stack and jump to the cancellation
+      handler -- which is really thread_exit_wrapper() in
+      vg_libpthread.c. */
+   vg_assert(VG_(threads)[tid].cancel_pend != NULL);
+   VG_(threads)[tid].m_esp -= 4;
+   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)PTHREAD_CANCELED;
+   VG_(threads)[tid].m_eip = (UInt)VG_(threads)[tid].cancel_pend;
+   VG_(threads)[tid].status = VgTs_Runnable;
+   /* Make sure we aren't cancelled again whilst handling this
+      cancellation. */
+   VG_(threads)[tid].cancel_st = False;
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, 
+         "jump to cancellation handler (hdlr = %p)", 
+         VG_(threads)[tid].cancel_pend);
+      print_sched_event(tid, msg_buf);
+   }
+}
+
+
+
 /* Release resources and generally clean up once a thread has finally
    disappeared. */
 static
@@ -1567,6 +1612,61 @@
 }
 
 
+/* Look for matching pairs of threads waiting for joiners and threads
+   waiting for joinees.  For each such pair copy the return value of
+   the joinee into the joiner, let the joiner resume and discard the
+   joinee. */
+static
+void maybe_rendezvous_joiners_and_joinees ( void )
+{
+   Char     msg_buf[100];
+   void**   thread_return;
+   ThreadId jnr, jee;
+
+   for (jnr = 1; jnr < VG_N_THREADS; jnr++) {
+      if (VG_(threads)[jnr].status != VgTs_WaitJoinee)
+         continue;
+      jee = VG_(threads)[jnr].joiner_jee_tid;
+      if (jee == VG_INVALID_THREADID) 
+         continue;
+      vg_assert(VG_(is_valid_tid)(jee));
+      if (VG_(threads)[jee].status != VgTs_WaitJoiner)
+         continue;
+      /* ok!  jnr is waiting to join with jee, and jee is waiting to be
+         joined by ... well, any thread.  So let's do it! */
+
+      /* Copy return value to where joiner wants it. */
+      thread_return = VG_(threads)[jnr].joiner_thread_return;
+      if (thread_return != NULL) {
+         /* CHECK thread_return writable */
+         *thread_return = VG_(threads)[jee].joinee_retval;
+         /* Not really right, since it makes the thread's return value
+            appear to be defined even if it isn't. */
+         if (VG_(clo_instrument))
+            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
+      }
+
+      /* Joinee is discarded */
+      VG_(threads)[jee].status = VgTs_Empty; /* bye! */
+      cleanup_after_thread_exited ( jee );
+         if (VG_(clo_trace_sched)) {
+            VG_(sprintf)(msg_buf,
+               "rendezvous with joinee %d.  %d resumes, %d exits.",
+               jee, jnr, jee );
+         print_sched_event(jnr, msg_buf);
+      }
+
+      /* joiner returns with success */
+      VG_(threads)[jnr].status = VgTs_Runnable;
+      SET_EDX(jnr, 0);
+   }
+}
+
+
+/* -----------------------------------------------------------
+   Thread CREATION, JOINAGE and CANCELLATION: REQUESTS
+   -------------------------------------------------------- */
+
 static
 void do_pthread_yield ( ThreadId tid )
 {
@@ -1582,122 +1682,117 @@
 
 
 static
-void do_pthread_cancel ( ThreadId  tid,
-                         pthread_t tid_cancellee )
+void do__testcancel ( ThreadId tid )
 {
-   Char msg_buf[100];
-
    vg_assert(VG_(is_valid_tid)(tid));
-   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
-
-   if (!VG_(is_valid_tid)(tid_cancellee)
-       || VG_(threads)[tid_cancellee].status == VgTs_Empty) {
-      SET_EDX(tid, ESRCH);
-      return;
-   }
-
-   /* We want make is appear that this thread has returned to
-      do_pthread_create_bogusRA with PTHREAD_CANCELED as the
-      return value.  So: simple: put PTHREAD_CANCELED into %EAX
-      and &do_pthread_create_bogusRA into %EIP and keep going! */
-   if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf, "cancelled by %d", tid);
-      print_sched_event(tid_cancellee, msg_buf);
-   }
-   VG_(threads)[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
-   VG_(threads)[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
-   VG_(threads)[tid_cancellee].status = VgTs_Runnable;
-
-   /* We return with success (0). */
-   SET_EDX(tid, 0);
-}
-
-
-static
-void do_pthread_exit ( ThreadId tid, void* retval )
-{
-   Char msg_buf[100];
-   /* We want make is appear that this thread has returned to
-      do_pthread_create_bogusRA with retval as the
-      return value.  So: simple: put retval into %EAX
-      and &do_pthread_create_bogusRA into %EIP and keep going! */
-   if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf, "exiting with %p", retval);
-      print_sched_event(tid, msg_buf);
-   }
-   VG_(threads)[tid].m_eax  = (UInt)retval;
-   VG_(threads)[tid].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
-   VG_(threads)[tid].status = VgTs_Runnable;
-}
-
-
-/* Thread tid is exiting, by returning from the function it was
-   created with.  Or possibly due to pthread_exit or cancellation.
-   The main complication here is to resume any thread waiting to join
-   with this one. */
-static 
-void handle_pthread_return ( ThreadId tid, void* retval )
-{
-   ThreadId jnr; /* joiner, the thread calling pthread_join. */
-   UInt*    jnr_args;
-   void**   jnr_thread_return;
-   Char     msg_buf[100];
-
-   /* Mark it as not in use.  Leave the stack in place so the next
-      user of this slot doesn't reallocate it. */
-   vg_assert(VG_(is_valid_tid)(tid));
-   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
-
-   VG_(threads)[tid].retval = retval;
-
-   if (VG_(threads)[tid].joiner == VG_INVALID_THREADID) {
-      /* No one has yet done a join on me */
-      VG_(threads)[tid].status = VgTs_WaitJoiner;
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf, 
-            "root fn returns, waiting for a call pthread_join(%d)", 
-            tid);
-         print_sched_event(tid, msg_buf);
-      }
+   if (/* is there a cancellation pending on this thread? */
+       VG_(threads)[tid].cancel_pend != NULL
+       && /* is this thread accepting cancellations? */
+          VG_(threads)[tid].cancel_st) {
+     /* Ok, let's do the cancellation. */
+     make_thread_jump_to_cancelhdlr ( tid );
    } else {
-      /* Some is waiting; make their join call return with success,
-         putting my exit code in the place specified by the caller's
-         thread_return param.  This is all very horrible, since we
-         need to consult the joiner's arg block -- pointed to by its
-         %EAX -- in order to extract the 2nd param of its pthread_join
-         call.  TODO: free properly the slot (also below). 
-      */
-      jnr = VG_(threads)[tid].joiner;
-      vg_assert(VG_(is_valid_tid)(jnr));
-      vg_assert(VG_(threads)[jnr].status == VgTs_WaitJoinee);
-      jnr_args = (UInt*)VG_(threads)[jnr].m_eax;
-      jnr_thread_return = (void**)(jnr_args[2]);
-      if (jnr_thread_return != NULL)
-         *jnr_thread_return = VG_(threads)[tid].retval;
-      SET_EDX(jnr, 0); /* success */
-      VG_(threads)[jnr].status = VgTs_Runnable;
-      VG_(threads)[tid].status = VgTs_Empty; /* bye! */
-      cleanup_after_thread_exited ( tid );
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf, 
-            "root fn returns, to find a waiting pthread_join(%d)", tid);
-         print_sched_event(tid, msg_buf);
-         VG_(sprintf)(msg_buf, 
-            "my pthread_join(%d) returned; resuming", tid);
-         print_sched_event(jnr, msg_buf);
-      }
+      /* No, we keep going. */
+      SET_EDX(tid, 0);
    }
-
-   /* Return value is irrelevant; this thread will not get
-      rescheduled. */
 }
 
 
 static
-void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
+void do__set_cancelstate ( ThreadId tid, Int state )
+{
+   Bool old_st;
+   vg_assert(VG_(is_valid_tid)(tid));
+   old_st = VG_(threads)[tid].cancel_st;
+   if (state == PTHREAD_CANCEL_ENABLE) {
+      VG_(threads)[tid].cancel_st = True;
+   } else
+   if (state == PTHREAD_CANCEL_DISABLE) {
+      VG_(threads)[tid].cancel_st = False;
+   } else {
+      VG_(panic)("do__set_cancelstate");
+   }
+   SET_EDX(tid, old_st ? PTHREAD_CANCEL_ENABLE 
+                       : PTHREAD_CANCEL_DISABLE);
+}
+
+
+/* Set tid's cancel type (cancel_ty: True == DEFERRED, False == ASYNCH)
+   and hand the old type back via SET_EDX; any other 'type' panics. */
+static
+void do__set_canceltype ( ThreadId tid, Int type )
+{
+   Bool old_ty;
+   vg_assert(VG_(is_valid_tid)(tid));
+   old_ty = VG_(threads)[tid].cancel_ty;
+   if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
+      VG_(threads)[tid].cancel_ty = False;
+   } else if (type == PTHREAD_CANCEL_DEFERRED) {
+      VG_(threads)[tid].cancel_ty = True;
+   } else {
+      VG_(panic)("do__set_canceltype");
+   }
+   SET_EDX(tid, old_ty ? PTHREAD_CANCEL_DEFERRED : PTHREAD_CANCEL_ASYNCHRONOUS);
+}
+
+
+static
+void do__set_or_get_detach ( ThreadId tid, Int what )
+{
+   vg_assert(VG_(is_valid_tid)(tid));
+   switch (what) {
+      case 2: /* get: hand back 1 if tid is detached, else 0 */
+         SET_EDX(tid, VG_(threads)[tid].detached ? 1 : 0);
+         return;
+      case 1: /* set detached; request result is 0 */
+         VG_(threads)[tid].detached = True;
+         SET_EDX(tid, 0); 
+         return;
+      case 0: /* set not detached; request result is 0 */
+         VG_(threads)[tid].detached = False;
+         SET_EDX(tid, 0);
+         return;
+      default: /* any other value of 'what' is fatal */
+         VG_(panic)("do__set_or_get_detach");
+   }
+}
+
+
+static
+void do__set_cancelpend ( ThreadId tid, 
+                          ThreadId cee,
+			  void (*cancelpend_hdlr)(void*) )
 {
    Char msg_buf[100];
 
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
+
+   vg_assert(VG_(is_valid_tid)(cee));
+
+   VG_(threads)[cee].cancel_pend = cancelpend_hdlr;
+
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, 
+         "set cancel pending (hdlr = %p, canceller tid = %d)", 
+         cancelpend_hdlr, tid);
+      print_sched_event(cee, msg_buf);
+   }
+
+   /* Thread doing the cancelling returns with success. */
+   SET_EDX(tid, 0);
+
+   /* Perhaps we can nuke the cancellee right now? */
+   do__testcancel(cee);
+}
+
+
+static
+void do_pthread_join ( ThreadId tid, 
+                       ThreadId jee, void** thread_return )
+{
+   Char     msg_buf[100];
+   ThreadId i;
    /* jee, the joinee, is the thread specified as an arg in thread
       tid's call to pthread_join.  So tid is the join-er. */
    vg_assert(VG_(is_valid_tid)(tid));
@@ -1709,6 +1804,11 @@
       return;
    }
 
+   /* Flush any completed pairs, so as to make sure what we're looking
+      at is up-to-date. */
+   maybe_rendezvous_joiners_and_joinees();
+
+   /* Is this a sane request? */
    if (jee < 0 
        || jee >= VG_N_THREADS
        || VG_(threads)[jee].status == VgTs_Empty) {
@@ -1718,63 +1818,94 @@
       return;
    }
 
-   if (VG_(threads)[jee].joiner != VG_INVALID_THREADID) {
-      /* Someone already did join on this thread */
-      SET_EDX(tid, EINVAL);
-      VG_(threads)[tid].status = VgTs_Runnable;
-      return;
+   /* Is anyone else already in a join-wait for jee? */
+   for (i = 1; i < VG_N_THREADS; i++) {
+      if (i == tid) continue;
+      if (VG_(threads)[i].status == VgTs_WaitJoinee
+          && VG_(threads)[i].joiner_jee_tid == jee) {
+         /* Someone already did join on this thread */
+         SET_EDX(tid, EINVAL);
+         VG_(threads)[tid].status = VgTs_Runnable;
+         return;
+      }
    }
 
-   /* if (VG_(threads)[jee].detached) ... */
-
-   /* Perhaps the joinee has already finished?  If so return
-      immediately with its return code, and free up the slot. TODO:
-      free it properly (also above). */
-   if (VG_(threads)[jee].status == VgTs_WaitJoiner) {
-      vg_assert(VG_(threads)[jee].joiner == VG_INVALID_THREADID);
-      SET_EDX(tid, 0); /* success */
-      if (thread_return != NULL) {
-         *thread_return = VG_(threads)[jee].retval;
-	 /* Not really right, since it makes the thread's return value
-            appear to be defined even if it isn't. */
-         if (VG_(clo_instrument))
-            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
-      }
-      VG_(threads)[tid].status = VgTs_Runnable;
-      VG_(threads)[jee].status = VgTs_Empty; /* bye! */
-      cleanup_after_thread_exited ( jee );
-      if (VG_(clo_trace_sched)) {
-	 VG_(sprintf)(msg_buf,
-		      "someone called pthread_join() on me; bye!");
-         print_sched_event(jee, msg_buf);
-	 VG_(sprintf)(msg_buf,
-            "my pthread_join(%d) returned immediately", 
-            jee );
-         print_sched_event(tid, msg_buf);
-      }
-      return;
-   }
-
-   /* Ok, so we'll have to wait on jee. */
-   VG_(threads)[jee].joiner = tid;
+   /* Mark this thread as waiting for the joinee. */
    VG_(threads)[tid].status = VgTs_WaitJoinee;
+   VG_(threads)[tid].joiner_thread_return = thread_return;
+   VG_(threads)[tid].joiner_jee_tid = jee;
+
+   /* Look for matching joiners and joinees and do the right thing. */
+   maybe_rendezvous_joiners_and_joinees();
+
+   /* Return value is irrelevant since this thread becomes non-runnable.
+      maybe_rendezvous_joiners_and_joinees() will cause it to return
+      the right value when it resumes. */
+
    if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf,
-         "blocking on call of pthread_join(%d)", jee );
+      VG_(sprintf)(msg_buf, 
+         "wait for joinee %d (may already be ready)", jee);
       print_sched_event(tid, msg_buf);
    }
-   /* So tid's join call does not return just now. */
 }
 
 
+/* ( void* ): calling thread waits for joiner and returns the void* to
+   it.  This is one of two ways in which a thread can finally exit --
+   the other is do__quit. */
 static
-void do_pthread_create ( ThreadId parent_tid,
-                         pthread_t* thread, 
-                         pthread_attr_t* attr, 
-                         void* (*start_routine)(void *), 
-                         void* arg )
+void do__wait_joiner ( ThreadId tid, void* retval )
 {
-   Int      i;
+   Char msg_buf[100];
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, 
+         "WAIT_JOINER(%p) (non-detached thread exit)", retval);
+      print_sched_event(tid, msg_buf);
+   }
+   VG_(threads)[tid].status = VgTs_WaitJoiner;
+   VG_(threads)[tid].joinee_retval = retval;
+   maybe_rendezvous_joiners_and_joinees();
+}
+
+
+/* ( no-args ): calling thread disappears from the system forever.
+   Reclaim resources. */
+static
+void do__quit ( ThreadId tid )
+{
+   Char msg_buf[100];
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
+   VG_(threads)[tid].status = VgTs_Empty; /* bye! */
+   cleanup_after_thread_exited ( tid );
+
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "QUIT (detached thread exit)");
+      print_sched_event(tid, msg_buf);
+   }
+   /* Return value is irrelevant; this thread will not get
+      rescheduled. */
+}
+
+
+/* Should never be entered.  If it is, will be on the simulated
+   CPU. */
+static 
+void do__apply_in_new_thread_bogusRA ( void )
+{
+   VG_(panic)("do__apply_in_new_thread_bogusRA");
+}
+
+/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it.  Fn
+   MUST NOT return -- ever.  Eventually it will do either __QUIT or
+   __WAIT_JOINER.  Return the child tid to the parent. */
+static
+void do__apply_in_new_thread ( ThreadId parent_tid,
+                               void* (*fn)(void *), 
+                               void* arg )
+{
    Addr     new_stack;
    UInt     new_stk_szb;
    ThreadId tid;
@@ -1829,15 +1960,16 @@
    VG_(threads)[tid].m_esp -= 4;
    * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;
 
-   /* push (magical) return address */
+   /* push (bogus) return address */
    VG_(threads)[tid].m_esp -= 4;
-   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
+   * (UInt*)(VG_(threads)[tid].m_esp) 
+      = (UInt)&do__apply_in_new_thread_bogusRA;
 
    if (VG_(clo_instrument))
       VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
 
    /* this is where we start */
-   VG_(threads)[tid].m_eip = (UInt)start_routine;
+   VG_(threads)[tid].m_eip = (UInt)fn;
 
    if (VG_(clo_trace_sched)) {
       VG_(sprintf)(msg_buf,
@@ -1845,27 +1977,18 @@
       print_sched_event(tid, msg_buf);
    }
 
-   /* store the thread id in *thread. */
-   //   if (VG_(clo_instrument))
-   // ***** CHECK *thread is writable
-   *thread = (pthread_t)tid;
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)thread, sizeof(pthread_t) );
-
-   VG_(threads)[tid].associated_mx = NULL;
-   VG_(threads)[tid].associated_cv = NULL;
-   VG_(threads)[tid].joiner        = VG_INVALID_THREADID;
-   VG_(threads)[tid].status        = VgTs_Runnable;
-
-   for (i = 0; i < VG_N_THREAD_KEYS; i++)
-      VG_(threads)[tid].specifics[i] = NULL;
+   /* Create new thread with default attrs:
+      deferred cancellation, not detached 
+   */
+   mostly_clear_thread_record(tid);
+   VG_(threads)[tid].status = VgTs_Runnable;
 
    /* We inherit our parent's signal mask. */
    VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask;
-   VG_(ksigemptyset)(&VG_(threads)[i].sigs_waited_for);
+   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
 
-   /* return zero */
-   SET_EDX(parent_tid, 0); /* success */
+   /* return child's tid to parent */
+   SET_EDX(parent_tid, tid); /* success */
 }
 
 
@@ -2625,30 +2748,10 @@
    UInt  req_no = arg[0];
    switch (req_no) {
 
-      case VG_USERREQ__PTHREAD_CREATE:
-         do_pthread_create( tid, 
-                            (pthread_t*)arg[1], 
-                            (pthread_attr_t*)arg[2], 
-                            (void*(*)(void*))arg[3], 
-                            (void*)arg[4] );
-         break;
-
-      case VG_USERREQ__PTHREAD_RETURNS:
-         handle_pthread_return( tid, (void*)arg[1] );
-         break;
-
       case VG_USERREQ__PTHREAD_JOIN:
          do_pthread_join( tid, arg[1], (void**)(arg[2]) );
          break;
 
-      case VG_USERREQ__PTHREAD_CANCEL:
-         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
-         break;
-
-      case VG_USERREQ__PTHREAD_EXIT:
-         do_pthread_exit( tid, (void*)(arg[1]) );
-         break;
-
       case VG_USERREQ__PTHREAD_COND_WAIT:
          do_pthread_cond_wait( tid, 
                                (pthread_cond_t *)(arg[1]),
diff --git a/vg_constants.h b/vg_constants.h
index 252353c..d3da14b 100644
--- a/vg_constants.h
+++ b/vg_constants.h
@@ -90,9 +90,8 @@
 /* Constants for the fast original-code-write check cache. */
 
 
-/* Assembly code stubs make these requests ... */
+/* Assembly code stubs make this request */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
-#define VG_USERREQ__PTHREAD_RETURNS         0x4002
 
 #endif /* ndef __VG_INCLUDE_H */
 
diff --git a/vg_helpers.S b/vg_helpers.S
index 2968922..b2654bb 100644
--- a/vg_helpers.S
+++ b/vg_helpers.S
@@ -32,7 +32,7 @@
 #include "vg_constants.h"
 
 /* ------------------ SIMULATED CPU HELPERS ------------------ */
-/* A couple of stubs for returns which we want to catch: signal
+/* A stub for a return which we want to catch: a signal return.
    returns and pthread returns.  In the latter case, the thread's
    return value is in %EAX, so we pass this as the first argument
    to the request.  In both cases we use the user request mechanism.
@@ -68,36 +68,6 @@
 	
 
 
-.global VG_(pthreadreturn_bogusRA)
-VG_(pthreadreturn_bogusRA):
-	subl	$20, %esp	# allocate arg block
-	movl	%esp, %edx	# %edx == &_zzq_args[0]
-	movl	$VG_USERREQ__PTHREAD_RETURNS, 0(%edx)	# request
-	movl	%eax, 4(%edx)	# arg1 == thread return value
-	movl	$0, 8(%edx)	# arg2
-	movl	$0, 12(%edx)	# arg3
-	movl	$0, 16(%edx)	# arg4
-	movl	%edx, %eax
-	# and now the magic sequence itself:
-	roll $29, %eax
-	roll $3, %eax
-	rorl $27, %eax
-	rorl $5, %eax
-	roll $13, %eax
-	roll $19, %eax
-	# should never get here
-	pushl	$pthreadreturn_bogusRA_panic_msg
-	call	VG_(panic)
-	
-.data
-pthreadreturn_bogusRA_panic_msg:
-.ascii	"vg_pthreadreturn_bogusRA: VG_USERREQ__PTHREAD_RETURNS was missed"
-.byte	0
-.text	
-	
-
-
-
 	
 /* ------------------ REAL CPU HELPERS ------------------ */
 /* The rest of this lot run on the real CPU. */
diff --git a/vg_include.h b/vg_include.h
index 9431c8e..e81fe77 100644
--- a/vg_include.h
+++ b/vg_include.h
@@ -146,6 +146,12 @@
    beyond it. */
 #define VG_PTHREAD_STACK_SIZE 65536
 
+/* Number of entries in the semaphore-remapping table. */
+#define VG_N_SEMAPHORES 50
+
+/* Number of entries in the rwlock-remapping table. */
+#define VG_N_RWLOCKS 50
+
 
 /* ---------------------------------------------------------------------
    Basic types
@@ -416,27 +422,54 @@
 #define VG_USERREQ__MEMALIGN            0x2009
 
 
-#define VG_USERREQ__PTHREAD_CREATE          0x3001
-#define VG_USERREQ__PTHREAD_JOIN            0x3002
-#define VG_USERREQ__PTHREAD_GET_THREADID    0x3003
-#define VG_USERREQ__PTHREAD_MUTEX_LOCK      0x3004
-#define VG_USERREQ__PTHREAD_MUTEX_TRYLOCK   0x3005
-#define VG_USERREQ__PTHREAD_MUTEX_UNLOCK    0x3006
-#define VG_USERREQ__PTHREAD_CANCEL          0x3007
-#define VG_USERREQ__PTHREAD_EXIT            0x3008
-#define VG_USERREQ__PTHREAD_COND_WAIT       0x3009
-#define VG_USERREQ__PTHREAD_COND_TIMEDWAIT  0x300A
-#define VG_USERREQ__PTHREAD_COND_SIGNAL     0x300B
-#define VG_USERREQ__PTHREAD_COND_BROADCAST  0x300C
-#define VG_USERREQ__PTHREAD_KEY_CREATE      0x300D
-#define VG_USERREQ__PTHREAD_KEY_DELETE      0x300E
-#define VG_USERREQ__PTHREAD_SETSPECIFIC     0x300F
-#define VG_USERREQ__PTHREAD_GETSPECIFIC     0x3010
-#define VG_USERREQ__READ_MILLISECOND_TIMER  0x3011
-#define VG_USERREQ__PTHREAD_SIGMASK         0x3012
-#define VG_USERREQ__SIGWAIT                 0x3013
-#define VG_USERREQ__PTHREAD_KILL            0x3014
-#define VG_USERREQ__PTHREAD_YIELD           0x3015
+/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it.  Fn
+   MUST NOT return -- ever.  Eventually it will do either __QUIT or
+   __WAIT_JOINER.  */
+#define VG_USERREQ__APPLY_IN_NEW_THREAD     0x3001
+
+/* ( no-args ): calling thread disappears from the system forever.
+   Reclaim resources. */
+#define VG_USERREQ__QUIT                    0x3002
+
+/* ( void* ): calling thread waits for joiner and returns the void* to
+   it. */
+#define VG_USERREQ__WAIT_JOINER             0x3003
+
+/* ( ThreadId, void** ): wait to join a thread. */
+#define VG_USERREQ__PTHREAD_JOIN            0x3004
+
+/* Set cancellation state and type for this thread. */
+#define VG_USERREQ__SET_CANCELSTATE         0x3005
+#define VG_USERREQ__SET_CANCELTYPE          0x3006
+
+/* ( no-args ): Test if we are at a cancellation point. */
+#define VG_USERREQ__TESTCANCEL              0x3007
+
+/* ( ThreadId, &thread_exit_wrapper is the only allowable arg ): call
+   with this arg to indicate that a cancel is now pending for the
+   specified thread. */
+#define VG_USERREQ__SET_CANCELPEND          0x3008
+
+/* Set/get detach state for this thread. */
+#define VG_USERREQ__SET_OR_GET_DETACH       0x3009
+
+#define VG_USERREQ__PTHREAD_GET_THREADID    0x300B
+#define VG_USERREQ__PTHREAD_MUTEX_LOCK      0x300C
+#define VG_USERREQ__PTHREAD_MUTEX_TRYLOCK   0x300D
+#define VG_USERREQ__PTHREAD_MUTEX_UNLOCK    0x300E
+#define VG_USERREQ__PTHREAD_COND_WAIT       0x300F
+#define VG_USERREQ__PTHREAD_COND_TIMEDWAIT  0x3010
+#define VG_USERREQ__PTHREAD_COND_SIGNAL     0x3011
+#define VG_USERREQ__PTHREAD_COND_BROADCAST  0x3012
+#define VG_USERREQ__PTHREAD_KEY_CREATE      0x3013
+#define VG_USERREQ__PTHREAD_KEY_DELETE      0x3014
+#define VG_USERREQ__PTHREAD_SETSPECIFIC     0x3015
+#define VG_USERREQ__PTHREAD_GETSPECIFIC     0x3016
+#define VG_USERREQ__READ_MILLISECOND_TIMER  0x3017
+#define VG_USERREQ__PTHREAD_SIGMASK         0x3018
+#define VG_USERREQ__SIGWAIT                 0x3019
+#define VG_USERREQ__PTHREAD_KILL            0x301A
+#define VG_USERREQ__PTHREAD_YIELD           0x301B
 
 /* Cosmetic ... */
 #define VG_USERREQ__GET_PTHREAD_TRACE_LEVEL 0x3101
@@ -444,7 +477,6 @@
 /* 
 In vg_constants.h:
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
-#define VG_USERREQ__PTHREAD_RETURNS         0x4002
 */
 
 
@@ -506,10 +538,6 @@
          the mutex finally gets unblocked. */
       ThreadStatus status;
 
-      /* Identity of joiner (thread who called join on me), or
-         VG_INVALID_THREADID if no one asked to join yet. */
-      ThreadId joiner;
-
       /* When .status == WaitMX, points to the mutex I am waiting for.
          When .status == WaitCV, points to the mutex associated with
          the condition variable indicated by the .associated_cv field.
@@ -529,8 +557,26 @@
          pthread_cond_wait. */
       UInt awaken_at;
 
-      /* return value */
-      void* retval;
+      /* If VgTs_WaitJoiner, return value, as generated by joinees. */
+      void* joinee_retval;
+
+      /* If VgTs_WaitJoinee, place to copy the return value to, and
+         the identity of the thread we're waiting for. */
+      void**   joiner_thread_return;
+      ThreadId joiner_jee_tid;      
+
+      /* Cancelability state and type. */
+      Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
+      Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
+     
+      /* Pointer to fn to call to do cancellation.  Indicates whether
+         or not cancellation is pending.  If NULL, not pending.  Else
+         should be &thread_exit_wrapper(), indicating that
+         cancellation is pending. */
+      void (*cancel_pend)(void*);
+
+      /* Whether or not detached. */
+      Bool detached;
 
       /* thread-specific data */
       void* specifics[VG_N_THREAD_KEYS];
@@ -1694,9 +1740,9 @@
 extern void VG_(helper_value_check1_fail);
 extern void VG_(helper_value_check0_fail);
 
-/* NOT FUNCTIONS; these are bogus RETURN ADDRESS. */
+/* NOT A FUNCTION; this is a bogus RETURN ADDRESS. */
 extern void VG_(signalreturn_bogusRA)( void );
-extern void VG_(pthreadreturn_bogusRA)( void );
+
 
 /* ---------------------------------------------------------------------
    Exports of vg_cachesim.c
diff --git a/vg_libpthread.c b/vg_libpthread.c
index 8d30fcf..5211106 100644
--- a/vg_libpthread.c
+++ b/vg_libpthread.c
@@ -268,6 +268,92 @@
    return 0;
 }
 
+/* --------------------------------------------------- 
+   Helper functions for running a thread 
+   and for clearing up afterwards.
+   ------------------------------------------------ */
+
+/* All exiting threads eventually pass through here, bearing the
+   return value, or PTHREAD_CANCELED, in ret_val. */
+static
+__attribute__((noreturn))
+void thread_exit_wrapper ( void* ret_val )
+{
+   int detached, res;
+   /* TODO: run this thread's cleanup handlers (no code for this yet). */
+   /* TODO: run this thread's key finalizers (no code for this yet). */
+
+   /* Decide on my final disposition: ask whether I am detached. */
+   VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */,
+                           VG_USERREQ__SET_OR_GET_DETACH, 
+                           2 /* get */, 0, 0, 0);
+   assert(detached == 0 || detached == 1);
+
+   if (detached) {
+      /* Detached; I just quit right now. */
+      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
+                              VG_USERREQ__QUIT, 0, 0, 0, 0);
+   } else {
+      /* Not detached; so I wait for a joiner, handing over ret_val. */
+      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
+                              VG_USERREQ__WAIT_JOINER, ret_val, 0, 0, 0);
+   }
+   /* NOTREACHED: neither request is expected to return to us. */
+   barf("thread_exit_wrapper: still alive?!");
+}
+
+
+/* This function is a wrapper function for running a thread.  It runs
+   the root function specified in pthread_create, and then, should the
+   root function return a value, it arranges to run the thread's
+   cleanup handlers and exit correctly. */
+
+/* Struct used to convey info from pthread_create to
+   thread_wrapper. */
+typedef
+   struct {
+      pthread_attr_t* attr;
+      void* (*root_fn) ( void* );
+      void* arg;
+   }
+   NewThreadInfo;
+
+
+/* This is passed to the VG_USERREQ__APPLY_IN_NEW_THREAD and so must
+   not return.  Note that this runs in the new thread, not the
+   parent. */
+static
+__attribute__((noreturn))
+void thread_wrapper ( NewThreadInfo* info )
+{
+   int res;
+   pthread_attr_t* attr;
+   void* (*root_fn) ( void* );
+   void* arg;
+   void* ret_val;
+
+   attr    = info->attr;
+   root_fn = info->root_fn;
+   arg     = info->arg;
+
+   if (attr)
+      kludged("pthread_create -- ignoring attributes");
+
+   /* Free up the arg block that pthread_create malloced. */
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__FREE, info, 0, 0, 0);
+   assert(res == 0);
+
+   /* The root function might not return.  But if it does we simply
+      move along to thread_exit_wrapper.  All other ways out for the
+      thread (cancellation, or calling pthread_exit) lead there
+      too. */
+   ret_val = root_fn(arg);
+   thread_exit_wrapper(ret_val);
+   /* NOTREACHED */
+}
+
+
 /* ---------------------------------------------------
    THREADs
    ------------------------------------------------ */
@@ -289,20 +375,38 @@
 }
 
 
+/* Bundle up the args into a malloc'd block and create a new thread
+   consisting of thread_wrapper() applied to said malloc'd block. */
 int
 pthread_create (pthread_t *__restrict __thread,
                 __const pthread_attr_t *__restrict __attr,
                 void *(*__start_routine) (void *),
                 void *__restrict __arg)
 {
-   int res;
-   ensure_valgrind("pthread_create");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_CREATE,
-                           __thread, __attr, __start_routine, __arg);
-   return res;
-}
+   int            tid_child;
+   NewThreadInfo* info;
 
+   ensure_valgrind("pthread_create");
+
+   /* Allocate space for the arg block.  thread_wrapper will free
+      it. */
+   VALGRIND_MAGIC_SEQUENCE(info, NULL /* default */,
+                           VG_USERREQ__MALLOC, 
+                           sizeof(NewThreadInfo), 0, 0, 0);
+   assert(info != NULL);
+
+   info->attr    = (pthread_attr_t*)__attr;
+   info->root_fn = __start_routine;
+   info->arg     = __arg;
+   VALGRIND_MAGIC_SEQUENCE(tid_child, VG_INVALID_THREADID /* default */,
+                           VG_USERREQ__APPLY_IN_NEW_THREAD,
+                           &thread_wrapper, info, 0, 0);
+   assert(tid_child != VG_INVALID_THREADID);
+
+   if (__thread)
+      *__thread = tid_child;
+   return 0; /* success */
+}
 
 
 int 
@@ -319,14 +423,9 @@
 
 void pthread_exit(void *retval)
 {
-   int res;
    ensure_valgrind("pthread_exit");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_EXIT,
-                           retval, 0, 0, 0);
-   /* Doesn't return! */
-   /* However, we have to fool gcc into knowing that. */
-   barf("pthread_exit: still alive after request?!");
+   /* Simple! */
+   thread_exit_wrapper(retval);
 }
 
 
@@ -345,9 +444,12 @@
 
 int pthread_detach(pthread_t th)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_detach");
+   int res;
+   ensure_valgrind("pthread_detach");
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_OR_GET_DETACH,
+                           1 /* set */, 0, 0, 0);
+   assert(res == 0);
    return 0;
 }
 
@@ -601,17 +703,37 @@
 
 int pthread_setcancelstate(int state, int *oldstate)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_setcancelstate");
+   int res;
+   ensure_valgrind("pthread_setcancelstate");
+   if (state != PTHREAD_CANCEL_ENABLE
+       && state != PTHREAD_CANCEL_DISABLE) 
+      return EINVAL;
+   assert(-1 != PTHREAD_CANCEL_ENABLE);
+   assert(-1 != PTHREAD_CANCEL_DISABLE);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELSTATE,
+                           state, 0, 0, 0);
+   assert(res != -1);
+   if (oldstate) 
+      *oldstate = res;
    return 0;
 }
 
 int pthread_setcanceltype(int type, int *oldtype)
 {
-   static int moans = N_MOANS;
-   if (moans-- > 0) 
-      ignored("pthread_setcanceltype");
+   int res;
+   ensure_valgrind("pthread_setcanceltype");
+   if (type != PTHREAD_CANCEL_DEFERRED
+       && type != PTHREAD_CANCEL_ASYNCHRONOUS) 
+      return EINVAL;
+   assert(-1 != PTHREAD_CANCEL_DEFERRED);
+   assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELTYPE,
+                           type, 0, 0, 0);
+   assert(res != -1);
+   if (oldtype) 
+      *oldtype = res;
    return 0;
 }
 
@@ -619,16 +741,24 @@
 {
    int res;
    ensure_valgrind("pthread_cancel");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_CANCEL,
-                           thread, 0, 0, 0);
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__SET_CANCELPEND,
+                           thread, &thread_exit_wrapper, 0, 0);
+   assert(res != -1);
    return res;
 }
 
+__inline__
 void pthread_testcancel(void)
 {
+   int res;
+   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
+                           VG_USERREQ__TESTCANCEL,
+                           0, 0, 0, 0);
+   assert(res == 0);
 }
 
+
 /*-------------------*/
 static pthread_mutex_t massacre_mx = PTHREAD_MUTEX_INITIALIZER;
 
@@ -1598,7 +1728,6 @@
 
 /* This is a terrible way to do the remapping.  Plan is to import an
    AVL tree at some point. */
-#define VG_N_SEMAPHORES 50
 
 typedef
    struct {
@@ -1772,8 +1901,6 @@
  * initialize/create and destroy/free the reader/writer lock.
  */
 
-#define VG_N_RWLOCKS 50
-
 /*
  * Structure describing a read-write lock.
  */
diff --git a/vg_scheduler.c b/vg_scheduler.c
index c4876f8..7dbb89b 100644
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -213,9 +213,10 @@
       switch (VG_(threads)[i].status) {
          case VgTs_Runnable:   VG_(printf)("Runnable"); break;
          case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
-         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)", 
-                                           VG_(threads)[i].joiner); break;
-         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
+         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee(%d)", 
+                                           VG_(threads)[i].joiner_jee_tid);
+                               break;
+         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner"); break;
          case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
          case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
          case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
@@ -506,6 +507,30 @@
 }
 
 
+static 
+void mostly_clear_thread_record ( ThreadId tid )
+{  /* Reset slot tid to defaults; stack_* and m_* fields are not touched. */
+   Int j;
+   vg_assert(tid >= 0 && tid < VG_N_THREADS); /* slot 0 is accepted too */
+   VG_(threads)[tid].tid                  = tid;
+   VG_(threads)[tid].status               = VgTs_Empty;
+   VG_(threads)[tid].associated_mx        = NULL;
+   VG_(threads)[tid].associated_cv        = NULL;
+   VG_(threads)[tid].awaken_at            = 0;
+   VG_(threads)[tid].joinee_retval        = NULL;
+   VG_(threads)[tid].joiner_thread_return = NULL;
+   VG_(threads)[tid].joiner_jee_tid       = VG_INVALID_THREADID;
+   VG_(threads)[tid].cancel_st   = True; /* PTHREAD_CANCEL_ENABLE */
+   VG_(threads)[tid].cancel_ty   = True; /* PTHREAD_CANCEL_DEFERRED */
+   VG_(threads)[tid].cancel_pend = NULL; /* not pending */
+   VG_(threads)[tid].detached             = False;
+   VG_(ksigemptyset)(&VG_(threads)[tid].sig_mask);
+   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
+   for (j = 0; j < VG_N_THREAD_KEYS; j++) /* no thread-specific data yet */
+      VG_(threads)[tid].specifics[j] = NULL;
+}
+
+
 /* Initialise the scheduler.  Create a single "main" thread ready to
    run, with special ThreadId of one.  This is called at startup; the
    caller takes care to park the client's state is parked in
@@ -531,12 +556,10 @@
    }
 
    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
-      VG_(threads)[i].status     = VgTs_Empty;
-      VG_(threads)[i].stack_size = 0;
-      VG_(threads)[i].stack_base = (Addr)NULL;
-      VG_(threads)[i].tid        = i;
-      VG_(ksigemptyset)(&VG_(threads)[i].sig_mask);
-      VG_(ksigemptyset)(&VG_(threads)[i].sigs_waited_for);
+      mostly_clear_thread_record(i);
+      VG_(threads)[i].stack_size           = 0;
+      VG_(threads)[i].stack_base           = (Addr)NULL;
+      VG_(threads)[i].stack_highest_word   = (Addr)NULL;
    }
 
    for (i = 0; i < VG_N_WAITING_FDS; i++)
@@ -551,14 +574,7 @@
       properties. */
    tid_main = vg_alloc_ThreadState();
    vg_assert(tid_main == 1); 
-
-   VG_(threads)[tid_main].status        = VgTs_Runnable;
-   VG_(threads)[tid_main].joiner        = VG_INVALID_THREADID;
-   VG_(threads)[tid_main].associated_mx = NULL;
-   VG_(threads)[tid_main].associated_cv = NULL;
-   VG_(threads)[tid_main].retval        = NULL; /* not important */
-   for (i = 0; i < VG_N_THREAD_KEYS; i++)
-      VG_(threads)[tid_main].specifics[i] = NULL;
+   VG_(threads)[tid_main].status = VgTs_Runnable;
 
    /* Copy VG_(baseBlock) state to tid_main's slot. */
    vg_tid_currently_in_baseBlock = tid_main;
@@ -1544,9 +1560,38 @@
 
 
 /* -----------------------------------------------------------
-   Thread CREATION, JOINAGE and CANCELLATION.
+   Thread CREATION, JOINAGE and CANCELLATION: HELPER FNS
    -------------------------------------------------------- */
 
+/* We've decided to action a cancellation on tid.  Make it jump to
+   thread_exit_wrapper() in vg_libpthread.c, passing PTHREAD_CANCELED
+   as the arg. */
+static
+void make_thread_jump_to_cancelhdlr ( ThreadId tid )
+{
+   Char msg_buf[100];
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].cancel_pend != NULL);
+   /* Build a call frame: arg PTHREAD_CANCELED plus a bogus return
+      address, so the handler finds its arg at the usual offset. */
+   VG_(threads)[tid].m_esp -= 8;
+   ((UInt*)(VG_(threads)[tid].m_esp))[1] = (UInt)PTHREAD_CANCELED;
+   ((UInt*)(VG_(threads)[tid].m_esp))[0] = 0xBEADDEEF; /* never returned to */
+   if (VG_(clo_instrument))
+      VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_(threads)[tid].m_eip  = (UInt)VG_(threads)[tid].cancel_pend;
+   VG_(threads)[tid].status = VgTs_Runnable;
+   /* Don't get cancelled again whilst handling this cancellation. */
+   VG_(threads)[tid].cancel_st = False;
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "jump to cancellation handler (hdlr = %p)", 
+                   VG_(threads)[tid].cancel_pend);
+      print_sched_event(tid, msg_buf);
+   }
+}
+
+
+
 /* Release resources and generally clean up once a thread has finally
    disappeared. */
 static
@@ -1567,6 +1612,61 @@
 }
 
 
+/* Look for matching pairs of threads waiting for joiners and threads
+   waiting for joinees.  For each such pair copy the return value of
+   the joinee into the joiner, let the joiner resume and discard the
+   joinee. */
+static
+void maybe_rendezvous_joiners_and_joinees ( void )
+{  /* One pass over all slots; several joins may complete per call. */
+   Char     msg_buf[100];
+   void**   thread_return;
+   ThreadId jnr, jee;
+
+   for (jnr = 1; jnr < VG_N_THREADS; jnr++) {
+      if (VG_(threads)[jnr].status != VgTs_WaitJoinee)
+         continue;
+      jee = VG_(threads)[jnr].joiner_jee_tid;
+      if (jee == VG_INVALID_THREADID) 
+         continue;
+      vg_assert(VG_(is_valid_tid)(jee));
+      if (VG_(threads)[jee].status != VgTs_WaitJoiner)
+         continue;
+      /* ok!  jnr is waiting to join with jee, and jee is waiting to be
+         joined by ... well, any thread.  So let's do it! */
+
+      /* Copy return value to where joiner wants it. */
+      thread_return = VG_(threads)[jnr].joiner_thread_return;
+      if (thread_return != NULL) {
+         /* CHECK thread_return writable */
+         *thread_return = VG_(threads)[jee].joinee_retval;
+         /* Not really right, since it makes the thread's return value
+            appear to be defined even if it isn't. */
+         if (VG_(clo_instrument))
+            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
+      }
+
+      /* Joinee is discarded */
+      VG_(threads)[jee].status = VgTs_Empty; /* bye! */
+      cleanup_after_thread_exited ( jee );
+      if (VG_(clo_trace_sched)) {
+         VG_(sprintf)(msg_buf,
+            "rendezvous with joinee %d.  %d resumes, %d exits.",
+            jee, jnr, jee );
+         print_sched_event(jnr, msg_buf);
+      }
+
+      /* joiner returns with success */
+      VG_(threads)[jnr].status = VgTs_Runnable;
+      SET_EDX(jnr, 0);
+   }
+}
+
+
+/* -----------------------------------------------------------
+   Thread CREATION, JOINAGE and CANCELLATION: REQUESTS
+   -------------------------------------------------------- */
+
 static
 void do_pthread_yield ( ThreadId tid )
 {
@@ -1582,122 +1682,117 @@
 
 
 static
-void do_pthread_cancel ( ThreadId  tid,
-                         pthread_t tid_cancellee )
+void do__testcancel ( ThreadId tid )
 {
-   Char msg_buf[100];
-
    vg_assert(VG_(is_valid_tid)(tid));
-   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
-
-   if (!VG_(is_valid_tid)(tid_cancellee)
-       || VG_(threads)[tid_cancellee].status == VgTs_Empty) {
-      SET_EDX(tid, ESRCH);
-      return;
-   }
-
-   /* We want make is appear that this thread has returned to
-      do_pthread_create_bogusRA with PTHREAD_CANCELED as the
-      return value.  So: simple: put PTHREAD_CANCELED into %EAX
-      and &do_pthread_create_bogusRA into %EIP and keep going! */
-   if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf, "cancelled by %d", tid);
-      print_sched_event(tid_cancellee, msg_buf);
-   }
-   VG_(threads)[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
-   VG_(threads)[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
-   VG_(threads)[tid_cancellee].status = VgTs_Runnable;
-
-   /* We return with success (0). */
-   SET_EDX(tid, 0);
-}
-
-
-static
-void do_pthread_exit ( ThreadId tid, void* retval )
-{
-   Char msg_buf[100];
-   /* We want make is appear that this thread has returned to
-      do_pthread_create_bogusRA with retval as the
-      return value.  So: simple: put retval into %EAX
-      and &do_pthread_create_bogusRA into %EIP and keep going! */
-   if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf, "exiting with %p", retval);
-      print_sched_event(tid, msg_buf);
-   }
-   VG_(threads)[tid].m_eax  = (UInt)retval;
-   VG_(threads)[tid].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
-   VG_(threads)[tid].status = VgTs_Runnable;
-}
-
-
-/* Thread tid is exiting, by returning from the function it was
-   created with.  Or possibly due to pthread_exit or cancellation.
-   The main complication here is to resume any thread waiting to join
-   with this one. */
-static 
-void handle_pthread_return ( ThreadId tid, void* retval )
-{
-   ThreadId jnr; /* joiner, the thread calling pthread_join. */
-   UInt*    jnr_args;
-   void**   jnr_thread_return;
-   Char     msg_buf[100];
-
-   /* Mark it as not in use.  Leave the stack in place so the next
-      user of this slot doesn't reallocate it. */
-   vg_assert(VG_(is_valid_tid)(tid));
-   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
-
-   VG_(threads)[tid].retval = retval;
-
-   if (VG_(threads)[tid].joiner == VG_INVALID_THREADID) {
-      /* No one has yet done a join on me */
-      VG_(threads)[tid].status = VgTs_WaitJoiner;
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf, 
-            "root fn returns, waiting for a call pthread_join(%d)", 
-            tid);
-         print_sched_event(tid, msg_buf);
-      }
+   if (/* is there a cancellation pending on this thread? */
+       VG_(threads)[tid].cancel_pend != NULL
+       && /* is this thread accepting cancellations? */
+          VG_(threads)[tid].cancel_st) {
+     /* Ok, let's do the cancellation. */
+     make_thread_jump_to_cancelhdlr ( tid );
    } else {
-      /* Some is waiting; make their join call return with success,
-         putting my exit code in the place specified by the caller's
-         thread_return param.  This is all very horrible, since we
-         need to consult the joiner's arg block -- pointed to by its
-         %EAX -- in order to extract the 2nd param of its pthread_join
-         call.  TODO: free properly the slot (also below). 
-      */
-      jnr = VG_(threads)[tid].joiner;
-      vg_assert(VG_(is_valid_tid)(jnr));
-      vg_assert(VG_(threads)[jnr].status == VgTs_WaitJoinee);
-      jnr_args = (UInt*)VG_(threads)[jnr].m_eax;
-      jnr_thread_return = (void**)(jnr_args[2]);
-      if (jnr_thread_return != NULL)
-         *jnr_thread_return = VG_(threads)[tid].retval;
-      SET_EDX(jnr, 0); /* success */
-      VG_(threads)[jnr].status = VgTs_Runnable;
-      VG_(threads)[tid].status = VgTs_Empty; /* bye! */
-      cleanup_after_thread_exited ( tid );
-      if (VG_(clo_trace_sched)) {
-         VG_(sprintf)(msg_buf, 
-            "root fn returns, to find a waiting pthread_join(%d)", tid);
-         print_sched_event(tid, msg_buf);
-         VG_(sprintf)(msg_buf, 
-            "my pthread_join(%d) returned; resuming", tid);
-         print_sched_event(jnr, msg_buf);
-      }
+      /* No, we keep going. */
+      SET_EDX(tid, 0);
    }
-
-   /* Return value is irrelevant; this thread will not get
-      rescheduled. */
 }
 
 
 static
-void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
+void do__set_cancelstate ( ThreadId tid, Int state )
+{
+   Bool old_st;
+   vg_assert(VG_(is_valid_tid)(tid));
+   old_st = VG_(threads)[tid].cancel_st;
+   if (state == PTHREAD_CANCEL_ENABLE) {
+      VG_(threads)[tid].cancel_st = True;
+   } else
+   if (state == PTHREAD_CANCEL_DISABLE) {
+      VG_(threads)[tid].cancel_st = False;
+   } else {
+      VG_(panic)("do__set_cancelstate");
+   }
+   SET_EDX(tid, old_st ? PTHREAD_CANCEL_ENABLE 
+                       : PTHREAD_CANCEL_DISABLE);
+}
+
+
+/* Set tid's cancellation type (cancel_ty: True = deferred, False =
+   asynchronous); the previous type goes back to tid via SET_EDX. */
+static void do__set_canceltype ( ThreadId tid, Int type )
+{
+   Bool old_ty;
+   vg_assert(VG_(is_valid_tid)(tid));
+   old_ty = VG_(threads)[tid].cancel_ty;
+   if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
+      VG_(threads)[tid].cancel_ty = False;
+   } else if (type == PTHREAD_CANCEL_DEFERRED) {
+      VG_(threads)[tid].cancel_ty = True; /* the type, not cancel_st */
+   } else {
+      VG_(panic)("do__set_canceltype");
+   }
+   SET_EDX(tid, old_ty ? PTHREAD_CANCEL_DEFERRED
+                       : PTHREAD_CANCEL_ASYNCHRONOUS);
+}
+
+
+static
+void do__set_or_get_detach ( ThreadId tid, Int what )
+{  /* Query or change tid's detached flag; the reply goes out via SET_EDX. */
+   vg_assert(VG_(is_valid_tid)(tid));
+   switch (what) {
+      case 2: /* get */
+         SET_EDX(tid, VG_(threads)[tid].detached ? 1 : 0);
+         return;
+      case 1: /* set detached */
+         VG_(threads)[tid].detached = True;
+         SET_EDX(tid, 0); 
+         return;
+      case 0: /* set not detached */
+         VG_(threads)[tid].detached = False;
+         SET_EDX(tid, 0);
+         return;
+      default: /* unknown selector: panic */
+         VG_(panic)("do__set_or_get_detach");
+   }
+}
+
+
+/* Thread tid asks for thread cee to be cancelled via cancelpend_hdlr. */
+static
+void do__set_cancelpend ( ThreadId tid, ThreadId cee,
+                          void (*cancelpend_hdlr)(void*) )
 {
    Char msg_buf[100];
 
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
+   /* A bad target is the client's mistake, not ours: report ESRCH. */
+   if (!VG_(is_valid_tid)(cee) || VG_(threads)[cee].status == VgTs_Empty) {
+      SET_EDX(tid, ESRCH);
+      return;
+   }
+   VG_(threads)[cee].cancel_pend = cancelpend_hdlr;
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "set cancel pending (hdlr = %p, "
+                            "canceller tid = %d)", cancelpend_hdlr, tid);
+      print_sched_event(cee, msg_buf);
+   }
+   /* Thread doing the cancelling returns with success. */
+   SET_EDX(tid, 0);
+   /* Nuke the cancellee right now if it accepts cancellations.  Don't
+      go via do__testcancel(): its no-op path would overwrite cee's %EDX. */
+   if (cancelpend_hdlr != NULL && VG_(threads)[cee].cancel_st)
+      make_thread_jump_to_cancelhdlr(cee);
+}
+
+
+static
+void do_pthread_join ( ThreadId tid, 
+                       ThreadId jee, void** thread_return )
+{
+   Char     msg_buf[100];
+   ThreadId i;
    /* jee, the joinee, is the thread specified as an arg in thread
       tid's call to pthread_join.  So tid is the join-er. */
    vg_assert(VG_(is_valid_tid)(tid));
@@ -1709,6 +1804,11 @@
       return;
    }
 
+   /* Flush any completed pairs, so as to make sure what we're looking
+      at is up-to-date. */
+   maybe_rendezvous_joiners_and_joinees();
+
+   /* Is this a sane request? */
    if (jee < 0 
        || jee >= VG_N_THREADS
        || VG_(threads)[jee].status == VgTs_Empty) {
@@ -1718,63 +1818,94 @@
       return;
    }
 
-   if (VG_(threads)[jee].joiner != VG_INVALID_THREADID) {
-      /* Someone already did join on this thread */
-      SET_EDX(tid, EINVAL);
-      VG_(threads)[tid].status = VgTs_Runnable;
-      return;
+   /* Is anyone else already in a join-wait for jee? */
+   for (i = 1; i < VG_N_THREADS; i++) {
+      if (i == tid) continue;
+      if (VG_(threads)[i].status == VgTs_WaitJoinee
+          && VG_(threads)[i].joiner_jee_tid == jee) {
+         /* Someone already did join on this thread */
+         SET_EDX(tid, EINVAL);
+         VG_(threads)[tid].status = VgTs_Runnable;
+         return;
+      }
    }
 
-   /* if (VG_(threads)[jee].detached) ... */
-
-   /* Perhaps the joinee has already finished?  If so return
-      immediately with its return code, and free up the slot. TODO:
-      free it properly (also above). */
-   if (VG_(threads)[jee].status == VgTs_WaitJoiner) {
-      vg_assert(VG_(threads)[jee].joiner == VG_INVALID_THREADID);
-      SET_EDX(tid, 0); /* success */
-      if (thread_return != NULL) {
-         *thread_return = VG_(threads)[jee].retval;
-	 /* Not really right, since it makes the thread's return value
-            appear to be defined even if it isn't. */
-         if (VG_(clo_instrument))
-            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
-      }
-      VG_(threads)[tid].status = VgTs_Runnable;
-      VG_(threads)[jee].status = VgTs_Empty; /* bye! */
-      cleanup_after_thread_exited ( jee );
-      if (VG_(clo_trace_sched)) {
-	 VG_(sprintf)(msg_buf,
-		      "someone called pthread_join() on me; bye!");
-         print_sched_event(jee, msg_buf);
-	 VG_(sprintf)(msg_buf,
-            "my pthread_join(%d) returned immediately", 
-            jee );
-         print_sched_event(tid, msg_buf);
-      }
-      return;
-   }
-
-   /* Ok, so we'll have to wait on jee. */
-   VG_(threads)[jee].joiner = tid;
+   /* Mark this thread as waiting for the joinee. */
    VG_(threads)[tid].status = VgTs_WaitJoinee;
+   VG_(threads)[tid].joiner_thread_return = thread_return;
+   VG_(threads)[tid].joiner_jee_tid = jee;
+
+   /* Look for matching joiners and joinees and do the right thing. */
+   maybe_rendezvous_joiners_and_joinees();
+
+   /* Return value is irrelevant since this thread becomes
+      non-runnable.  maybe_rendezvous_joiners_and_joinees() will cause
+      it to return the right value when it resumes. */
+
    if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf,
-         "blocking on call of pthread_join(%d)", jee );
+      VG_(sprintf)(msg_buf, 
+         "wait for joinee %d (may already be ready)", jee);
       print_sched_event(tid, msg_buf);
    }
-   /* So tid's join call does not return just now. */
 }
 
 
+/* ( void* ): calling thread waits for joiner and returns the void* to
+   it.  This is one of two ways in which a thread can finally exit --
+   the other is do__quit. */
 static
-void do_pthread_create ( ThreadId parent_tid,
-                         pthread_t* thread, 
-                         pthread_attr_t* attr, 
-                         void* (*start_routine)(void *), 
-                         void* arg )
+void do__wait_joiner ( ThreadId tid, void* retval )
 {
-   Int      i;
+   Char msg_buf[100];
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, 
+         "WAIT_JOINER(%p) (non-detached thread exit)", retval);
+      print_sched_event(tid, msg_buf);
+   }
+   VG_(threads)[tid].status = VgTs_WaitJoiner;
+   VG_(threads)[tid].joinee_retval = retval;
+   maybe_rendezvous_joiners_and_joinees();
+}
+
+
+/* ( no-args ): calling thread disappears from the system forever.
+   Reclaim resources. */
+static
+void do__quit ( ThreadId tid )
+{  /* Empty tid's slot and release its resources; no joiner is involved. */
+   Char msg_buf[100];
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
+   VG_(threads)[tid].status = VgTs_Empty; /* bye! */
+   cleanup_after_thread_exited ( tid );
+
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "QUIT (detached thread exit)");
+      print_sched_event(tid, msg_buf);
+   }
+   /* Return value is irrelevant; this thread will not get
+      rescheduled. */
+}
+
+
+/* Should never be entered.  If it is, will be on the simulated
+   CPU. */
+static 
+void do__apply_in_new_thread_bogusRA ( void )
+{  /* Used only as an address: the return slot of a new thread's frame. */
+   VG_(panic)("do__apply_in_new_thread_bogusRA");
+}
+
+/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it.  Fn
+   MUST NOT return -- ever.  Eventually it will do either __QUIT or
+   __WAIT_JOINER.  Return the child tid to the parent. */
+static
+void do__apply_in_new_thread ( ThreadId parent_tid,
+                               void* (*fn)(void *), 
+                               void* arg )
+{
    Addr     new_stack;
    UInt     new_stk_szb;
    ThreadId tid;
@@ -1829,15 +1960,16 @@
    VG_(threads)[tid].m_esp -= 4;
    * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;
 
-   /* push (magical) return address */
+   /* push (bogus) return address */
    VG_(threads)[tid].m_esp -= 4;
-   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
+   * (UInt*)(VG_(threads)[tid].m_esp) 
+      = (UInt)&do__apply_in_new_thread_bogusRA;
 
    if (VG_(clo_instrument))
       VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
 
    /* this is where we start */
-   VG_(threads)[tid].m_eip = (UInt)start_routine;
+   VG_(threads)[tid].m_eip = (UInt)fn;
 
    if (VG_(clo_trace_sched)) {
       VG_(sprintf)(msg_buf,
@@ -1845,27 +1977,18 @@
       print_sched_event(tid, msg_buf);
    }
 
-   /* store the thread id in *thread. */
-   //   if (VG_(clo_instrument))
-   // ***** CHECK *thread is writable
-   *thread = (pthread_t)tid;
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)thread, sizeof(pthread_t) );
-
-   VG_(threads)[tid].associated_mx = NULL;
-   VG_(threads)[tid].associated_cv = NULL;
-   VG_(threads)[tid].joiner        = VG_INVALID_THREADID;
-   VG_(threads)[tid].status        = VgTs_Runnable;
-
-   for (i = 0; i < VG_N_THREAD_KEYS; i++)
-      VG_(threads)[tid].specifics[i] = NULL;
+   /* Create new thread with default attrs:
+      deferred cancellation, not detached 
+   */
+   mostly_clear_thread_record(tid);
+   VG_(threads)[tid].status = VgTs_Runnable;
 
    /* We inherit our parent's signal mask. */
    VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask;
-   VG_(ksigemptyset)(&VG_(threads)[i].sigs_waited_for);
+   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
 
-   /* return zero */
-   SET_EDX(parent_tid, 0); /* success */
+   /* return child's tid to parent */
+   SET_EDX(parent_tid, tid); /* success */
 }
 
 
@@ -2625,30 +2748,10 @@
    UInt  req_no = arg[0];
    switch (req_no) {
 
-      case VG_USERREQ__PTHREAD_CREATE:
-         do_pthread_create( tid, 
-                            (pthread_t*)arg[1], 
-                            (pthread_attr_t*)arg[2], 
-                            (void*(*)(void*))arg[3], 
-                            (void*)arg[4] );
-         break;
-
-      case VG_USERREQ__PTHREAD_RETURNS:
-         handle_pthread_return( tid, (void*)arg[1] );
-         break;
-
       case VG_USERREQ__PTHREAD_JOIN:
          do_pthread_join( tid, arg[1], (void**)(arg[2]) );
          break;
 
-      case VG_USERREQ__PTHREAD_CANCEL:
-         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
-         break;
-
-      case VG_USERREQ__PTHREAD_EXIT:
-         do_pthread_exit( tid, (void*)(arg[1]) );
-         break;
-
       case VG_USERREQ__PTHREAD_COND_WAIT:
          do_pthread_cond_wait( tid, 
                                (pthread_cond_t *)(arg[1]),