Finally get around to implementing pthread_atfork.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@473 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c
index cca2d88..c3a11b2 100644
--- a/coregrind/arch/x86-linux/vg_libpthread.c
+++ b/coregrind/arch/x86-linux/vg_libpthread.c
@@ -1269,13 +1269,45 @@
    MISC
    ------------------------------------------------ */
 
+static pthread_mutex_t pthread_atfork_lock   /* locked by __pthread_atfork() and __fork() */
+   = PTHREAD_MUTEX_INITIALIZER;
+/* Register one prepare/parent/child handler triple; returns 0, or calls barf() if the stack is full. */
 int __pthread_atfork ( void (*prepare)(void),
                        void (*parent)(void),
                        void (*child)(void) )
 {
-   /* We have to do this properly or not at all; faking it isn't an
-      option. */
-   vgPlain_unimp("__pthread_atfork");
+   int n, res;
+   ForkHandlerEntry entry;
+
+   ensure_valgrind("pthread_atfork");
+   __pthread_mutex_lock(&pthread_atfork_lock);   /* held until both the entry and the counter are updated */
+
+   /* Fetch the current entry count; it is also used as the index of the next free slot */
+   VALGRIND_MAGIC_SEQUENCE(n, -2 /* default */,
+                           VG_USERREQ__GET_FHSTACK_USED,
+                           0, 0, 0, 0);
+   my_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK);
+   if (n == VG_N_FORKHANDLERSTACK-1)   /* a count of n+1 would no longer satisfy the assertion above */
+      barf("pthread_atfork: VG_N_FORKHANDLERSTACK is too low; "
+           "increase and recompile");
+
+   /* Store the three handlers in slot n */
+   entry.prepare = *prepare;
+   entry.parent  = *parent;
+   entry.child   = *child;   
+   VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                           VG_USERREQ__SET_FHSTACK_ENTRY,
+                           n, &entry, 0, 0);
+   my_assert(res == 0);
+
+   /* Bump the counter so that slot n is counted as used */
+   VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                           VG_USERREQ__SET_FHSTACK_USED,
+                           n+1, 0, 0, 0);
+   my_assert(res == 0);
+
+   __pthread_mutex_unlock(&pthread_atfork_lock);
+   return 0;
 }
 
 
@@ -1560,15 +1592,6 @@
 
 
 extern
-pid_t __libc_fork(void);
-pid_t __fork(void)
-{
-   __my_pthread_testcancel();
-   return __libc_fork();
-}
-
-
-extern
 pid_t __libc_waitpid(pid_t pid, int *status, int options);
 __attribute__((weak))
 pid_t waitpid(pid_t pid, int *status, int options)
@@ -1776,6 +1799,80 @@
 }
 
 
+/*--- fork and its helper ---*/
+/* Run the registered fork handlers; what is 0 (prepare), 1 (parent) or 2 (child). */
+static
+void run_fork_handlers ( int what )
+{
+   ForkHandlerEntry entry;
+   int n_h, n_handlers, i, res;
+
+   my_assert(what == 0 || what == 1 || what == 2);
+
+   /* Fetch the number of registered entries */
+   VALGRIND_MAGIC_SEQUENCE(n_handlers, -2 /* default */,
+                           VG_USERREQ__GET_FHSTACK_USED,
+                           0, 0, 0, 0);
+   my_assert(n_handlers >= 0 && n_handlers < VG_N_FORKHANDLERSTACK);
+
+   /* Prepare handlers (what == 0) are called in opposite order of
+      calls to pthread_atfork.  Parent and child handlers are called
+      in the same order as calls to pthread_atfork. */
+   if (what == 0)
+      n_h = n_handlers - 1;
+   else
+      n_h = 0;
+
+   for (i = 0; i < n_handlers; i++) {   /* i only counts iterations; n_h is the slot being fetched */
+      VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                              VG_USERREQ__GET_FHSTACK_ENTRY,
+                              n_h, &entry, 0, 0);
+      my_assert(res == 0);
+      switch (what) {   /* a NULL handler in the entry is skipped */
+         case 0:  if (entry.prepare) entry.prepare(); 
+                  n_h--; break;
+         case 1:  if (entry.parent) entry.parent(); 
+                  n_h++; break;
+         case 2:  if (entry.child) entry.child(); 
+                  n_h++; break;
+         default: barf("run_fork_handlers: invalid what");
+      }
+   }
+
+   if (what != 0 /* prepare */) {
+      /* Empty out the stack.  NOTE(review): later forks then find no handlers -- confirm this is intended. */
+      VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                              VG_USERREQ__SET_FHSTACK_USED,
+                              0, 0, 0, 0);
+      my_assert(res == 0);
+   }
+}
+/* fork() wrapper: runs the atfork handlers around __libc_fork() while holding pthread_atfork_lock. */
+extern
+pid_t __libc_fork(void);
+pid_t __fork(void)
+{
+   pid_t pid;
+   __my_pthread_testcancel();
+   __pthread_mutex_lock(&pthread_atfork_lock);
+
+   run_fork_handlers(0 /* prepare */);
+   pid = __libc_fork();
+   if (pid == 0) {
+      /* I am the child */
+      run_fork_handlers(2 /* child */);
+      __pthread_mutex_init(&pthread_atfork_lock, NULL);   /* re-initialised here instead of being unlocked */
+   } else {
+      /* I am the parent (this branch handles every non-zero result of __libc_fork) */
+      run_fork_handlers(1 /* parent */);
+      __pthread_mutex_unlock(&pthread_atfork_lock);
+   }
+   return pid;
+}
+
+
+
+
 /* ---------------------------------------------------------------------
    Nonblocking implementations of select() and poll().  This stuff will
    surely rot your mind.
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 7264581..d489be3 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -162,6 +162,9 @@
 /* Number of entries in each thread's cleanup stack. */
 #define VG_N_CLEANUPSTACK 8
 
+/* Size of the process-wide fork-handler stack (at most N-1 entries are usable). */
+#define VG_N_FORKHANDLERSTACK 2
+
 
 /* ---------------------------------------------------------------------
    Basic types
@@ -510,8 +513,11 @@
    thread. */
 #define VG_USERREQ__GET_N_SIGS_RETURNED     0x3024
 
-
-
+/* Get/set the used-count and entries of the process-wide pthread_atfork stack. */
+#define VG_USERREQ__SET_FHSTACK_USED        0x3025
+#define VG_USERREQ__GET_FHSTACK_USED        0x3026
+#define VG_USERREQ__SET_FHSTACK_ENTRY       0x3027
+#define VG_USERREQ__GET_FHSTACK_ENTRY       0x3028
 
 /* Cosmetic ... */
 #define VG_USERREQ__GET_PTHREAD_TRACE_LEVEL 0x3101
@@ -570,7 +576,17 @@
       void* arg;
    }
    CleanupEntry;
- 
+
+/* One registered pthread_atfork handler triple (an entry in the fork-handler stack). */
+typedef
+   struct {
+      void (*prepare)(void);   /* called before the fork */
+      void (*parent)(void);    /* called after the fork, in the parent */
+      void (*child)(void);     /* called after the fork, in the child */
+   }
+   ForkHandlerEntry;
+
+
 typedef
    struct {
       /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c
index cca2d88..c3a11b2 100644
--- a/coregrind/vg_libpthread.c
+++ b/coregrind/vg_libpthread.c
@@ -1269,13 +1269,45 @@
    MISC
    ------------------------------------------------ */
 
+static pthread_mutex_t pthread_atfork_lock   /* locked by __pthread_atfork() and __fork() */
+   = PTHREAD_MUTEX_INITIALIZER;
+/* Register one prepare/parent/child handler triple; returns 0, or calls barf() if the stack is full. */
 int __pthread_atfork ( void (*prepare)(void),
                        void (*parent)(void),
                        void (*child)(void) )
 {
-   /* We have to do this properly or not at all; faking it isn't an
-      option. */
-   vgPlain_unimp("__pthread_atfork");
+   int n, res;
+   ForkHandlerEntry entry;
+
+   ensure_valgrind("pthread_atfork");
+   __pthread_mutex_lock(&pthread_atfork_lock);   /* held until both the entry and the counter are updated */
+
+   /* Fetch the current entry count; it is also used as the index of the next free slot */
+   VALGRIND_MAGIC_SEQUENCE(n, -2 /* default */,
+                           VG_USERREQ__GET_FHSTACK_USED,
+                           0, 0, 0, 0);
+   my_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK);
+   if (n == VG_N_FORKHANDLERSTACK-1)   /* a count of n+1 would no longer satisfy the assertion above */
+      barf("pthread_atfork: VG_N_FORKHANDLERSTACK is too low; "
+           "increase and recompile");
+
+   /* Store the three handlers in slot n */
+   entry.prepare = *prepare;
+   entry.parent  = *parent;
+   entry.child   = *child;   
+   VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                           VG_USERREQ__SET_FHSTACK_ENTRY,
+                           n, &entry, 0, 0);
+   my_assert(res == 0);
+
+   /* Bump the counter so that slot n is counted as used */
+   VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                           VG_USERREQ__SET_FHSTACK_USED,
+                           n+1, 0, 0, 0);
+   my_assert(res == 0);
+
+   __pthread_mutex_unlock(&pthread_atfork_lock);
+   return 0;
 }
 
 
@@ -1560,15 +1592,6 @@
 
 
 extern
-pid_t __libc_fork(void);
-pid_t __fork(void)
-{
-   __my_pthread_testcancel();
-   return __libc_fork();
-}
-
-
-extern
 pid_t __libc_waitpid(pid_t pid, int *status, int options);
 __attribute__((weak))
 pid_t waitpid(pid_t pid, int *status, int options)
@@ -1776,6 +1799,80 @@
 }
 
 
+/*--- fork and its helper ---*/
+/* Run the registered fork handlers; what is 0 (prepare), 1 (parent) or 2 (child). */
+static
+void run_fork_handlers ( int what )
+{
+   ForkHandlerEntry entry;
+   int n_h, n_handlers, i, res;
+
+   my_assert(what == 0 || what == 1 || what == 2);
+
+   /* Fetch the number of registered entries */
+   VALGRIND_MAGIC_SEQUENCE(n_handlers, -2 /* default */,
+                           VG_USERREQ__GET_FHSTACK_USED,
+                           0, 0, 0, 0);
+   my_assert(n_handlers >= 0 && n_handlers < VG_N_FORKHANDLERSTACK);
+
+   /* Prepare handlers (what == 0) are called in opposite order of
+      calls to pthread_atfork.  Parent and child handlers are called
+      in the same order as calls to pthread_atfork. */
+   if (what == 0)
+      n_h = n_handlers - 1;
+   else
+      n_h = 0;
+
+   for (i = 0; i < n_handlers; i++) {   /* i only counts iterations; n_h is the slot being fetched */
+      VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                              VG_USERREQ__GET_FHSTACK_ENTRY,
+                              n_h, &entry, 0, 0);
+      my_assert(res == 0);
+      switch (what) {   /* a NULL handler in the entry is skipped */
+         case 0:  if (entry.prepare) entry.prepare(); 
+                  n_h--; break;
+         case 1:  if (entry.parent) entry.parent(); 
+                  n_h++; break;
+         case 2:  if (entry.child) entry.child(); 
+                  n_h++; break;
+         default: barf("run_fork_handlers: invalid what");
+      }
+   }
+
+   if (what != 0 /* prepare */) {
+      /* Empty out the stack.  NOTE(review): later forks then find no handlers -- confirm this is intended. */
+      VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                              VG_USERREQ__SET_FHSTACK_USED,
+                              0, 0, 0, 0);
+      my_assert(res == 0);
+   }
+}
+/* fork() wrapper: runs the atfork handlers around __libc_fork() while holding pthread_atfork_lock. */
+extern
+pid_t __libc_fork(void);
+pid_t __fork(void)
+{
+   pid_t pid;
+   __my_pthread_testcancel();
+   __pthread_mutex_lock(&pthread_atfork_lock);
+
+   run_fork_handlers(0 /* prepare */);
+   pid = __libc_fork();
+   if (pid == 0) {
+      /* I am the child */
+      run_fork_handlers(2 /* child */);
+      __pthread_mutex_init(&pthread_atfork_lock, NULL);   /* re-initialised here instead of being unlocked */
+   } else {
+      /* I am the parent (this branch handles every non-zero result of __libc_fork) */
+      run_fork_handlers(1 /* parent */);
+      __pthread_mutex_unlock(&pthread_atfork_lock);
+   }
+   return pid;
+}
+
+
+
+
 /* ---------------------------------------------------------------------
    Nonblocking implementations of select() and poll().  This stuff will
    surely rot your mind.
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index fc260c9..796ca7e 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -88,6 +88,11 @@
    LinuxThreads. */
 ThreadState VG_(threads)[VG_N_THREADS];
 
+/* The process' fork-handler stack. */
+static Int              vg_fhstack_used = 0;   /* count of entries in use; kept in 0 .. VG_N_FORKHANDLERSTACK-1 */
+static ForkHandlerEntry vg_fhstack[VG_N_FORKHANDLERSTACK];
+
+
 /* The tid of the thread currently in VG_(baseBlock). */
 static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
 
@@ -574,6 +579,8 @@
       vg_thread_keys[i].destructor = NULL;
    }
 
+   vg_fhstack_used = 0;
+
    /* Assert this is thread zero, which has certain magic
       properties. */
    tid_main = vg_alloc_ThreadState();
@@ -2848,6 +2855,118 @@
 }
 
 
+/* -----------------------------------------------------------
+   FORK HANDLERS.
+   -------------------------------------------------------- */
+/* Handle VG_USERREQ__SET_FHSTACK_USED: set the used-count to n; SET_EDX gets 0, or -1 if n is out of range. */
+static 
+void do__set_fhstack_used ( ThreadId tid, Int n )
+{
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "set_fhstack_used to %d", n );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   if (n >= 0 && n < VG_N_FORKHANDLERSTACK) {   /* a count equal to the array size is rejected too */
+      vg_fhstack_used = n;
+      SET_EDX(tid, 0);
+   } else {
+      SET_EDX(tid, -1);
+   }
+}
+
+/* Handle VG_USERREQ__GET_FHSTACK_USED: pass the current used-count to SET_EDX. */
+static
+void do__get_fhstack_used ( ThreadId tid )
+{
+   Int  n;
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "get_fhstack_used" );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   n = vg_fhstack_used;
+   vg_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK);   /* same range that do__set_fhstack_used accepts */
+   SET_EDX(tid, n);
+}
+/* Handle VG_USERREQ__SET_FHSTACK_ENTRY: copy *fh into slot n; SET_EDX gets 0, or -1 if n is out of range. */
+static
+void do__set_fhstack_entry ( ThreadId tid, Int n, ForkHandlerEntry* fh )
+{
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "set_fhstack_entry %d to %p", n, fh );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   if (VG_(clo_instrument)) {
+      /* Record an error if fh is not fully readable; the request still proceeds */
+      if (!VGM_(check_readable)( (Addr)fh,
+                                 sizeof(ForkHandlerEntry), NULL)) {
+         VG_(record_pthread_err)( tid, 
+            "pthread_atfork: prepare/parent/child contains "
+            "unaddressible or undefined bytes");
+      }
+   }
+   /* Reject slots outside vg_fhstack.  This must be ||: with && the test can never be true. */
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
+      SET_EDX(tid, -1);
+      return;
+   } 
+
+   vg_fhstack[n] = *fh;
+   SET_EDX(tid, 0);
+}
+
+/* Handle VG_USERREQ__GET_FHSTACK_ENTRY: copy slot n into *fh; SET_EDX gets 0, or -1 if n is out of range. */
+static
+void do__get_fhstack_entry ( ThreadId tid, Int n, /*OUT*/
+                                                  ForkHandlerEntry* fh )
+{
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "get_fhstack_entry %d", n );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   if (VG_(clo_instrument)) {
+      /* Record an error if fh is not fully writable; the request still proceeds */
+      if (!VGM_(check_writable)( (Addr)fh,
+                                 sizeof(ForkHandlerEntry), NULL)) {
+         VG_(record_pthread_err)( tid, 
+            "fork: prepare/parent/child contains "
+            "unaddressible bytes");
+      }
+   }
+   /* Reject slots outside vg_fhstack.  This must be ||: with && the test can never be true. */
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
+      SET_EDX(tid, -1);
+      return;
+   } 
+
+   *fh = vg_fhstack[n];
+   SET_EDX(tid, 0);
+
+   if (VG_(clo_instrument)) {   /* mark the bytes just written to *fh as readable */
+      VGM_(make_readable)( (Addr)fh, sizeof(ForkHandlerEntry) );
+   }
+}
+
+
 /* ---------------------------------------------------------------------
    Handle client requests.
    ------------------------------------------------------------------ */
@@ -3090,6 +3209,24 @@
          SET_EDX(tid, 0);
          break;
 
+      case VG_USERREQ__SET_FHSTACK_USED:
+         do__set_fhstack_used( tid, (Int)(arg[1]) );
+         break;
+
+      case VG_USERREQ__GET_FHSTACK_USED:
+         do__get_fhstack_used( tid );
+         break;
+
+      case VG_USERREQ__SET_FHSTACK_ENTRY:
+         do__set_fhstack_entry( tid, (Int)(arg[1]),
+                                     (ForkHandlerEntry*)(arg[2]) );
+         break;
+
+      case VG_USERREQ__GET_FHSTACK_ENTRY:
+         do__get_fhstack_entry( tid, (Int)(arg[1]),
+                                     (ForkHandlerEntry*)(arg[2]) );
+         break;
+
       case VG_USERREQ__MAKE_NOACCESS:
       case VG_USERREQ__MAKE_WRITABLE:
       case VG_USERREQ__MAKE_READABLE:
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ac0400b..3839614 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -31,4 +31,4 @@
 	discard.c pth_semaphore1.c new_override.cpp pth_yield.c \
 	sigaltstack.c erringfds.c sigwait_all.c \
 	pth_cancel1.c pth_cancel2.c pth_signal_gober.c nanoleak.c \
-	pth_pause.c pth_sigpending.c
+	pth_pause.c pth_sigpending.c pth_atfork1.c
diff --git a/tests/pth_atfork1.c b/tests/pth_atfork1.c
new file mode 100644
index 0000000..f0876b8
--- /dev/null
+++ b/tests/pth_atfork1.c
@@ -0,0 +1,105 @@
+/* Tests for fork in multi-threaded environment.
+   Copyright (C) 2000 Free Software Foundation, Inc.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <errno.h>
+#include <error.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <stdio.h>
+
+enum
+{
+  PREPARE_BIT = 1,
+  PARENT_BIT = 2,
+  CHILD_BIT = 4
+};
+
+static int var;
+
+static void
+prepare (void)
+{
+  printf("prepare\n");
+  var |= PREPARE_BIT;   /* records that the prepare handler ran; checked in thread() */
+}
+
+static void
+parent (void)
+{
+  printf("parent\n");
+  var |= PARENT_BIT;   /* records that the parent handler ran; checked in thread() after waitpid */
+}
+
+static void
+child (void)
+{
+  printf("child\n");
+  var |= CHILD_BIT;   /* records that the child handler ran; checked in the forked child */
+}
+
+
+static void *thread (void *arg);
+
+/* Register the three handlers, run thread() in a new thread, and return its result as the exit status. */
+int
+main (void)
+{
+  pthread_t th;
+  void *res;
+
+  pthread_atfork (prepare, parent, child);
+
+  if (pthread_create (&th, NULL, thread, NULL) != 0)
+    error (EXIT_FAILURE, 0, "cannot create thread");
+
+  pthread_join (th, &res);
+
+  return (int) (long int) res;   /* thread() returns its status packed into the pointer */
+}
+
+/* Thread body: fork, then check in both child and parent that the expected handler bits are set in var. */
+static void *
+thread (void *arg)
+{
+  int status;
+  pid_t pid;
+
+  pid = fork ();
+  if (pid == 0)
+    {
+      /* We check whether the `prepare' and `child' function ran.  */
+      exit (var != (PREPARE_BIT | CHILD_BIT));   /* exit status 0 means exactly those two bits are set */
+    }
+  else if (pid == (pid_t) -1)
+    error (EXIT_FAILURE, errno, "cannot fork");
+
+  if (waitpid (pid, &status, 0) != pid)
+    error (EXIT_FAILURE, errno, "wrong child");
+
+  if (WTERMSIG (status) != 0)
+    error (EXIT_FAILURE, 0, "Child terminated incorrectly");
+  status = WEXITSTATUS (status);
+
+  if (status == 0)   /* child passed; now check that `prepare' and `parent' ran on this side */
+    status = var != (PREPARE_BIT | PARENT_BIT);
+
+  return (void *) (long int) status;
+}
diff --git a/vg_include.h b/vg_include.h
index 7264581..d489be3 100644
--- a/vg_include.h
+++ b/vg_include.h
@@ -162,6 +162,9 @@
 /* Number of entries in each thread's cleanup stack. */
 #define VG_N_CLEANUPSTACK 8
 
+/* Size of the process-wide fork-handler stack (at most N-1 entries are usable). */
+#define VG_N_FORKHANDLERSTACK 2
+
 
 /* ---------------------------------------------------------------------
    Basic types
@@ -510,8 +513,11 @@
    thread. */
 #define VG_USERREQ__GET_N_SIGS_RETURNED     0x3024
 
-
-
+/* Get/set the used-count and entries of the process-wide pthread_atfork stack. */
+#define VG_USERREQ__SET_FHSTACK_USED        0x3025
+#define VG_USERREQ__GET_FHSTACK_USED        0x3026
+#define VG_USERREQ__SET_FHSTACK_ENTRY       0x3027
+#define VG_USERREQ__GET_FHSTACK_ENTRY       0x3028
 
 /* Cosmetic ... */
 #define VG_USERREQ__GET_PTHREAD_TRACE_LEVEL 0x3101
@@ -570,7 +576,17 @@
       void* arg;
    }
    CleanupEntry;
- 
+
+/* One registered pthread_atfork handler triple (an entry in the fork-handler stack). */
+typedef
+   struct {
+      void (*prepare)(void);   /* called before the fork */
+      void (*parent)(void);    /* called after the fork, in the parent */
+      void (*child)(void);     /* called after the fork, in the child */
+   }
+   ForkHandlerEntry;
+
+
 typedef
    struct {
       /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
diff --git a/vg_libpthread.c b/vg_libpthread.c
index cca2d88..c3a11b2 100644
--- a/vg_libpthread.c
+++ b/vg_libpthread.c
@@ -1269,13 +1269,45 @@
    MISC
    ------------------------------------------------ */
 
+static pthread_mutex_t pthread_atfork_lock   /* locked by __pthread_atfork() and __fork() */
+   = PTHREAD_MUTEX_INITIALIZER;
+/* Register one prepare/parent/child handler triple; returns 0, or calls barf() if the stack is full. */
 int __pthread_atfork ( void (*prepare)(void),
                        void (*parent)(void),
                        void (*child)(void) )
 {
-   /* We have to do this properly or not at all; faking it isn't an
-      option. */
-   vgPlain_unimp("__pthread_atfork");
+   int n, res;
+   ForkHandlerEntry entry;
+
+   ensure_valgrind("pthread_atfork");
+   __pthread_mutex_lock(&pthread_atfork_lock);   /* held until both the entry and the counter are updated */
+
+   /* Fetch the current entry count; it is also used as the index of the next free slot */
+   VALGRIND_MAGIC_SEQUENCE(n, -2 /* default */,
+                           VG_USERREQ__GET_FHSTACK_USED,
+                           0, 0, 0, 0);
+   my_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK);
+   if (n == VG_N_FORKHANDLERSTACK-1)   /* a count of n+1 would no longer satisfy the assertion above */
+      barf("pthread_atfork: VG_N_FORKHANDLERSTACK is too low; "
+           "increase and recompile");
+
+   /* Store the three handlers in slot n */
+   entry.prepare = *prepare;
+   entry.parent  = *parent;
+   entry.child   = *child;   
+   VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                           VG_USERREQ__SET_FHSTACK_ENTRY,
+                           n, &entry, 0, 0);
+   my_assert(res == 0);
+
+   /* Bump the counter so that slot n is counted as used */
+   VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                           VG_USERREQ__SET_FHSTACK_USED,
+                           n+1, 0, 0, 0);
+   my_assert(res == 0);
+
+   __pthread_mutex_unlock(&pthread_atfork_lock);
+   return 0;
 }
 
 
@@ -1560,15 +1592,6 @@
 
 
 extern
-pid_t __libc_fork(void);
-pid_t __fork(void)
-{
-   __my_pthread_testcancel();
-   return __libc_fork();
-}
-
-
-extern
 pid_t __libc_waitpid(pid_t pid, int *status, int options);
 __attribute__((weak))
 pid_t waitpid(pid_t pid, int *status, int options)
@@ -1776,6 +1799,80 @@
 }
 
 
+/*--- fork and its helper ---*/
+/* Run the registered fork handlers; what is 0 (prepare), 1 (parent) or 2 (child). */
+static
+void run_fork_handlers ( int what )
+{
+   ForkHandlerEntry entry;
+   int n_h, n_handlers, i, res;
+
+   my_assert(what == 0 || what == 1 || what == 2);
+
+   /* Fetch the number of registered entries */
+   VALGRIND_MAGIC_SEQUENCE(n_handlers, -2 /* default */,
+                           VG_USERREQ__GET_FHSTACK_USED,
+                           0, 0, 0, 0);
+   my_assert(n_handlers >= 0 && n_handlers < VG_N_FORKHANDLERSTACK);
+
+   /* Prepare handlers (what == 0) are called in opposite order of
+      calls to pthread_atfork.  Parent and child handlers are called
+      in the same order as calls to pthread_atfork. */
+   if (what == 0)
+      n_h = n_handlers - 1;
+   else
+      n_h = 0;
+
+   for (i = 0; i < n_handlers; i++) {   /* i only counts iterations; n_h is the slot being fetched */
+      VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                              VG_USERREQ__GET_FHSTACK_ENTRY,
+                              n_h, &entry, 0, 0);
+      my_assert(res == 0);
+      switch (what) {   /* a NULL handler in the entry is skipped */
+         case 0:  if (entry.prepare) entry.prepare(); 
+                  n_h--; break;
+         case 1:  if (entry.parent) entry.parent(); 
+                  n_h++; break;
+         case 2:  if (entry.child) entry.child(); 
+                  n_h++; break;
+         default: barf("run_fork_handlers: invalid what");
+      }
+   }
+
+   if (what != 0 /* prepare */) {
+      /* Empty out the stack.  NOTE(review): later forks then find no handlers -- confirm this is intended. */
+      VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */,
+                              VG_USERREQ__SET_FHSTACK_USED,
+                              0, 0, 0, 0);
+      my_assert(res == 0);
+   }
+}
+/* fork() wrapper: runs the atfork handlers around __libc_fork() while holding pthread_atfork_lock. */
+extern
+pid_t __libc_fork(void);
+pid_t __fork(void)
+{
+   pid_t pid;
+   __my_pthread_testcancel();
+   __pthread_mutex_lock(&pthread_atfork_lock);
+
+   run_fork_handlers(0 /* prepare */);
+   pid = __libc_fork();
+   if (pid == 0) {
+      /* I am the child */
+      run_fork_handlers(2 /* child */);
+      __pthread_mutex_init(&pthread_atfork_lock, NULL);   /* re-initialised here instead of being unlocked */
+   } else {
+      /* I am the parent (this branch handles every non-zero result of __libc_fork) */
+      run_fork_handlers(1 /* parent */);
+      __pthread_mutex_unlock(&pthread_atfork_lock);
+   }
+   return pid;
+}
+
+
+
+
 /* ---------------------------------------------------------------------
    Nonblocking implementations of select() and poll().  This stuff will
    surely rot your mind.
diff --git a/vg_scheduler.c b/vg_scheduler.c
index fc260c9..796ca7e 100644
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -88,6 +88,11 @@
    LinuxThreads. */
 ThreadState VG_(threads)[VG_N_THREADS];
 
+/* The process' fork-handler stack. */
+static Int              vg_fhstack_used = 0;   /* count of entries in use; kept in 0 .. VG_N_FORKHANDLERSTACK-1 */
+static ForkHandlerEntry vg_fhstack[VG_N_FORKHANDLERSTACK];
+
+
 /* The tid of the thread currently in VG_(baseBlock). */
 static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
 
@@ -574,6 +579,8 @@
       vg_thread_keys[i].destructor = NULL;
    }
 
+   vg_fhstack_used = 0;
+
    /* Assert this is thread zero, which has certain magic
       properties. */
    tid_main = vg_alloc_ThreadState();
@@ -2848,6 +2855,118 @@
 }
 
 
+/* -----------------------------------------------------------
+   FORK HANDLERS.
+   -------------------------------------------------------- */
+/* Handle VG_USERREQ__SET_FHSTACK_USED: set the used-count to n; SET_EDX gets 0, or -1 if n is out of range. */
+static 
+void do__set_fhstack_used ( ThreadId tid, Int n )
+{
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "set_fhstack_used to %d", n );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   if (n >= 0 && n < VG_N_FORKHANDLERSTACK) {   /* a count equal to the array size is rejected too */
+      vg_fhstack_used = n;
+      SET_EDX(tid, 0);
+   } else {
+      SET_EDX(tid, -1);
+   }
+}
+
+/* Handle VG_USERREQ__GET_FHSTACK_USED: pass the current used-count to SET_EDX. */
+static
+void do__get_fhstack_used ( ThreadId tid )
+{
+   Int  n;
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "get_fhstack_used" );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   n = vg_fhstack_used;
+   vg_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK);   /* same range that do__set_fhstack_used accepts */
+   SET_EDX(tid, n);
+}
+/* Handle VG_USERREQ__SET_FHSTACK_ENTRY: copy *fh into slot n; SET_EDX gets 0, or -1 if n is out of range. */
+static
+void do__set_fhstack_entry ( ThreadId tid, Int n, ForkHandlerEntry* fh )
+{
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "set_fhstack_entry %d to %p", n, fh );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   if (VG_(clo_instrument)) {
+      /* Record an error if fh is not fully readable; the request still proceeds */
+      if (!VGM_(check_readable)( (Addr)fh,
+                                 sizeof(ForkHandlerEntry), NULL)) {
+         VG_(record_pthread_err)( tid, 
+            "pthread_atfork: prepare/parent/child contains "
+            "unaddressible or undefined bytes");
+      }
+   }
+   /* Reject slots outside vg_fhstack.  This must be ||: with && the test can never be true. */
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
+      SET_EDX(tid, -1);
+      return;
+   } 
+
+   vg_fhstack[n] = *fh;
+   SET_EDX(tid, 0);
+}
+
+/* Handle VG_USERREQ__GET_FHSTACK_ENTRY: copy slot n into *fh; SET_EDX gets 0, or -1 if n is out of range. */
+static
+void do__get_fhstack_entry ( ThreadId tid, Int n, /*OUT*/
+                                                  ForkHandlerEntry* fh )
+{
+   Char msg_buf[100];
+   if (VG_(clo_trace_sched)) {
+      VG_(sprintf)(msg_buf, "get_fhstack_entry %d", n );
+      print_pthread_event(tid, msg_buf);
+   }
+
+   vg_assert(VG_(is_valid_tid)(tid) 
+             && VG_(threads)[tid].status == VgTs_Runnable);
+
+   if (VG_(clo_instrument)) {
+      /* Record an error if fh is not fully writable; the request still proceeds */
+      if (!VGM_(check_writable)( (Addr)fh,
+                                 sizeof(ForkHandlerEntry), NULL)) {
+         VG_(record_pthread_err)( tid, 
+            "fork: prepare/parent/child contains "
+            "unaddressible bytes");
+      }
+   }
+   /* Reject slots outside vg_fhstack.  This must be ||: with && the test can never be true. */
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
+      SET_EDX(tid, -1);
+      return;
+   } 
+
+   *fh = vg_fhstack[n];
+   SET_EDX(tid, 0);
+
+   if (VG_(clo_instrument)) {   /* mark the bytes just written to *fh as readable */
+      VGM_(make_readable)( (Addr)fh, sizeof(ForkHandlerEntry) );
+   }
+}
+
+
 /* ---------------------------------------------------------------------
    Handle client requests.
    ------------------------------------------------------------------ */
@@ -3090,6 +3209,24 @@
          SET_EDX(tid, 0);
          break;
 
+      case VG_USERREQ__SET_FHSTACK_USED:
+         do__set_fhstack_used( tid, (Int)(arg[1]) );
+         break;
+
+      case VG_USERREQ__GET_FHSTACK_USED:
+         do__get_fhstack_used( tid );
+         break;
+
+      case VG_USERREQ__SET_FHSTACK_ENTRY:
+         do__set_fhstack_entry( tid, (Int)(arg[1]),
+                                     (ForkHandlerEntry*)(arg[2]) );
+         break;
+
+      case VG_USERREQ__GET_FHSTACK_ENTRY:
+         do__get_fhstack_entry( tid, (Int)(arg[1]),
+                                     (ForkHandlerEntry*)(arg[2]) );
+         break;
+
       case VG_USERREQ__MAKE_NOACCESS:
       case VG_USERREQ__MAKE_WRITABLE:
       case VG_USERREQ__MAKE_READABLE: