Added support for OpenMP barriers, provided that libgomp.so has been built with debug information. More generally, added support for nested synchronization constructs.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@7642 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/exp-drd/Makefile.am b/exp-drd/Makefile.am
index 651bac4..46d677a 100644
--- a/exp-drd/Makefile.am
+++ b/exp-drd/Makefile.am
@@ -20,7 +20,11 @@
   noinst_PROGRAMS += exp-drd-ppc64-aix5 vgpreload_exp-drd-ppc64-aix5.so
 endif
 
+if HAVE_GCC_FOPENMP
+VGPRELOAD_DRD_SOURCES_COMMON = drd_pthread_intercepts.c drd_gomp_intercepts.c
+else
 VGPRELOAD_DRD_SOURCES_COMMON = drd_pthread_intercepts.c
+endif
 
 vgpreload_exp_drd_x86_linux_so_SOURCES      = $(VGPRELOAD_DRD_SOURCES_COMMON)
 vgpreload_exp_drd_x86_linux_so_CPPFLAGS     = $(AM_CPPFLAGS_X86_LINUX)
diff --git a/exp-drd/TODO.txt b/exp-drd/TODO.txt
index 5a3baf1..717a6c3 100644
--- a/exp-drd/TODO.txt
+++ b/exp-drd/TODO.txt
@@ -30,15 +30,17 @@
 - Add test programs for destroying a condition variable / semaphore / barrier
   being waited upon.
 - Add OpenMP examples to the regression tests.
+- Make sure an error message is printed if no debug information is found in
+  libgomp.so.1.
 - Make sure tc14_laog_dinphils is run during exp-drd regression tests.
 - Find a way for suppressing races reported on _IO_2_1_stdout. This race is
   triggered by calling printf() from more than one thread. Examples (AMD64):
   ./vg-in-place --tool=exp-drd exp-drd/tests/tc21_pthonce
   ./vg-in-place --tool=exp-drd exp-drd/tests/pth_detached 10 10 1
-- Performance testing and tuning.
 - testing on PPC and AIX (current implementation is only tested on X86 and
   AMD64).
 - Convert the array in drd_thread.c with thread information into an OSet.
+- Performance testing and tuning.
 - Optimize run time of the following test case:
   ./vg-in-place --tool=exp-drd exp-drd/tests/matinv 100 
 
diff --git a/exp-drd/drd_barrier.c b/exp-drd/drd_barrier.c
index b121105..05f4fc4 100644
--- a/exp-drd/drd_barrier.c
+++ b/exp-drd/drd_barrier.c
@@ -91,14 +91,13 @@
 static
 void barrier_initialize(struct barrier_info* const p,
                         const Addr barrier,
-                        const SizeT size,
+                        const BarrierT barrier_type,
                         const Word count)
 {
   tl_assert(barrier != 0);
-  tl_assert(size > 0);
+  tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
   tl_assert(count > 0);
   tl_assert(p->a1 == barrier);
-  tl_assert(p->a2 - p->a1 == size);
 
   p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
   p->count             = count;
@@ -146,16 +145,19 @@
  *  found, add it. */
 static
 struct barrier_info*
-barrier_get_or_allocate(const Addr barrier, const SizeT size, const Word count)
+barrier_get_or_allocate(const Addr barrier,
+                        const BarrierT barrier_type, const Word count)
 {
   struct barrier_info *p;
 
+  tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
+
   tl_assert(offsetof(DrdClientobj, barrier) == 0);
   p = &clientobj_get(barrier, ClientBarrier)->barrier;
   if (p == 0)
   {
-    p = &clientobj_add(barrier, barrier + size, ClientBarrier)->barrier;
-    barrier_initialize(p, barrier, size, count);
+    p = &clientobj_add(barrier, ClientBarrier)->barrier;
+    barrier_initialize(p, barrier, barrier_type, count);
   }
   return p;
 }
@@ -172,8 +174,9 @@
  *  where count threads participate in each barrier.
  *  Called before pthread_barrier_init().
  */
-struct barrier_info*
-barrier_init(const Addr barrier, const SizeT size, const Word count)
+void barrier_init(const Addr barrier,
+                  const BarrierT barrier_type, const Word count,
+                  const Bool reinitialization)
 {
   if (s_trace_barrier)
   {
@@ -184,11 +187,12 @@
                  barrier);
   }
   tl_assert(barrier_get(barrier) == 0);
-  return barrier_get_or_allocate(barrier, size, count);
+  tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
+  barrier_get_or_allocate(barrier, barrier_type, count);
 }
 
 /** Called after pthread_barrier_destroy(). */
-void barrier_destroy(const Addr barrier)
+void barrier_destroy(const Addr barrier, const BarrierT barrier_type)
 {
   struct barrier_info* p;
 
@@ -217,7 +221,8 @@
 }
 
 /** Called before pthread_barrier_wait(). */
-void barrier_pre_wait(const DrdThreadId tid, const Addr barrier)
+void barrier_pre_wait(const DrdThreadId tid, const Addr barrier,
+                      const BarrierT barrier_type)
 {
   struct barrier_info* p;
   struct barrier_thread_info* q;
@@ -256,7 +261,7 @@
 
 /** Called after pthread_barrier_wait(). */
 void barrier_post_wait(const DrdThreadId tid, const Addr barrier,
-                       const Bool waited)
+                       const BarrierT barrier_type, const Bool waited)
 {
   struct barrier_info* p;
 
diff --git a/exp-drd/drd_barrier.h b/exp-drd/drd_barrier.h
index 4351d04..54deb49 100644
--- a/exp-drd/drd_barrier.h
+++ b/exp-drd/drd_barrier.h
@@ -30,21 +30,24 @@
 #define __DRD_BARRIER_H
 
 
-#include "drd_thread.h"           // DrdThreadId
+#include "drd_clientreq.h"    // BarrierT
+#include "drd_thread.h"       // DrdThreadId
 #include "drd_vc.h"
-#include "pub_tool_basics.h"      // Addr, SizeT
+#include "pub_tool_basics.h"  // Addr
 
 
 struct barrier_info;
 
 
 void barrier_set_trace(const Bool trace_barrier);
-struct barrier_info* barrier_init(const Addr barrier, const SizeT size,
-                                  const Word count);
-void barrier_destroy(const Addr barrier);
-void barrier_pre_wait(const DrdThreadId tid, const Addr barrier);
+void barrier_init(const Addr barrier,
+                  const BarrierT barrier_type, const Word count,
+                  const Bool reinitialization);
+void barrier_destroy(const Addr barrier, const BarrierT barrier_type);
+void barrier_pre_wait(const DrdThreadId tid, const Addr barrier,
+                      const BarrierT barrier_type);
 void barrier_post_wait(const DrdThreadId tid, const Addr barrier,
-                       const Bool waited);
+                       const BarrierT barrier_type, const Bool waited);
 void barrier_thread_delete(const DrdThreadId threadid);
 void barrier_stop_using_mem(const Addr a1, const Addr a2);
 
diff --git a/exp-drd/drd_bitmap.c b/exp-drd/drd_bitmap.c
index e479e6a..d0160e7 100644
--- a/exp-drd/drd_bitmap.c
+++ b/exp-drd/drd_bitmap.c
@@ -151,9 +151,9 @@
  * bitmap bm.
  */
 void bm_access_range(struct bitmap* const bm,
-		     const Addr a,
-		     const SizeT size,
-		     const BmAccessTypeT access_type)
+                     const Addr a,
+                     const SizeT size,
+                     const BmAccessTypeT access_type)
 {
    tl_assert(bm);
    tl_assert(size > 0);
diff --git a/exp-drd/drd_bitmap.h b/exp-drd/drd_bitmap.h
index d70252f..7212ccf 100644
--- a/exp-drd/drd_bitmap.h
+++ b/exp-drd/drd_bitmap.h
@@ -46,12 +46,12 @@
 
 #define ADDR0_MASK (ADDR0_COUNT - 1)
 
-#define SPLIT_ADDRESS(a)						\
-  UWord a##0 = ((a) & ADDR0_MASK);                                      \
+#define SPLIT_ADDRESS(a)            \
+  UWord a##0 = ((a) & ADDR0_MASK);  \
   UWord a##1 = ((a) >> ADDR0_BITS);
 
 // Assumption: sizeof(Addr) == sizeof(UWord).
-#define MAKE_ADDRESS(a1, a0)			\
+#define MAKE_ADDRESS(a1, a0)  \
   (Addr)(((UWord)(a1) << (ADDR0_BITS)) | ((UWord)(a0)))
 
 #define BITS_PER_UWORD (8UL*sizeof(UWord))
diff --git a/exp-drd/drd_clientobj.c b/exp-drd/drd_clientobj.c
index 0ccf2bf..c473de9 100644
--- a/exp-drd/drd_clientobj.c
+++ b/exp-drd/drd_clientobj.c
@@ -92,8 +92,7 @@
   VG_(OSetGen_ResetIter)(s_clientobj);
   for ( ; (p = VG_(OSetGen_Next)(s_clientobj)) != 0; )
   {
-    if ((a1 <= p->any.a1 && p->any.a1 < a2)
-        || (a1 < p->any.a2 && p->any.a2 <= a2))
+    if (a1 <= p->any.a1 && p->any.a1 < a2)
     {
       return True;  
     }
@@ -106,12 +105,11 @@
  *  @pre No other client object is present in the address range [addr,addr+size[.
  */
 DrdClientobj*
-clientobj_add(const Addr a1, const Addr a2, const ObjType t)
+clientobj_add(const Addr a1, const ObjType t)
 {
   DrdClientobj* p;
 
-  tl_assert(a1 < a2 && a1 + 4096 > a2);
-  tl_assert(! clientobj_present(a1, a2));
+  tl_assert(! clientobj_present(a1, a1 + 1));
   tl_assert(VG_(OSetGen_Lookup)(s_clientobj, &a1) == 0);
 
   if (s_trace_clientobj)
@@ -122,11 +120,9 @@
   p = VG_(OSetGen_AllocNode)(s_clientobj, sizeof(*p));
   VG_(memset)(p, 0, sizeof(*p));
   p->any.a1   = a1;
-  p->any.a2   = a2;
   p->any.type = t;
   VG_(OSetGen_Insert)(s_clientobj, p);
   tl_assert(VG_(OSetGen_Lookup)(s_clientobj, &a1) == p);
-  drd_start_suppression(p->any.a1, p->any.a2, "client object");
   return p;
 }
 
@@ -150,7 +146,6 @@
   if (p)
   {
     tl_assert(VG_(OSetGen_Lookup)(s_clientobj, &addr) == 0);
-    drd_finish_suppression(p->any.a1, p->any.a2);
     tl_assert(p->any.cleanup);
     (*p->any.cleanup)(p);
     VG_(OSetGen_FreeNode)(s_clientobj, p);
@@ -169,8 +164,7 @@
   p = VG_(OSetGen_Next)(s_clientobj);
   for ( ; p != 0; )
   {
-    if ((a1 <= p->any.a1 && p->any.a1 < a2)
-        || (a1 < p->any.a2 && p->any.a2 <= a2))
+    if (a1 <= p->any.a1 && p->any.a1 < a2)
     {
       removed_at = p->any.a1;
       clientobj_remove(p->any.a1, p->any.type);
diff --git a/exp-drd/drd_clientobj.h b/exp-drd/drd_clientobj.h
index c893ca0..3a91ac7 100644
--- a/exp-drd/drd_clientobj.h
+++ b/exp-drd/drd_clientobj.h
@@ -51,7 +51,6 @@
 struct any
 {
   Addr    a1;
-  Addr    a2;
   ObjType type;
   void    (*cleanup)(union drd_clientobj*);
 };
@@ -59,7 +58,6 @@
 struct mutex_info
 {
   Addr        a1;
-  Addr        a2;
   ObjType     type;
   void        (*cleanup)(union drd_clientobj*);
   MutexT      mutex_type;      // pthread_mutex_t or pthread_spinlock_t.
@@ -71,7 +69,6 @@
 struct cond_info
 {
   Addr    a1;
-  Addr    a2;
   ObjType type;
   void    (*cleanup)(union drd_clientobj*);
   int     waiter_count;
@@ -82,7 +79,6 @@
 struct semaphore_info
 {
   Addr        a1;
-  Addr        a2;
   ObjType     type;
   void        (*cleanup)(union drd_clientobj*);
   UWord       value;             // Semaphore value.
@@ -94,7 +90,6 @@
 struct barrier_info
 {
   Addr    a1;
-  Addr    a2;
   ObjType type;
   void    (*cleanup)(union drd_clientobj*);
   Word     count;             // Participant count in a barrier wait.
@@ -109,7 +104,6 @@
 struct rwlock_info
 {
   Addr    a1;
-  Addr    a2;
   ObjType type;
   void    (*cleanup)(union drd_clientobj*);
   OSet*   thread_info;
@@ -133,7 +127,7 @@
 void clientobj_cleanup(void);
 DrdClientobj* clientobj_get(const Addr addr, const ObjType t);
 Bool clientobj_present(const Addr a1, const Addr a2);
-DrdClientobj* clientobj_add(const Addr a1, const Addr a2, const ObjType t);
+DrdClientobj* clientobj_add(const Addr a1, const ObjType t);
 Bool clientobj_remove(const Addr addr, const ObjType t);
 void clientobj_stop_using_mem(const Addr a1, const Addr a2);
 void clientobj_resetiter(void);
diff --git a/exp-drd/drd_clientreq.c b/exp-drd/drd_clientreq.c
index e63cf6a..5d11d62 100644
--- a/exp-drd/drd_clientreq.c
+++ b/exp-drd/drd_clientreq.c
@@ -41,7 +41,7 @@
 #include "pub_tool_tooliface.h"   // VG_(needs_...)()
 
 
-static void drd_spin_init_or_unlock(const Addr spinlock, const SizeT size)
+static void drd_spin_init_or_unlock(const Addr spinlock)
 {
    struct mutex_info* mutex_p = mutex_get(spinlock);
    if (mutex_p)
@@ -50,15 +50,15 @@
    }
    else
    {
-      mutex_init(spinlock, size, mutex_type_spinlock);
+      mutex_init(spinlock, mutex_type_spinlock);
    }
 }
 
-static void drd_pre_cond_wait(const Addr cond, const SizeT cond_size,
+static void drd_pre_cond_wait(const Addr cond,
                               const Addr mutex, const MutexT mutex_type)
 {
    mutex_unlock(mutex, mutex_type);
-   cond_pre_wait(cond, cond_size, mutex);
+   cond_pre_wait(cond, mutex);
 }
 
 static void drd_post_cond_wait(const Addr cond,
@@ -79,19 +79,22 @@
    cond_pre_broadcast(cond);
 }
 
-static Bool drd_handle_client_request(ThreadId tid, UWord* arg, UWord* ret)
+static Bool drd_handle_client_request(ThreadId vg_tid, UWord* arg, UWord* ret)
 {
    UWord result = 0;
+   const DrdThreadId drd_tid = thread_get_running_tid();
+
+   tl_assert(vg_tid == VG_(get_running_tid()));
+   tl_assert(VgThreadIdToDrdThreadId(vg_tid) == drd_tid);
 
    switch (arg[0])
    {
    case VG_USERREQ__GET_THREAD_SELF:
-      result = tid;
+      result = vg_tid;
       break;
 
    case VG_USERREQ__SET_THREAD_NAME:
-      thread_set_name_fmt(VgThreadIdToDrdThreadId(VG_(get_running_tid)()),
-                          (char*)arg[1], arg[2]);
+      thread_set_name_fmt(drd_tid, (char*)arg[1], arg[2]);
       break;
 
    case VG_USERREQ__DRD_START_SUPPRESSION:
@@ -103,8 +106,7 @@
       break;
 
    case VG_USERREQ__DRD_SUPPRESS_CURRENT_STACK:
-      thread_set_stack_startup(thread_get_running_tid(),
-                               VG_(get_SP)(VG_(get_running_tid)()));
+      thread_set_stack_startup(drd_tid, VG_(get_SP)(vg_tid));
       break;
 
    case VG_USERREQ__DRD_START_NEW_SEGMENT:
@@ -116,7 +118,7 @@
       break;
 
    case VG_USERREQ__SET_PTHREADID:
-      thread_set_pthreadid(thread_get_running_tid(), arg[1]);
+      thread_set_pthreadid(drd_tid, arg[1]);
       break;
 
    case VG_USERREQ__SET_JOINABLE:
@@ -125,101 +127,150 @@
 
    case VG_USERREQ__POST_THREAD_JOIN:
       tl_assert(arg[1]);
-      drd_post_thread_join(thread_get_running_tid(),
+      drd_post_thread_join(drd_tid,
                            PtThreadIdToDrdThreadId(arg[1]));
       break;
 
    case VG_USERREQ__PRE_MUTEX_INIT:
-      drd_pre_mutex_init(arg[1], arg[2], arg[3]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_pre_mutex_init(arg[1], arg[2]);
+      break;
+
+   case VG_USERREQ__POST_MUTEX_INIT:
+      thread_leave_synchr(drd_tid);
+      break;
+
+   case VG_USERREQ__PRE_MUTEX_DESTROY:
+      thread_enter_synchr(drd_tid);
       break;
 
    case VG_USERREQ__POST_MUTEX_DESTROY:
-      drd_post_mutex_destroy(arg[1], arg[2]);
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_post_mutex_destroy(arg[1], arg[2]);
       break;
 
-   case VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK:
-      drd_pre_mutex_lock(arg[1], arg[2], arg[3]);
+   case VG_USERREQ__PRE_MUTEX_LOCK:
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_pre_mutex_lock(arg[1], arg[2]);
       break;
 
-   case VG_USERREQ__POST_PTHREAD_MUTEX_LOCK:
-      drd_post_mutex_lock(arg[1], arg[2]);
+   case VG_USERREQ__POST_MUTEX_LOCK:
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_post_mutex_lock(arg[1], arg[2]);
       break;
 
-   case VG_USERREQ__PRE_PTHREAD_MUTEX_UNLOCK:
-      drd_pre_mutex_unlock(arg[1], arg[3]);
+   case VG_USERREQ__PRE_MUTEX_UNLOCK:
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_pre_mutex_unlock(arg[1], arg[2]);
+      break;
+
+   case VG_USERREQ__POST_MUTEX_UNLOCK:
+      thread_leave_synchr(drd_tid);
       break;
 
    case VG_USERREQ__SPIN_INIT_OR_UNLOCK:
-      drd_spin_init_or_unlock(arg[1], arg[2]);
+      tl_assert(thread_get_synchr_nesting_count(drd_tid) == 0);
+      drd_spin_init_or_unlock(arg[1]);
       break;
 
-   case VG_USERREQ__PRE_PTHREAD_COND_INIT:
-      drd_pre_cond_init(arg[1], arg[2]);
+   case VG_USERREQ__PRE_COND_INIT:
+      tl_assert(thread_get_synchr_nesting_count(drd_tid) == 0);
+      drd_pre_cond_init(arg[1]);
       break;
 
-   case VG_USERREQ__POST_PTHREAD_COND_DESTROY:
+   case VG_USERREQ__POST_COND_DESTROY:
+      tl_assert(thread_get_synchr_nesting_count(drd_tid) == 0);
       drd_post_cond_destroy(arg[1]);
       break;
 
-   case VG_USERREQ__PRE_PTHREAD_COND_WAIT:
-      drd_pre_cond_wait(arg[1]/*cond*/,  arg[2]/*cond_size*/,
-                        arg[3]/*mutex*/, arg[4]/*mutex_type*/);
+   case VG_USERREQ__PRE_COND_WAIT:
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_pre_cond_wait(arg[1], arg[2], arg[3]);
       break;
 
-   case VG_USERREQ__POST_PTHREAD_COND_WAIT:
-      drd_post_cond_wait(arg[1]/*cond*/, arg[2]/*mutex*/, arg[3]/*took_lock*/);
+   case VG_USERREQ__POST_COND_WAIT:
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_post_cond_wait(arg[1], arg[2], arg[3]);
       break;
 
-   case VG_USERREQ__PRE_PTHREAD_COND_SIGNAL:
+   case VG_USERREQ__PRE_COND_SIGNAL:
+      tl_assert(thread_get_synchr_nesting_count(drd_tid) == 0);
       drd_pre_cond_signal(arg[1]);
       break;
 
-   case VG_USERREQ__PRE_PTHREAD_COND_BROADCAST:
+   case VG_USERREQ__PRE_COND_BROADCAST:
+      tl_assert(thread_get_synchr_nesting_count(drd_tid) == 0);
       drd_pre_cond_broadcast(arg[1]);
       break;
 
-   case VG_USERREQ__SEM_INIT:
-      drd_semaphore_init(arg[1], arg[2], arg[3], arg[4]);
+   case VG_USERREQ__PRE_SEM_INIT:
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_semaphore_init(arg[1], arg[2], arg[3]);
       break;
 
-   case VG_USERREQ__SEM_DESTROY:
-      drd_semaphore_destroy(arg[1]);
+   case VG_USERREQ__POST_SEM_INIT:
+      thread_leave_synchr(drd_tid);
+      break;
+
+   case VG_USERREQ__PRE_SEM_DESTROY:
+      thread_enter_synchr(drd_tid);
+      break;
+
+   case VG_USERREQ__POST_SEM_DESTROY:
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_semaphore_destroy(arg[1]);
       break;
 
    case VG_USERREQ__PRE_SEM_WAIT:
-      drd_semaphore_pre_wait(thread_get_running_tid(), arg[1], arg[2]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_semaphore_pre_wait(drd_tid, arg[1]);
       break;
 
    case VG_USERREQ__POST_SEM_WAIT:
-      drd_semaphore_post_wait(thread_get_running_tid(), arg[1], arg[2]);
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_semaphore_post_wait(drd_tid, arg[1], arg[2]);
       break;
 
    case VG_USERREQ__PRE_SEM_POST:
-      drd_semaphore_pre_post(thread_get_running_tid(), arg[1], arg[2]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_semaphore_pre_post(drd_tid, arg[1]);
       break;
 
    case VG_USERREQ__POST_SEM_POST:
-      drd_semaphore_post_post(thread_get_running_tid(), arg[1], arg[2], arg[3]);
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_semaphore_post_post(drd_tid, arg[1], arg[2]);
       break;
 
-   case VG_USERREQ__BARRIER_INIT:
-      drd_barrier_init(arg[1], arg[2], arg[3]);
+   case VG_USERREQ__PRE_BARRIER_INIT:
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_barrier_init(arg[1], arg[2], arg[3], arg[4]);
       break;
 
-   case VG_USERREQ__BARRIER_DESTROY:
-      drd_barrier_destroy(arg[1]);
+   case VG_USERREQ__POST_BARRIER_INIT:
+      thread_leave_synchr(drd_tid);
+      break;
+
+   case VG_USERREQ__PRE_BARRIER_DESTROY:
+      thread_enter_synchr(drd_tid);
+      break;
+
+   case VG_USERREQ__POST_BARRIER_DESTROY:
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_barrier_destroy(arg[1], arg[2]);
       break;
 
    case VG_USERREQ__PRE_BARRIER_WAIT:
-      drd_barrier_pre_wait(thread_get_running_tid(), arg[1]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         drd_barrier_pre_wait(drd_tid, arg[1], arg[2]);
       break;
 
    case VG_USERREQ__POST_BARRIER_WAIT:
-      drd_barrier_post_wait(thread_get_running_tid(), arg[1], arg[2]);
+      if (thread_leave_synchr(drd_tid) == 0)
+         drd_barrier_post_wait(drd_tid, arg[1], arg[2], arg[3]);
       break;
 
    case VG_USERREQ__PRE_RWLOCK_INIT:
-      rwlock_pre_init(arg[1], arg[2]);
+      rwlock_pre_init(arg[1]);
       break;
 
    case VG_USERREQ__POST_RWLOCK_DESTROY:
@@ -227,23 +278,32 @@
       break;
 
    case VG_USERREQ__PRE_RWLOCK_RDLOCK:
-      rwlock_pre_rdlock(arg[1], arg[2]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         rwlock_pre_rdlock(arg[1]);
       break;
 
    case VG_USERREQ__POST_RWLOCK_RDLOCK:
-      rwlock_post_rdlock(arg[1], arg[2]);
+      if (thread_leave_synchr(drd_tid) == 0)
+         rwlock_post_rdlock(arg[1], arg[2]);
       break;
 
    case VG_USERREQ__PRE_RWLOCK_WRLOCK:
-      rwlock_pre_wrlock(arg[1], arg[2]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         rwlock_pre_wrlock(arg[1]);
       break;
 
    case VG_USERREQ__POST_RWLOCK_WRLOCK:
-      rwlock_post_wrlock(arg[1], arg[2]);
+      if (thread_leave_synchr(drd_tid) == 0)
+         rwlock_post_wrlock(arg[1], arg[2]);
       break;
 
    case VG_USERREQ__PRE_RWLOCK_UNLOCK:
-      rwlock_pre_unlock(arg[1]);
+      if (thread_enter_synchr(drd_tid) == 0)
+         rwlock_pre_unlock(arg[1]);
+      break;
+      
+   case VG_USERREQ__POST_RWLOCK_UNLOCK:
+      thread_leave_synchr(drd_tid);
       break;
 
    default:
diff --git a/exp-drd/drd_clientreq.h b/exp-drd/drd_clientreq.h
index 1a569f6..0c73d91 100644
--- a/exp-drd/drd_clientreq.h
+++ b/exp-drd/drd_clientreq.h
@@ -49,89 +49,113 @@
   /* to notify the drd tool of a pthread_mutex_init call. */
   VG_USERREQ__PRE_MUTEX_INIT,
   /* args: Addr, MutexT */
+  /* to notify the drd tool after a pthread_mutex_init call. */
+  VG_USERREQ__POST_MUTEX_INIT,
+  /* args: Addr */
+  /* to notify the drd tool before a pthread_mutex_destroy call. */
+  VG_USERREQ__PRE_MUTEX_DESTROY,
+  /* args: Addr */
   /* to notify the drd tool of a pthread_mutex_destroy call. */
   VG_USERREQ__POST_MUTEX_DESTROY,
-  /* args: Addr, SizeT, MutexT */
+  /* args: Addr, MutexT */
   /* to notify the drd tool of pthread_mutex_lock calls */
-  VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK,
-  /* args: Addr, SizeT, MutexT */
+  VG_USERREQ__PRE_MUTEX_LOCK,
+  /* args: Addr, MutexT */
   /* to notify the drd tool of pthread_mutex_lock calls */
-  VG_USERREQ__POST_PTHREAD_MUTEX_LOCK,
+  VG_USERREQ__POST_MUTEX_LOCK,
   /* args: Addr, Bool */
   /* to notify the drd tool of pthread_mutex_unlock calls */
-  VG_USERREQ__PRE_PTHREAD_MUTEX_UNLOCK,
+  VG_USERREQ__PRE_MUTEX_UNLOCK,
+  /* args: Addr, MutexT */
+  /* to notify the drd tool after a pthread_mutex_unlock call. */
+  VG_USERREQ__POST_MUTEX_UNLOCK,
   /* args: Addr */
   VG_USERREQ__SPIN_INIT_OR_UNLOCK,
-  /* args: Addr spinlock, SizeT size */
+  /* args: Addr spinlock */
 
 
   /* to notify the drd tool of a pthread_cond_init call. */
-  VG_USERREQ__PRE_PTHREAD_COND_INIT,
+  VG_USERREQ__PRE_COND_INIT,
   /* args: Addr */
   /* to notify the drd tool of a pthread_cond_destroy call. */
-  VG_USERREQ__POST_PTHREAD_COND_DESTROY,
-  /* args: Addr cond, SizeT cond_size, Addr mutex, SizeT mutex_size,MutexT mt*/
-  VG_USERREQ__PRE_PTHREAD_COND_WAIT,
-  /* args: Addr cond, SizeT cond_size, Addr mutex, MutexT mt */
-  VG_USERREQ__POST_PTHREAD_COND_WAIT,
-  /* args: Addr cond, Addr mutex, Bool took_lock*/
-  VG_USERREQ__PRE_PTHREAD_COND_SIGNAL,
+  VG_USERREQ__POST_COND_DESTROY,
   /* args: Addr cond */
-  VG_USERREQ__PRE_PTHREAD_COND_BROADCAST,
+  VG_USERREQ__PRE_COND_WAIT,
+  /* args: Addr cond, Addr mutex, MutexT mt */
+  VG_USERREQ__POST_COND_WAIT,
+  /* args: Addr cond, Addr mutex, Bool took_lock*/
+  VG_USERREQ__PRE_COND_SIGNAL,
+  /* args: Addr cond */
+  VG_USERREQ__PRE_COND_BROADCAST,
   /* args: Addr cond */
 
   /* To notify the drd tool of a sem_init call. */
-  VG_USERREQ__SEM_INIT,
-  /* args: Addr sem, SizeT sem_size, Word pshared, Word value */
+  VG_USERREQ__PRE_SEM_INIT,
+  /* args: Addr sem, Word pshared, Word value */
+  /* To notify the drd tool after a sem_init call. */
+  VG_USERREQ__POST_SEM_INIT,
+  /* args: Addr sem */
   /* To notify the drd tool of a sem_destroy call. */
-  VG_USERREQ__SEM_DESTROY,
+  VG_USERREQ__PRE_SEM_DESTROY,
+  /* args: Addr sem */
+  /* To notify the drd tool after a sem_destroy call. */
+  VG_USERREQ__POST_SEM_DESTROY,
   /* args: Addr sem */
   /* To notify the drd tool of a sem_wait call. */
   VG_USERREQ__PRE_SEM_WAIT,
-  /* args: Addr sem, SizeT sem_size */
+  /* args: Addr sem */
   /* To notify the drd tool of a sem_wait call. */
   VG_USERREQ__POST_SEM_WAIT,
   /* args: Addr sem, Bool waited */
   /* To notify the drd tool before a sem_post call. */
   VG_USERREQ__PRE_SEM_POST,
-  /* args: Addr sem, SizeT sem_size */
+  /* args: Addr sem */
   /* To notify the drd tool after a sem_post call. */
   VG_USERREQ__POST_SEM_POST,
-  /* args: Addr sem, SizeT sem_size, Bool waited */
+  /* args: Addr sem, Bool waited */
 
   /* To notify the drd tool of a pthread_barrier_init call. */
-  VG_USERREQ__BARRIER_INIT,
-  /* args: Addr barrier, SizeT barrier_size, Word count */
+  VG_USERREQ__PRE_BARRIER_INIT,
+  /* args: Addr barrier, BarrierT type, Word count, Bool reinit */
+  /* To notify the drd tool after a pthread_barrier_init call. */
+  VG_USERREQ__POST_BARRIER_INIT,
+  /* args: Addr barrier, BarrierT type */
   /* To notify the drd tool of a pthread_barrier_destroy call. */
-  VG_USERREQ__BARRIER_DESTROY,
-  /* args: Addr barrier */
+  VG_USERREQ__PRE_BARRIER_DESTROY,
+  /* args: Addr barrier, BarrierT type. */
+  /* To notify the drd tool after a pthread_barrier_destroy call. */
+  VG_USERREQ__POST_BARRIER_DESTROY,
+  /* args: Addr barrier, BarrierT type. */
   /* To notify the drd tool of a pthread_barrier_wait call. */
   VG_USERREQ__PRE_BARRIER_WAIT,
-  /* args: Addr barrier */
+  /* args: Addr barrier, BarrierT type. */
   /* To notify the drd tool of a pthread_barrier_wait call. */
   VG_USERREQ__POST_BARRIER_WAIT,
-  /* args: Addr barrier, Word has_waited */
+  /* args: Addr barrier, BarrierT type, Word has_waited */
 
   /* To notify the drd tool of a pthread_rwlock_init call. */
   VG_USERREQ__PRE_RWLOCK_INIT,
-  /* args: Addr rwlock, SizeT size */
+  /* args: Addr rwlock */
   /* To notify the drd tool of a pthread_rwlock_destroy call. */
   VG_USERREQ__POST_RWLOCK_DESTROY,
-  /* args: Addr rwlock, SizeT size */
+  /* args: Addr rwlock */
   /* To notify the drd tool of a pthread_rwlock_rdlock call. */
   VG_USERREQ__PRE_RWLOCK_RDLOCK,
-  /* args: Addr rwlock, SizeT size */
+  /* args: Addr rwlock */
   /* To notify the drd tool of a pthread_rwlock_rdlock call. */
   VG_USERREQ__POST_RWLOCK_RDLOCK,
   /* args: Addr rwlock, Bool took_lock */
   /* To notify the drd tool of a pthread_rwlock_wrlock call. */
   VG_USERREQ__PRE_RWLOCK_WRLOCK,
-  /* args: Addr rwlock, SizeT size */
+  /* args: Addr rwlock */
   /* To notify the drd tool of a pthread_rwlock_wrlock call. */
   VG_USERREQ__POST_RWLOCK_WRLOCK,
   /* args: Addr rwlock, Bool took_lock */
   /* To notify the drd tool of a pthread_rwlock_unlock call. */
   VG_USERREQ__PRE_RWLOCK_UNLOCK,
+  /* args: Addr rwlock */
+  /* To notify the drd tool after a pthread_rwlock_unlock call. */
+  VG_USERREQ__POST_RWLOCK_UNLOCK,
   /* args: Addr rwlock, Bool unlocked */
 
 };
@@ -145,5 +169,10 @@
    mutex_type_spinlock         = 4,
 } MutexT;
 
+typedef enum
+  {
+    pthread_barrier = 1,
+    gomp_barrier = 2,
+  } BarrierT;
 
 #endif //  __DRD_CLIENTREQ_H
diff --git a/exp-drd/drd_cond.c b/exp-drd/drd_cond.c
index 4696917..62e8089 100644
--- a/exp-drd/drd_cond.c
+++ b/exp-drd/drd_cond.c
@@ -54,12 +54,10 @@
 }
 
 static
-void cond_initialize(struct cond_info* const p, const Addr cond,
-                     const SizeT size)
+void cond_initialize(struct cond_info* const p, const Addr cond)
 {
   tl_assert(cond != 0);
   tl_assert(p->a1         == cond);
-  tl_assert(p->a2 - p->a1 == size);
   tl_assert(p->type       == ClientCondvar);
 
   p->cleanup      = (void(*)(DrdClientobj*))cond_cleanup;
@@ -90,8 +88,7 @@
   }
 }
 
-static struct cond_info*
-cond_get_or_allocate(const Addr cond, const SizeT size)
+static struct cond_info* cond_get_or_allocate(const Addr cond)
 {
   struct cond_info *p;
 
@@ -99,8 +96,8 @@
   p = &clientobj_get(cond, ClientCondvar)->cond;
   if (p == 0)
   {
-    p = &clientobj_add(cond, cond + size, ClientCondvar)->cond;
-    cond_initialize(p, cond, size);
+    p = &clientobj_add(cond, ClientCondvar)->cond;
+    cond_initialize(p, cond);
   }
   return p;
 }
@@ -112,7 +109,7 @@
 }
 
 /** Called before pthread_cond_init(). */
-void cond_pre_init(const Addr cond, const SizeT size)
+void cond_pre_init(const Addr cond)
 {
   struct cond_info* p;
 
@@ -125,8 +122,6 @@
                  cond);
   }
 
-  tl_assert(size > 0);
-
   p = cond_get(cond);
 
   if (p)
@@ -139,7 +134,7 @@
                             &cei);
   }
 
-  p = cond_get_or_allocate(cond, size);
+  p = cond_get_or_allocate(cond);
 }
 
 /** Called after pthread_cond_destroy(). */
@@ -183,7 +178,7 @@
 }
 
 /** Called before pthread_cond_wait(). */
-int cond_pre_wait(const Addr cond, const SizeT cond_size, const Addr mutex)
+int cond_pre_wait(const Addr cond, const Addr mutex)
 {
   struct cond_info* p;
 
@@ -196,7 +191,7 @@
                  cond);
   }
 
-  p = cond_get_or_allocate(cond, cond_size);
+  p = cond_get_or_allocate(cond);
   tl_assert(p);
 
   if (p->waiter_count == 0)
diff --git a/exp-drd/drd_cond.h b/exp-drd/drd_cond.h
index 03db2a5..23880f5 100644
--- a/exp-drd/drd_cond.h
+++ b/exp-drd/drd_cond.h
@@ -32,16 +32,16 @@
 
 
 #include "drd_thread.h"      // DrdThreadid
-#include "pub_tool_basics.h" // Addr, SizeT
+#include "pub_tool_basics.h" // Addr
 
 
 struct cond_info;
 
 
 void cond_set_trace(const Bool trace_cond);
-void cond_pre_init(const Addr cond, const SizeT size);
+void cond_pre_init(const Addr cond);
 void cond_post_destroy(const Addr cond);
-int cond_pre_wait(const Addr cond, const SizeT cond_size, const Addr mutex);
+int cond_pre_wait(const Addr cond, const Addr mutex);
 int cond_post_wait(const Addr cond);
 void cond_pre_signal(const Addr cond);
 void cond_pre_broadcast(const Addr cond);
diff --git a/exp-drd/drd_error.c b/exp-drd/drd_error.c
index fdf9b0e..cc1923b 100644
--- a/exp-drd/drd_error.c
+++ b/exp-drd/drd_error.c
@@ -249,8 +249,9 @@
    else if (ai.akind == eMallocd && ai.lastchange)
    {
       VG_(message)(Vg_UserMsg,
-                   "Address 0x%08lx is at offset %ld. Allocation context:",
-                   dri->addr, ai.rwoffset);
+                   "Address 0x%lx is at offset %ld from 0x%lx."
+                   " Allocation context:",
+                   dri->addr, ai.rwoffset, dri->addr - ai.rwoffset);
       VG_(pp_ExeContext)(ai.lastchange);
    }
    else
diff --git a/exp-drd/drd_gomp_intercepts.c b/exp-drd/drd_gomp_intercepts.c
new file mode 100644
index 0000000..e8c8db2
--- /dev/null
+++ b/exp-drd/drd_gomp_intercepts.c
@@ -0,0 +1,118 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Client-space code for drd.             drd_gomp_intercepts.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of drd, a data race detector.
+
+  Copyright (C) 2006-2008 Bart Van Assche
+  bart.vanassche@gmail.com
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+/* ---------------------------------------------------------------------
+   ALL THE CODE IN THIS FILE RUNS ON THE SIMULATED CPU. 
+
+   These functions are not called directly - they're the targets of code
+   redirection or load notifications (see pub_core_redir.h for info).
+   They're named weirdly so that the intercept code can find them when the
+   shared object is initially loaded.
+
+   Note that this filename has the "drd_" prefix because it can appear
+   in stack traces, and the "drd_" makes it a little clearer that it
+   originates from Valgrind.
+   ------------------------------------------------------------------ */
+
+#include <assert.h>
+#include "drd_clientreq.h"
+#include "pub_tool_redir.h"
+
+
+// Defines. GOMP_FUNC() emits a prototype and then the definition head of a
+// wrapper for f; the soname pattern libgompZdsoZd1Za and f are Z-encoded.
+#define GOMP_FUNC(ret_ty, f, args...)                   \
+  ret_ty VG_WRAP_FUNCTION_ZZ(libgompZdsoZd1Za,f)(args); \
+  ret_ty VG_WRAP_FUNCTION_ZZ(libgompZdsoZd1Za,f)(args)
+
+
+// Type definitions
+// Presumably a stand-in: the wrappers only forward gomp_barrier_t pointers.
+typedef void* gomp_barrier_t;
+
+
+// Function definitions.
+
+// gomp_barrier_init() wrapper: PRE/POST_BARRIER_INIT requests around the call.
+GOMP_FUNC(void, gompZubarrierZuinit, // gomp_barrier_init
+          gomp_barrier_t* barrier, unsigned count)
+{
+  int    ret, res; // macro outputs; neither is read afterwards
+  OrigFn fn;
+  // Fetch the original function before anything else runs in the wrapper.
+  VALGRIND_GET_ORIG_FN(fn);
+  VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_BARRIER_INIT,
+                             barrier, gomp_barrier, count, 0, 0);
+  CALL_FN_W_WW(ret, fn, barrier, count);
+  VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_BARRIER_INIT,
+                             barrier, gomp_barrier, 0, 0, 0);
+}
+
+// gomp_barrier_reinit() wrapper; the 1 below presumably flags reinitialization.
+GOMP_FUNC(void, gompZubarrierZureinit, // gomp_barrier_reinit
+          gomp_barrier_t* barrier, unsigned count)
+{
+  int    ret, res; // macro outputs; neither is read afterwards
+  OrigFn fn;
+  VALGRIND_GET_ORIG_FN(fn); // fetched before any client request is issued
+  VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_BARRIER_INIT,
+                             barrier, gomp_barrier, count, 1, 0);
+  CALL_FN_W_WW(ret, fn, barrier, count);
+  VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_BARRIER_INIT,
+                             barrier, gomp_barrier, 0, 0, 0);
+}
+
+// gomp_barrier_destroy() wrapper: PRE/POST_BARRIER_DESTROY around the call.
+GOMP_FUNC(void, gompZubarrierZudestroy, // gomp_barrier_destroy
+          gomp_barrier_t* barrier)
+{
+  OrigFn original;            // context of the intercepted function
+  int    status, call_result; // macro outputs; neither is read afterwards
+
+  VALGRIND_GET_ORIG_FN(original);
+  VALGRIND_DO_CLIENT_REQUEST(status, -1, VG_USERREQ__PRE_BARRIER_DESTROY,
+                             barrier, gomp_barrier, 0, 0, 0);
+  CALL_FN_W_W(call_result, original, barrier);
+  VALGRIND_DO_CLIENT_REQUEST(status, -1, VG_USERREQ__POST_BARRIER_DESTROY,
+                             barrier, gomp_barrier, 0, 0, 0);
+}
+
+// gomp_barrier_wait() wrapper: PRE/POST_BARRIER_WAIT requests around the call.
+GOMP_FUNC(void, gompZubarrierZuwait, // gomp_barrier_wait
+          gomp_barrier_t* barrier)
+{
+  OrigFn original;            // context of the intercepted function
+  int    status, call_result; // macro outputs; neither is read afterwards
+  VALGRIND_GET_ORIG_FN(original);
+  VALGRIND_DO_CLIENT_REQUEST(status, -1, VG_USERREQ__PRE_BARRIER_WAIT,
+                             barrier, gomp_barrier, 0, 0, 0);
+  CALL_FN_W_W(call_result, original, barrier);
+  VALGRIND_DO_CLIENT_REQUEST(status, -1, VG_USERREQ__POST_BARRIER_WAIT,
+                             barrier, gomp_barrier, 1, 0, 0);
+}
diff --git a/exp-drd/drd_main.c b/exp-drd/drd_main.c
index 1f77733..35e3cef 100644
--- a/exp-drd/drd_main.c
+++ b/exp-drd/drd_main.c
@@ -156,6 +156,9 @@
    tl_assert(thread_get_running_tid()
              == VgThreadIdToDrdThreadId(VG_(get_running_tid())));
 
+   if (! thread_is_recording(thread_get_running_tid()))
+      return;
+
 #if 1
    if (drd_trace_mem || (addr == drd_trace_address))
    {
@@ -199,6 +202,9 @@
    tl_assert(thread_get_running_tid()
              == VgThreadIdToDrdThreadId(VG_(get_running_tid())));
 
+   if (! thread_is_recording(thread_get_running_tid()))
+      return;
+
 #if 1
    if (drd_trace_mem || (addr == drd_trace_address))
    {
@@ -230,7 +236,7 @@
       VG_(maybe_record_error)(VG_(get_running_tid)(),
                               DataRaceErr,
                               VG_(get_IP)(VG_(get_running_tid)()),
-                                    "Conflicting accesses",
+                              "Conflicting accesses",
                               &drei);
    }
 }
@@ -452,10 +458,9 @@
    thread_finished(drd_tid);
 }
 
-void drd_pre_mutex_init(const Addr mutex, const SizeT size,
-			const MutexT mutex_type)
+void drd_pre_mutex_init(const Addr mutex, const MutexT mutex_type)
 {
-   mutex_init(mutex, size, mutex_type);
+   mutex_init(mutex, mutex_type);
 }
 
 void drd_post_mutex_destroy(const Addr mutex, const MutexT mutex_type)
@@ -463,11 +468,9 @@
    mutex_post_destroy(mutex);
 }
 
-void drd_pre_mutex_lock(const Addr mutex,
-                        const SizeT size,
-                        const MutexT mutex_type)
+void drd_pre_mutex_lock(const Addr mutex, const MutexT mutex_type)
 {
-   mutex_pre_lock(mutex, size, mutex_type);
+   mutex_pre_lock(mutex, mutex_type);
 }
 
 void drd_post_mutex_lock(const Addr mutex, const Bool took_lock)
@@ -480,9 +483,9 @@
    mutex_unlock(mutex, mutex_type);
 }
 
-void drd_pre_cond_init(Addr cond, SizeT s)
+void drd_pre_cond_init(Addr cond)
 {
-   cond_pre_init(cond, s);
+   cond_pre_init(cond);
 }
 
 void drd_post_cond_destroy(Addr cond)
@@ -490,10 +493,10 @@
    cond_post_destroy(cond);
 }
 
-void drd_semaphore_init(const Addr semaphore, const SizeT size,
+void drd_semaphore_init(const Addr semaphore,
                         const Word pshared, const Word value)
 {
-   semaphore_init(semaphore, size, pshared, value);
+   semaphore_init(semaphore, pshared, value);
 }
 
 void drd_semaphore_destroy(const Addr semaphore)
@@ -501,10 +504,9 @@
    semaphore_destroy(semaphore);
 }
 
-void drd_semaphore_pre_wait(const DrdThreadId tid, const Addr semaphore,
-                            const SizeT size)
+void drd_semaphore_pre_wait(const DrdThreadId tid, const Addr semaphore)
 {
-   semaphore_pre_wait(semaphore, size);
+   semaphore_pre_wait(semaphore);
 }
 
 void drd_semaphore_post_wait(const DrdThreadId tid, const Addr semaphore,
@@ -513,38 +515,40 @@
    semaphore_post_wait(tid, semaphore, waited);
 }
 
-void drd_semaphore_pre_post(const DrdThreadId tid, const Addr semaphore,
-                            const SizeT size)
+void drd_semaphore_pre_post(const DrdThreadId tid, const Addr semaphore)
 {
-   semaphore_pre_post(tid, semaphore, size);
+   semaphore_pre_post(tid, semaphore);
 }
 
 void drd_semaphore_post_post(const DrdThreadId tid, const Addr semaphore,
-                             const SizeT size, const Bool waited)
+                             const Bool waited)
 {
-   semaphore_post_post(tid, semaphore, size, waited);
+   semaphore_post_post(tid, semaphore, waited);
 }
 
 
-void drd_barrier_init(const Addr barrier, const SizeT size, const Word count)
+void drd_barrier_init(const Addr barrier,
+                      const BarrierT barrier_type, const Word count,
+                      const Bool reinitialization)
 {
-   barrier_init(barrier, size, count);
+   barrier_init(barrier, barrier_type, count, reinitialization);
 }
 
-void drd_barrier_destroy(const Addr barrier)
+void drd_barrier_destroy(const Addr barrier, const BarrierT barrier_type)
 {
-   barrier_destroy(barrier);
+   barrier_destroy(barrier, barrier_type);
 }
 
-void drd_barrier_pre_wait(const DrdThreadId tid, const Addr barrier)
+void drd_barrier_pre_wait(const DrdThreadId tid, const Addr barrier,
+                          const BarrierT barrier_type)
 {
-   barrier_pre_wait(tid, barrier);
+   barrier_pre_wait(tid, barrier, barrier_type);
 }
 
 void drd_barrier_post_wait(const DrdThreadId tid, const Addr barrier,
-                           const Bool waited)
+                           const BarrierT barrier_type, const Bool waited)
 {
-   barrier_post_wait(tid, barrier, waited);
+   barrier_post_wait(tid, barrier, barrier_type, waited);
 }
 
 
@@ -596,7 +600,7 @@
       {
       case Ist_IMark:
          instrument = VG_(seginfo_sect_kind)(NULL, 0, st->Ist.IMark.addr)
-                      != Vg_SectPLT;
+            != Vg_SectPLT;
          break;
 
       case Ist_MBE:
@@ -623,7 +627,7 @@
          {
             addr_expr = st->Ist.Store.addr;
             size_expr = mkIRExpr_HWord( 
-                   sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Store.data)));
+                                       sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Store.data)));
             argv = mkIRExprVec_2(addr_expr, size_expr);
             di = unsafeIRDirty_0_N(/*regparms*/2, 
                                    "drd_trace_store",
diff --git a/exp-drd/drd_mutex.c b/exp-drd/drd_mutex.c
index 55598ae..df79cbe 100644
--- a/exp-drd/drd_mutex.c
+++ b/exp-drd/drd_mutex.c
@@ -57,15 +57,11 @@
 
 static
 void mutex_initialize(struct mutex_info* const p,
-                      const Addr mutex,
-                      const SizeT size,
-                      const MutexT mutex_type)
+                      const Addr mutex, const MutexT mutex_type)
 {
   tl_assert(mutex != 0);
-  tl_assert(size > 0);
 
   tl_assert(p->a1 == mutex);
-  tl_assert(p->a2 == mutex + size);
   p->cleanup         = (void(*)(DrdClientobj*))&mutex_cleanup;
   p->mutex_type      = mutex_type;
   p->recursion_count = 0;
@@ -101,9 +97,7 @@
 
 static
 struct mutex_info*
-mutex_get_or_allocate(const Addr mutex,
-                      const SizeT size,
-                      const MutexT mutex_type)
+mutex_get_or_allocate(const Addr mutex, const MutexT mutex_type)
 {
   struct mutex_info* p;
 
@@ -111,12 +105,10 @@
   p = &clientobj_get(mutex, ClientMutex)->mutex;
   if (p)
   {
-    tl_assert(p->mutex_type == mutex_type);
-    tl_assert(p->a2 - p->a1 == size);
     return p;
   }
 
-  if (clientobj_present(mutex, mutex + size))
+  if (clientobj_present(mutex, mutex + 1))
   {
     GenericErrInfo GEI;
     VG_(maybe_record_error)(VG_(get_running_tid)(),
@@ -127,8 +119,8 @@
     return 0;
   }
 
-  p = &clientobj_add(mutex, mutex + size, ClientMutex)->mutex;
-  mutex_initialize(p, mutex, size, mutex_type);
+  p = &clientobj_add(mutex, ClientMutex)->mutex;
+  mutex_initialize(p, mutex, mutex_type);
   return p;
 }
 
@@ -140,7 +132,7 @@
 
 /** Called before pthread_mutex_init(). */
 struct mutex_info*
-mutex_init(const Addr mutex, const SizeT size, const MutexT mutex_type)
+mutex_init(const Addr mutex, const MutexT mutex_type)
 {
   struct mutex_info* p;
 
@@ -178,7 +170,7 @@
                             &MEI);
     return p;
   }
-  p = mutex_get_or_allocate(mutex, size, mutex_type);
+  p = mutex_get_or_allocate(mutex, mutex_type);
 
   return p;
 }
@@ -208,11 +200,13 @@
  *  an attempt is made to lock recursively a synchronization object that must
  *  not be locked recursively.
  */
-void mutex_pre_lock(const Addr mutex, const SizeT size, MutexT mutex_type)
+void mutex_pre_lock(const Addr mutex, MutexT mutex_type)
 {
   struct mutex_info* p;
 
-  p = mutex_get(mutex);
+  p = mutex_get_or_allocate(mutex, mutex_type);
+
+  tl_assert(p);
 
   if (s_trace_mutex)
   {
@@ -220,10 +214,10 @@
                  "[%d/%d] pre_mutex_lock  %s 0x%lx rc %d owner %d",
                  VG_(get_running_tid)(),
                  thread_get_running_tid(),
-                 p ? mutex_get_typename(p) : "(?)",
+                 mutex_get_typename(p),
                  mutex,
-                 p ? p->recursion_count : 0,
-                 p ? p->owner : VG_INVALID_THREADID);
+                 p->recursion_count,
+                 p->owner);
   }
 
   if (mutex_type == mutex_type_invalid_mutex)
@@ -237,13 +231,6 @@
     return;
   }
 
-  if (p == 0)
-  {
-    p = mutex_init(mutex, size, mutex_type);
-  }
-
-  tl_assert(p);
-
   if (p->owner == thread_get_running_tid()
       && p->recursion_count >= 1
       && mutex_type != mutex_type_recursive_mutex)
diff --git a/exp-drd/drd_mutex.h b/exp-drd/drd_mutex.h
index 27cc5d0..204388c 100644
--- a/exp-drd/drd_mutex.h
+++ b/exp-drd/drd_mutex.h
@@ -33,18 +33,18 @@
 #include "drd_clientreq.h"        // MutexT
 #include "drd_thread.h"           // DrdThreadId
 #include "drd_vc.h"
-#include "pub_tool_basics.h"      // Addr, SizeT
+#include "pub_tool_basics.h"      // Addr
 
 
 struct mutex_info;
 
 
 void mutex_set_trace(const Bool trace_mutex);
-struct mutex_info* mutex_init(const Addr mutex, const SizeT size,
+struct mutex_info* mutex_init(const Addr mutex,
                               const MutexT mutex_type);
 void mutex_post_destroy(const Addr mutex);
 struct mutex_info* mutex_get(const Addr mutex);
-void mutex_pre_lock(const Addr mutex, const SizeT size,
+void mutex_pre_lock(const Addr mutex,
                     const MutexT mutex_type);
 void mutex_post_lock(const Addr mutex, const Bool took_lock);
 void mutex_unlock(const Addr mutex, const MutexT mutex_type);
diff --git a/exp-drd/drd_pthread_intercepts.c b/exp-drd/drd_pthread_intercepts.c
index e41d51d..0d132cb 100644
--- a/exp-drd/drd_pthread_intercepts.c
+++ b/exp-drd/drd_pthread_intercepts.c
@@ -321,14 +321,16 @@
    int ret;
    int res;
    OrigFn fn;
-   int mt = PTHREAD_MUTEX_DEFAULT;
+   int mt;
    VALGRIND_GET_ORIG_FN(fn);
+   mt = PTHREAD_MUTEX_DEFAULT;
    if (attr)
       pthread_mutexattr_gettype(attr, &mt);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_MUTEX_INIT,
-                              mutex, sizeof(*mutex),
-                              pthread_to_drd_mutex_type(mt), 0, 0);
+                              mutex, pthread_to_drd_mutex_type(mt), 0, 0, 0);
    CALL_FN_W_WW(ret, fn, mutex, attr);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_MUTEX_INIT,
+                              mutex, 0, 0, 0, 0);
    return ret;
 }
 
@@ -340,6 +342,8 @@
    int res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_MUTEX_DESTROY,
+                              mutex, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, mutex);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_MUTEX_DESTROY,
                               mutex, mutex_type(mutex), 0, 0, 0);
@@ -354,10 +358,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK,
-                              mutex, sizeof(*mutex), mutex_type(mutex), 0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_MUTEX_LOCK,
+                              mutex, mutex_type(mutex), 0, 0, 0);
    CALL_FN_W_W(ret, fn, mutex);
-   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__POST_PTHREAD_MUTEX_LOCK,
+   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__POST_MUTEX_LOCK,
                               mutex, ret == 0, 0, 0, 0);
    return ret;
 }
@@ -370,10 +374,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK,
-                              mutex, sizeof(*mutex), mutex_type(mutex), 0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_MUTEX_LOCK,
+                              mutex, mutex_type(mutex), 0, 0, 0);
    CALL_FN_W_W(ret, fn, mutex);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_MUTEX_LOCK,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_MUTEX_LOCK,
                               mutex, ret == 0, 0, 0, 0);
    return ret;
 }
@@ -387,10 +391,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK,
-                              mutex, sizeof(*mutex), mutex_type(mutex), 0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_MUTEX_LOCK,
+                              mutex, mutex_type(mutex), 0, 0, 0);
    CALL_FN_W_WW(ret, fn, mutex, abs_timeout);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_MUTEX_LOCK,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_MUTEX_LOCK,
                               mutex, ret == 0, 0, 0, 0);
    return ret;
 }
@@ -404,9 +408,12 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1,
-                              VG_USERREQ__PRE_PTHREAD_MUTEX_UNLOCK,
-                              mutex, sizeof(*mutex), mutex_type(mutex), 0, 0);
+                              VG_USERREQ__PRE_MUTEX_UNLOCK,
+                              mutex, mutex_type(mutex), 0, 0, 0);
    CALL_FN_W_W(ret, fn, mutex);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1,
+                              VG_USERREQ__POST_MUTEX_UNLOCK,
+                              mutex, 0, 0, 0, 0);
    return ret;
 }
 
@@ -419,8 +426,8 @@
    int res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_PTHREAD_COND_INIT,
-                              cond, sizeof(*cond), 0, 0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_COND_INIT,
+                              cond, 0, 0, 0, 0);
    CALL_FN_W_WW(ret, fn, cond, attr);
    return ret;
 }
@@ -434,7 +441,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    CALL_FN_W_W(ret, fn, cond);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_COND_DESTROY,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_COND_DESTROY,
                               cond, 0, 0, 0, 0);
    return ret;
 }
@@ -448,11 +455,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_PTHREAD_COND_WAIT,
-                              cond, sizeof(*cond), mutex, mutex_type(mutex),
-                              0);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_COND_WAIT,
+                              cond, mutex, mutex_type(mutex), 0, 0);
    CALL_FN_W_WW(ret, fn, cond, mutex);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_COND_WAIT,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_COND_WAIT,
                               cond, mutex, ret == 0, 0, 0);
    return ret;
 }
@@ -467,11 +473,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_PTHREAD_COND_WAIT,
-                              cond, sizeof(*cond), mutex, mutex_type(mutex),
-                              0);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_COND_WAIT,
+                              cond, mutex, mutex_type(mutex), 0, 0);
    CALL_FN_W_WWW(ret, fn, cond, mutex, abstime);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_COND_WAIT,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_COND_WAIT,
                               cond, mutex, ret == 0, 0, 0);
    return ret;
 }
@@ -484,7 +489,7 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_PTHREAD_COND_SIGNAL,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_COND_SIGNAL,
                               cond, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, cond);
    return ret;
@@ -498,7 +503,7 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_PTHREAD_COND_BROADCAST,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_COND_BROADCAST,
                               cond, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, cond);
    return ret;
@@ -515,8 +520,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__SPIN_INIT_OR_UNLOCK,
-                              spinlock, sizeof(*spinlock),
-                              mutex_type_spinlock, 0, 0);
+                              spinlock, mutex_type_spinlock, 0, 0, 0);
    CALL_FN_W_WW(ret, fn, spinlock, pshared);
    return ret;
 }
@@ -543,11 +547,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK,
-                              spinlock, sizeof(*spinlock), mutex_type_spinlock,
-                              0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_MUTEX_LOCK,
+                              spinlock, mutex_type_spinlock, 0, 0, 0);
    CALL_FN_W_W(ret, fn, spinlock);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_MUTEX_LOCK,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_MUTEX_LOCK,
                               spinlock, ret == 0, 0, 0, 0);
    return ret;
 }
@@ -560,11 +563,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_PTHREAD_MUTEX_LOCK,
-                              spinlock, sizeof(*spinlock), mutex_type_spinlock,
-                              0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, 0, VG_USERREQ__PRE_MUTEX_LOCK,
+                              spinlock, mutex_type_spinlock, 0, 0, 0);
    CALL_FN_W_W(ret, fn, spinlock);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_PTHREAD_MUTEX_LOCK,
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_MUTEX_LOCK,
                               spinlock, ret == 0, 0, 0, 0);
    return ret;
 }
@@ -578,8 +580,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__SPIN_INIT_OR_UNLOCK,
-                              spinlock, sizeof(*spinlock),
-                              mutex_type_spinlock, 0, 0);
+                              spinlock, mutex_type_spinlock, 0, 0, 0);
    CALL_FN_W_W(ret, fn, spinlock);
    return ret;
 }
@@ -594,10 +595,11 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__BARRIER_INIT,
-                              barrier, sizeof(*barrier),
-                              count, 0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_BARRIER_INIT,
+                              barrier, pthread_barrier, count, 0, 0);
    CALL_FN_W_WWW(ret, fn, barrier, attr, count);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_BARRIER_INIT,
+                              barrier, pthread_barrier, 0, 0, 0);
    return ret;
 }
 
@@ -609,9 +611,11 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_BARRIER_DESTROY,
+                              barrier, pthread_barrier, 0, 0, 0);
    CALL_FN_W_W(ret, fn, barrier);
-   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__BARRIER_DESTROY,
-                              barrier, 0, 0, 0, 0);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_BARRIER_DESTROY,
+                              barrier, pthread_barrier, 0, 0, 0);
    return ret;
 }
 
@@ -624,22 +628,19 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_BARRIER_WAIT,
-                              barrier, 0, 0, 0, 0);
+                              barrier, pthread_barrier, 0, 0, 0);
    CALL_FN_W_W(ret, fn, barrier);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_BARRIER_WAIT,
-                              barrier,
+                              barrier, pthread_barrier,
                               ret == 0 || ret == PTHREAD_BARRIER_SERIAL_THREAD,
-                              0, 0, 0);
+                              0, 0);
    return ret;
 }
 
 
-// From glibc 2.0 linuxthreads/sysdeps/pthread/cmpxchg/semaphorebits.h
-typedef struct { long int sem_status; } sem_t_glibc_2_0;
-
 // sem_init
 PTH_FUNC(int, semZuinitZAGLIBCZu2Zd0, // sem_init@GLIBC_2.0
-              sem_t_glibc_2_0 *sem,
+              sem_t *sem,
               int pshared,
               unsigned int value)
 {
@@ -647,13 +648,11 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_SEM_INIT,
+                              sem, pshared, value, 0, 0);
    CALL_FN_W_WWW(ret, fn, sem, pshared, value);
-   if (ret == 0)
-   {
-      VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__SEM_INIT,
-                                 sem, sizeof(*sem),
-                                 pshared, value, 0);
-   }
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_SEM_INIT,
+                              sem, 0, 0, 0, 0);
    return ret;
 }
 
@@ -666,30 +665,27 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_SEM_INIT,
+                              sem, pshared, value, 0, 0);
    CALL_FN_W_WWW(ret, fn, sem, pshared, value);
-   if (ret == 0)
-   {
-      VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__SEM_INIT,
-                                 sem, sizeof(*sem),
-                                 pshared, value, 0);
-   }
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_SEM_INIT,
+                              sem, 0, 0, 0, 0);
    return ret;
 }
 
 // sem_destroy
 PTH_FUNC(int, semZudestroyZAGLIBCZu2Zd0, // sem_destroy@GLIBC_2.0
-              sem_t_glibc_2_0 *sem)
+              sem_t *sem)
 {
    int   ret;
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_SEM_DESTROY,
+                              sem, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, sem);
-   if (ret == 0)
-   {
-      VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__SEM_DESTROY,
-                                 sem, 0, 0, 0, 0);
-   }
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_SEM_DESTROY,
+                              sem, 0, 0, 0, 0);
    return ret;
 }
 
@@ -700,18 +696,17 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_SEM_DESTROY,
+                              sem, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, sem);
-   if (ret == 0)
-   {
-      VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__SEM_DESTROY,
-                                 sem, 0, 0, 0, 0);
-   }
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_SEM_DESTROY,
+                              sem, 0, 0, 0, 0);
    return ret;
 }
 
 // sem_wait
 PTH_FUNC(int, semZuwaitZAGLIBCZu2Zd0, // sem_wait@GLIBC_2.0
-              sem_t_glibc_2_0 *sem)
+              sem_t *sem)
 {
    int   ret;
    int   res;
@@ -743,7 +738,7 @@
 
 // sem_trywait
 PTH_FUNC(int, semZutrywaitZAGLIBCZu2Zd0, // sem_trywait@GLIBC_2.0
-              sem_t_glibc_2_0 *sem)
+              sem_t *sem)
 {
    int   ret;
    int   res;
@@ -790,17 +785,17 @@
 
 // sem_post
 PTH_FUNC(int, semZupostZAGLIBCZu2Zd0, // sem_post@GLIBC_2.0
-              sem_t_glibc_2_0 *sem)
+              sem_t *sem)
 {
    int   ret;
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_SEM_POST,
-                              sem, sizeof(*sem), 0, 0, 0);
+                              sem, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, sem);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_SEM_POST,
-                              sem, sizeof(*sem), ret == 0, 0, 0);
+                              sem, ret == 0, 0, 0, 0);
    return ret;
 }
 
@@ -813,10 +808,10 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_SEM_POST,
-                              sem, sizeof(*sem), 0, 0, 0);
+                              sem, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, sem);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_SEM_POST,
-                              sem, sizeof(*sem), ret == 0, 0, 0);
+                              sem, ret == 0, 0, 0, 0);
    return ret;
 }
 
@@ -831,7 +826,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_INIT,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_WW(ret, fn, rwlock, attr);
    return ret;
 }
@@ -861,7 +856,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_RDLOCK,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_RDLOCK,
                               rwlock, ret == 0, 0, 0, 0);
@@ -878,7 +873,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_WRLOCK,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_WRLOCK,
                               rwlock, ret == 0, 0, 0, 0);
@@ -895,7 +890,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_RDLOCK,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_RDLOCK,
                               rwlock, ret == 0, 0, 0, 0);
@@ -912,7 +907,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_WRLOCK,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_WRLOCK,
                               rwlock, ret == 0, 0, 0, 0);
@@ -929,7 +924,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_RDLOCK,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_RDLOCK,
                               rwlock, ret == 0, 0, 0, 0);
@@ -946,7 +941,7 @@
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_WRLOCK,
-                              rwlock, sizeof(*rwlock), 0, 0, 0);
+                              rwlock, 0, 0, 0, 0);
    CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_WRLOCK,
                               rwlock, ret == 0, 0, 0, 0);
@@ -962,8 +957,10 @@
    int   res;
    OrigFn fn;
    VALGRIND_GET_ORIG_FN(fn);
-   CALL_FN_W_W(ret, fn, rwlock);
    VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__PRE_RWLOCK_UNLOCK,
+                              rwlock, 0, 0, 0, 0);
+   CALL_FN_W_W(ret, fn, rwlock);
+   VALGRIND_DO_CLIENT_REQUEST(res, -1, VG_USERREQ__POST_RWLOCK_UNLOCK,
                               rwlock, ret == 0, 0, 0, 0);
    return ret;
 }
diff --git a/exp-drd/drd_rwlock.c b/exp-drd/drd_rwlock.c
index ade98db..d3a5ed9 100644
--- a/exp-drd/drd_rwlock.c
+++ b/exp-drd/drd_rwlock.c
@@ -154,14 +154,10 @@
 }
 
 static
-void rwlock_initialize(struct rwlock_info* const p,
-                       const Addr rwlock,
-                       const SizeT size)
+void rwlock_initialize(struct rwlock_info* const p, const Addr rwlock)
 {
   tl_assert(rwlock != 0);
-  tl_assert(size > 0);
   tl_assert(p->a1 == rwlock);
-  tl_assert(p->a2 == rwlock + size);
   tl_assert(p->type == ClientRwlock);
 
   p->cleanup     = (void(*)(DrdClientobj*))&rwlock_cleanup;
@@ -204,7 +200,7 @@
 
 static
 struct rwlock_info*
-rwlock_get_or_allocate(const Addr rwlock, const SizeT size)
+rwlock_get_or_allocate(const Addr rwlock)
 {
   struct rwlock_info* p;
 
@@ -212,11 +208,10 @@
   p = &clientobj_get(rwlock, ClientRwlock)->rwlock;
   if (p)
   {
-    tl_assert(p->a2 - p->a1 == size);
     return p;
   }
 
-  if (clientobj_present(rwlock, rwlock + size))
+  if (clientobj_present(rwlock, rwlock + 1))
   {
     GenericErrInfo GEI;
     VG_(maybe_record_error)(VG_(get_running_tid)(),
@@ -227,8 +222,8 @@
     return 0;
   }
 
-  p = &clientobj_add(rwlock, rwlock + size, ClientRwlock)->rwlock;
-  rwlock_initialize(p, rwlock, size);
+  p = &clientobj_add(rwlock, ClientRwlock)->rwlock;
+  rwlock_initialize(p, rwlock);
   return p;
 }
 
@@ -239,8 +234,7 @@
 }
 
 /** Called before pthread_rwlock_init(). */
-struct rwlock_info*
-rwlock_pre_init(const Addr rwlock, const SizeT size)
+struct rwlock_info* rwlock_pre_init(const Addr rwlock)
 {
   struct rwlock_info* p;
 
@@ -268,7 +262,7 @@
     return p;
   }
 
-  p = rwlock_get_or_allocate(rwlock, size);
+  p = rwlock_get_or_allocate(rwlock);
 
   return p;
 }
@@ -298,11 +292,11 @@
  *  an attempt is made to lock recursively a synchronization object that must
  *  not be locked recursively.
  */
-void rwlock_pre_rdlock(const Addr rwlock, const SizeT size)
+void rwlock_pre_rdlock(const Addr rwlock)
 {
   struct rwlock_info* p;
 
-  p = rwlock_get_or_allocate(rwlock, size);
+  p = rwlock_get_or_allocate(rwlock);
 
   tl_assert(p);
 
@@ -364,7 +358,7 @@
  *  an attempt is made to lock recursively a synchronization object that must
  *  not be locked recursively.
  */
-void rwlock_pre_wrlock(const Addr rwlock, const SizeT size)
+void rwlock_pre_wrlock(const Addr rwlock)
 {
   struct rwlock_info* p;
 
@@ -381,7 +375,7 @@
 
   if (p == 0)
   {
-    p = rwlock_get_or_allocate(rwlock, size);
+    p = rwlock_get_or_allocate(rwlock);
   }
 
   tl_assert(p);
diff --git a/exp-drd/drd_rwlock.h b/exp-drd/drd_rwlock.h
index b73db93..6abc203 100644
--- a/exp-drd/drd_rwlock.h
+++ b/exp-drd/drd_rwlock.h
@@ -33,18 +33,18 @@
 #include "drd_clientobj.h"        // struct rwlock_info
 #include "drd_thread.h"           // DrdThreadId
 #include "drd_vc.h"
-#include "pub_tool_basics.h"      // Addr, SizeT
+#include "pub_tool_basics.h"      // Addr
 
 
 struct rwlock_info;
 
 
 void rwlock_set_trace(const Bool trace_rwlock);
-struct rwlock_info* rwlock_pre_init(const Addr rwlock, const SizeT size);
+struct rwlock_info* rwlock_pre_init(const Addr rwlock);
 void rwlock_post_destroy(const Addr rwlock);
-void rwlock_pre_rdlock(const Addr rwlock, const SizeT size);
+void rwlock_pre_rdlock(const Addr rwlock);
 void rwlock_post_rdlock(const Addr rwlock, const Bool took_lock);
-void rwlock_pre_wrlock(const Addr rwlock, const SizeT size);
+void rwlock_pre_wrlock(const Addr rwlock);
 void rwlock_post_wrlock(const Addr rwlock, const Bool took_lock);
 void rwlock_pre_unlock(const Addr rwlock);
 void rwlock_thread_delete(const DrdThreadId tid);
diff --git a/exp-drd/drd_semaphore.c b/exp-drd/drd_semaphore.c
index 48b6f91..607abe9 100644
--- a/exp-drd/drd_semaphore.c
+++ b/exp-drd/drd_semaphore.c
@@ -54,14 +54,10 @@
 
 static
 void semaphore_initialize(struct semaphore_info* const p,
-                          const Addr semaphore,
-                          const SizeT size,
-                          const UWord value)
+                          const Addr semaphore, const UWord value)
 {
   tl_assert(semaphore != 0);
-  tl_assert(size > 0);
   tl_assert(p->a1 == semaphore);
-  tl_assert(p->a2 - p->a1 == size);
   tl_assert(p->type == ClientSemaphore);
 
   p->cleanup   = (void(*)(DrdClientobj*))semaphore_cleanup;
@@ -91,7 +87,7 @@
 
 static
 struct semaphore_info*
-semaphore_get_or_allocate(const Addr semaphore, const SizeT size)
+semaphore_get_or_allocate(const Addr semaphore)
 {
   struct semaphore_info *p;
 
@@ -100,9 +96,8 @@
   if (p == 0)
   {
     tl_assert(offsetof(DrdClientobj, semaphore) == 0);
-    p = &clientobj_add(semaphore, semaphore + size,
-                           ClientSemaphore)->semaphore;
-    semaphore_initialize(p, semaphore, size, 0);
+    p = &clientobj_add(semaphore, ClientSemaphore)->semaphore;
+    semaphore_initialize(p, semaphore, 0);
   }
   return p;
 }
@@ -114,7 +109,7 @@
 }
 
 /** Called before sem_init(). */
-struct semaphore_info* semaphore_init(const Addr semaphore, const SizeT size,
+struct semaphore_info* semaphore_init(const Addr semaphore,
                                       const Word pshared, const UWord value)
 {
   struct semaphore_info* p;
@@ -127,8 +122,11 @@
                  thread_get_running_tid(),
                  semaphore);
   }
-  tl_assert(semaphore_get(semaphore) == 0);
-  p = semaphore_get_or_allocate(semaphore, size);
+  if (semaphore_get(semaphore))
+  {
+    // To do: print an error message that a semaphore is being reinitialized.
+  }
+  p = semaphore_get_or_allocate(semaphore);
   p->value = value;
   return p;
 }
@@ -164,11 +162,11 @@
 }
 
 /** Called before sem_wait(). */
-void semaphore_pre_wait(const Addr semaphore, const SizeT size)
+void semaphore_pre_wait(const Addr semaphore)
 {
   struct semaphore_info* p;
 
-  p = semaphore_get_or_allocate(semaphore, size);
+  p = semaphore_get_or_allocate(semaphore);
   if (s_trace_semaphore)
   {
     VG_(message)(Vg_UserMsg,
@@ -223,8 +221,7 @@
 }
 
 /** Called before sem_post(). */
-void semaphore_pre_post(const DrdThreadId tid, const Addr semaphore,
-                        const SizeT size)
+void semaphore_pre_post(const DrdThreadId tid, const Addr semaphore)
 {
   struct semaphore_info* p;
 
@@ -236,7 +233,7 @@
                  thread_get_running_tid(),
                  semaphore);
   }
-  p = semaphore_get_or_allocate(semaphore, size);
+  p = semaphore_get_or_allocate(semaphore);
   p->value++;
   if (p->value == 1)
   {
@@ -248,7 +245,7 @@
 
 /** Called after sem_post() finished successfully. */
 void semaphore_post_post(const DrdThreadId tid, const Addr semaphore,
-                         const SizeT size, const Bool waited)
+                         const Bool waited)
 {
   /* Note: it is hard to implement the sem_post() wrapper correctly in     */
   /* case sem_post() returns an error code. This is because handling this  */
diff --git a/exp-drd/drd_semaphore.h b/exp-drd/drd_semaphore.h
index 0c151e8..c55dc9d 100644
--- a/exp-drd/drd_semaphore.h
+++ b/exp-drd/drd_semaphore.h
@@ -32,23 +32,22 @@
 
 #include "drd_thread.h"           // DrdThreadId
 #include "drd_vc.h"
-#include "pub_tool_basics.h"      // Addr, SizeT
+#include "pub_tool_basics.h"      // Addr
 
 
 struct semaphore_info;
 
 
 void semaphore_set_trace(const Bool trace_semaphore);
-struct semaphore_info* semaphore_init(const Addr semaphore, const SizeT size,
+struct semaphore_info* semaphore_init(const Addr semaphore,
                                       const Word pshared, const UWord value);
 void semaphore_destroy(const Addr semaphore);
-void semaphore_pre_wait(const Addr semaphore, const SizeT size);
+void semaphore_pre_wait(const Addr semaphore);
 void semaphore_post_wait(const DrdThreadId tid, const Addr semaphore,
                          const Bool waited);
-void semaphore_pre_post(const DrdThreadId tid, const Addr semaphore,
-                        const SizeT size);
+void semaphore_pre_post(const DrdThreadId tid, const Addr semaphore);
 void semaphore_post_post(const DrdThreadId tid, const Addr semaphore,
-                         const SizeT size, const Bool waited);
+                         const Bool waited);
 void semaphore_thread_delete(const DrdThreadId tid);
 
 
diff --git a/exp-drd/drd_thread.c b/exp-drd/drd_thread.c
index a47483a..c8ab570 100644
--- a/exp-drd/drd_thread.c
+++ b/exp-drd/drd_thread.c
@@ -63,6 +63,10 @@
    /// If true, indicates that there is a corresponding POSIX thread ID and
    /// a corresponding OS thread that is detached.
    Bool      detached_posix_thread;
+   /// Whether recording of memory accesses is active.
+   Bool      is_recording;
+   /// Nesting level of synchronization functions called by the client.
+   Int       synchr_nesting;
 } ThreadInfo;
 
 
@@ -154,6 +158,8 @@
          VG_(snprintf)(s_threadinfo[i].name, sizeof(s_threadinfo[i].name),
                        "thread %d", tid);
          s_threadinfo[i].name[sizeof(s_threadinfo[i].name) - 1] = 0;
+         s_threadinfo[i].is_recording  = True;
+         s_threadinfo[i].synchr_nesting = 0;
          if (s_threadinfo[i].first != 0)
             VG_(printf)("drd thread id = %d\n", i);
          tl_assert(s_threadinfo[i].first == 0);
@@ -330,6 +336,7 @@
 
    tl_assert(0 <= tid && tid < DRD_N_THREADS
              && tid != DRD_INVALID_THREADID);
+   tl_assert(s_threadinfo[tid].synchr_nesting == 0);
    for (sg = s_threadinfo[tid].last; sg; sg = sg_prev)
    {
       sg_prev = sg->prev;
@@ -471,6 +478,25 @@
    tl_assert(s_drd_running_tid != DRD_INVALID_THREADID);
 }
 
+int thread_enter_synchr(const DrdThreadId tid)
+{
+   tl_assert(IsValidDrdThreadId(tid));
+   return s_threadinfo[tid].synchr_nesting++;
+}
+
+int thread_leave_synchr(const DrdThreadId tid)
+{
+   tl_assert(IsValidDrdThreadId(tid));
+   tl_assert(s_threadinfo[tid].synchr_nesting >= 1);
+   return --s_threadinfo[tid].synchr_nesting;
+}
+
+int thread_get_synchr_nesting_count(const DrdThreadId tid)
+{
+   tl_assert(IsValidDrdThreadId(tid));
+   return s_threadinfo[tid].synchr_nesting;
+}
+
 /**
  * Return a pointer to the latest segment for the specified thread.
  */
@@ -720,6 +746,27 @@
    }
 }
 
+void thread_start_recording(const DrdThreadId tid)
+{
+   tl_assert(0 <= tid && tid < DRD_N_THREADS && tid != DRD_INVALID_THREADID);
+   tl_assert(! s_threadinfo[tid].is_recording);
+   s_threadinfo[tid].is_recording = True;
+}
+
+void thread_stop_recording(const DrdThreadId tid)
+{
+   tl_assert(0 <= tid && tid < DRD_N_THREADS && tid != DRD_INVALID_THREADID);
+   tl_assert(s_threadinfo[tid].is_recording);
+   s_threadinfo[tid].is_recording = False;
+}
+
+Bool thread_is_recording(const DrdThreadId tid)
+{
+   tl_assert(0 <= tid && tid < DRD_N_THREADS && tid != DRD_INVALID_THREADID);
+   return (s_threadinfo[tid].synchr_nesting == 0
+           && s_threadinfo[tid].is_recording);
+}
+
 void thread_print_all(void)
 {
    unsigned i;
diff --git a/exp-drd/drd_thread.h b/exp-drd/drd_thread.h
index 7e47ed9..b7b2995 100644
--- a/exp-drd/drd_thread.h
+++ b/exp-drd/drd_thread.h
@@ -75,12 +75,18 @@
 void thread_set_vg_running_tid(const ThreadId vg_tid);
 void thread_set_running_tid(const ThreadId vg_tid,
                             const DrdThreadId drd_tid);
+int thread_enter_synchr(const DrdThreadId tid);
+int thread_leave_synchr(const DrdThreadId tid);
+int thread_get_synchr_nesting_count(const DrdThreadId tid);
 Segment* thread_get_segment(const DrdThreadId tid);
 void thread_new_segment(const DrdThreadId tid);
 VectorClock* thread_get_vc(const DrdThreadId tid);
 void thread_combine_vc(const DrdThreadId joiner, const DrdThreadId joinee);
 void thread_combine_vc2(const DrdThreadId tid, const VectorClock* const vc);
 void thread_stop_using_mem(const Addr a1, const Addr a2);
+void thread_start_recording(const DrdThreadId tid);
+void thread_stop_recording(const DrdThreadId tid);
+Bool thread_is_recording(const DrdThreadId tid);
 void thread_print_all(void);
 void thread_report_races(const DrdThreadId tid);
 void thread_report_races_segment(const DrdThreadId tid,
diff --git a/exp-drd/drd_track.h b/exp-drd/drd_track.h
index 0b0045c..4cde2af 100644
--- a/exp-drd/drd_track.h
+++ b/exp-drd/drd_track.h
@@ -27,30 +27,30 @@
 
 void drd_trace_addr(const Addr addr);
 
-void drd_pre_mutex_init(Addr mutex, SizeT size, const MutexT mutex_type);
+void drd_pre_mutex_init(Addr mutex, const MutexT mutex_type);
 void drd_post_mutex_destroy(Addr mutex, const MutexT mutex_type);
-void drd_pre_mutex_lock(const Addr mutex, const SizeT size,
-                        const MutexT mutex_type);
+void drd_pre_mutex_lock(const Addr mutex, const MutexT mutex_type);
 void drd_post_mutex_lock(Addr mutex, const Bool took_lock);
 void drd_pre_mutex_unlock(const Addr mutex, const MutexT mutex_type);
 
-void drd_pre_cond_init(Addr cond, SizeT s);
+void drd_pre_cond_init(Addr cond);
 void drd_post_cond_destroy(Addr cond);
 
-void drd_semaphore_init(const Addr semaphore, const SizeT size,
+void drd_semaphore_init(const Addr semaphore,
                         const Word pshared, const Word value);
 void drd_semaphore_destroy(const Addr semaphore);
-void drd_semaphore_pre_wait(const DrdThreadId tid, const Addr semaphore,
-                            const SizeT size);
+void drd_semaphore_pre_wait(const DrdThreadId tid, const Addr semaphore);
 void drd_semaphore_post_wait(const DrdThreadId tid, const Addr semaphore,
                              const Bool waited);
-void drd_semaphore_pre_post(const DrdThreadId tid, const Addr semaphore,
-                            const SizeT size);
+void drd_semaphore_pre_post(const DrdThreadId tid, const Addr semaphore);
 void drd_semaphore_post_post(const DrdThreadId tid, const Addr semaphore,
-                             const SizeT size, const Bool waited);
+                             const Bool waited);
 
-void drd_barrier_init(const Addr barrier, const SizeT size, const Word count);
-void drd_barrier_destroy(const Addr barrier);
-void drd_barrier_pre_wait(const DrdThreadId tid, const Addr barrier);
+void drd_barrier_init(const Addr barrier,
+                      const BarrierT barrier_type, const Word count,
+                      const Bool reinitialization);
+void drd_barrier_destroy(const Addr barrier, const BarrierT barrier_type);
+void drd_barrier_pre_wait(const DrdThreadId tid, const Addr barrier,
+                          const BarrierT barrier_type);
 void drd_barrier_post_wait(const DrdThreadId tid, const Addr barrier,
-                           const Bool waited);
+                           const BarrierT barrier_type, const Bool waited);
diff --git a/exp-drd/pub_drd_bitmap.h b/exp-drd/pub_drd_bitmap.h
index f0afffc..b706657 100644
--- a/exp-drd/pub_drd_bitmap.h
+++ b/exp-drd/pub_drd_bitmap.h
@@ -57,9 +57,9 @@
 struct bitmap* bm_new(void);
 void bm_delete(struct bitmap* const bm);
 void bm_access_range(struct bitmap* const bm,
-		     const Addr address,
-		     const SizeT size,
-		     const BmAccessTypeT access_type);
+                     const Addr address,
+                     const SizeT size,
+                     const BmAccessTypeT access_type);
 void bm_access_4(struct bitmap* const bm,
                  const Addr address,
                  const BmAccessTypeT access_type);
diff --git a/exp-drd/tests/tc20_verifywrap2.stderr.exp b/exp-drd/tests/tc20_verifywrap2.stderr.exp
index a6f2970..b02f57c 100644
--- a/exp-drd/tests/tc20_verifywrap2.stderr.exp
+++ b/exp-drd/tests/tc20_verifywrap2.stderr.exp
@@ -29,25 +29,25 @@
 Destroying locked mutex: mutex 0x........, recursion count 1, owner 1.
    at 0x........: pthread_mutex_destroy (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:102)
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_lock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:108)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_trylock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:116)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_timedlock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:121)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] mutex_unlock    ? 0x........ rc 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] mutex_unlock    invalid mutex 0x........ rc 0
 
 Not a mutex
    at 0x........: pthread_mutex_unlock (drd_pthread_intercepts.c:?)
@@ -112,6 +112,7 @@
 ---------------- sem_* ----------------
 
 [1/1] semaphore_init 0x........
+[1/1] semaphore_init 0x........
 
 FIXME: can't figure out how to verify wrap of sem_destroy
 
@@ -133,8 +134,8 @@
 Destroying locked rwlock: rwlock 0x.........
    at 0x........: main (tc20_verifywrap.c:262)
 [1/1] mutex_destroy   error checking mutex 0x........
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
-[1/1] mutex_init      recursive mutex 0x........
+[1/1] mutex_destroy   invalid mutex 0x........
+[1/1] pre_mutex_lock  recursive mutex 0x........ rc 0 owner 0
 [1/1] post_mutex_lock recursive mutex 0x........ rc 0 owner 0
 [1/1] mutex_unlock    recursive mutex 0x........ rc 1
 [1/1] pre_mutex_lock  recursive mutex 0x........ rc 0 owner 1
diff --git a/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3 b/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3
index 465c781..69771f2 100644
--- a/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3
+++ b/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3
@@ -32,19 +32,19 @@
 
 make pthread_mutex_lock fail: skipped on glibc < 2.4
 
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_trylock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:116)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_timedlock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:121)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] mutex_unlock    ? 0x........ rc 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] mutex_unlock    invalid mutex 0x........ rc 0
 
 Not a mutex
    at 0x........: pthread_mutex_unlock (drd_pthread_intercepts.c:?)
@@ -109,6 +109,7 @@
 ---------------- sem_* ----------------
 
 [1/1] semaphore_init 0x........
+[1/1] semaphore_init 0x........
 
 FIXME: can't figure out how to verify wrap of sem_destroy
 
@@ -130,8 +131,8 @@
 Destroying locked rwlock: rwlock 0x.........
    at 0x........: main (tc20_verifywrap.c:262)
 [1/1] mutex_destroy   error checking mutex 0x........
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
-[1/1] mutex_init      recursive mutex 0x........
+[1/1] mutex_destroy   invalid mutex 0x........
+[1/1] pre_mutex_lock  recursive mutex 0x........ rc 0 owner 0
 [1/1] post_mutex_lock recursive mutex 0x........ rc 0 owner 0
 [1/1] mutex_unlock    recursive mutex 0x........ rc 1
 [1/1] pre_mutex_lock  recursive mutex 0x........ rc 0 owner 1
diff --git a/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3-b b/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3-b
index 39f95f5..01a5dd6 100644
--- a/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3-b
+++ b/exp-drd/tests/tc20_verifywrap2.stderr.exp-glibc2.3-b
@@ -32,19 +32,19 @@
 
 make pthread_mutex_lock fail: skipped on glibc < 2.4
 
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_trylock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:116)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] pre_mutex_lock  invalid mutex 0x........ rc 0 owner 0
 
 Not a mutex
    at 0x........: pthread_mutex_timedlock (drd_pthread_intercepts.c:?)
    by 0x........: main (tc20_verifywrap.c:121)
-[1/1] post_mutex_lock (?) 0x........ rc 0 owner 0
-[1/1] mutex_unlock    ? 0x........ rc 0
+[1/1] post_mutex_lock invalid mutex 0x........ rc 0 owner 0
+[1/1] mutex_unlock    invalid mutex 0x........ rc 0
 
 Not a mutex
    at 0x........: pthread_mutex_unlock (drd_pthread_intercepts.c:?)
@@ -109,6 +109,7 @@
 ---------------- sem_* ----------------
 
 [1/1] semaphore_init 0x........
+[1/1] semaphore_init 0x........
 
 FIXME: can't figure out how to verify wrap of sem_destroy
 
@@ -130,8 +131,8 @@
 Destroying locked rwlock: rwlock 0x.........
    at 0x........: main (tc20_verifywrap.c:262)
 [1/1] mutex_destroy   error checking mutex 0x........
-[1/1] pre_mutex_lock  (?) 0x........ rc 0 owner 0
-[1/1] mutex_init      recursive mutex 0x........
+[1/1] mutex_destroy   invalid mutex 0x........
+[1/1] pre_mutex_lock  recursive mutex 0x........ rc 0 owner 0
 [1/1] post_mutex_lock recursive mutex 0x........ rc 0 owner 0
 [1/1] mutex_unlock    recursive mutex 0x........ rc 1