new futex-requeue-based pthread_cond_broadcast implementation

this avoids the "stampede effect" where pthread_cond_broadcast would
wake all waiters simultaneously, only for them to immediately contend
for the mutex and go back to sleep.
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index d123e5e..4c743d2 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -64,9 +64,12 @@
 #define _m_prev __u.__p[3]
 #define _m_next __u.__p[4]
 #define _m_count __u.__i[5]
-#define _c_block __u.__i[0]
-#define _c_clock __u.__i[1]
-#define _c_waiters __u.__i[2]
+#define _c_mutex __u.__p[0]
+#define _c_block __u.__i[2]
+#define _c_waiters __u.__i[3]
+#define _c_clock __u.__i[4]
+#define _c_bcast __u.__i[5]
+#define _c_leavers __u.__i[6]
 #define _rw_lock __u.__i[0]
 #define _rw_waiters __u.__i[1]
 #define _b_inst __u.__p[0]