Add option a new sim-hint  no-nptl-pthread-stackcache.

Activating this hint using --sim-hints=no-nptl-pthread-stackcache
means the glibc nptl stack cache will be disabled.

Disabling this stack/tls cache avoids helgrind false positive race conditions
errors when using __thread variables.

Note: disabling the stack cache is done by a kludge, dependent on
internal knowledge of glibc code, and using libpthread debug info.
So, this kludge might be broken with newer glibc version.
This has been tested on various platforms and various
glibc versions 2.11, 2.16 and 2.18

To check if the disabling works, you can do:
valgrind --tool=helgrind --sim-hints=no-nptl-pthread-stackcache -d -v ./helgrind/tests/tls_threads |& grep kludge

If you see the below 2 lines, then hopefully the stack cache has been disabled.
--12624-- deactivate nptl pthread stackcache via kludge: found symbol stack_cache_actsize at addr 0x3AF178
--12624:1:sched    pthread stack cache size disabling done via kludge




git-svn-id: svn://svn.valgrind.org/valgrind/trunk@14313 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_clientstate.c b/coregrind/m_clientstate.c
index 718d3cd..b251f1b 100644
--- a/coregrind/m_clientstate.c
+++ b/coregrind/m_clientstate.c
@@ -107,6 +107,11 @@
    VG_(get_StackTrace) in m_stacktrace.c for further info. */
 Addr VG_(client__dl_sysinfo_int80) = 0;
 
+/* Address of the (internal) glibc nptl pthread stack cache size,
+   declared as:
+      static size_t stack_cache_actsize;
+   in nptl/allocatestack.c */
+SizeT* VG_(client__stack_cache_actsize__addr) = 0;
 
 /*--------------------------------------------------------------------*/
 /*--- end                                                          ---*/
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 1a3374c..fc0c8c9 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -175,8 +175,8 @@
 "    --vgdb-prefix=<prefix>    prefix for vgdb FIFOs [%s]\n"
 "    --run-libc-freeres=no|yes free up glibc memory at exit on Linux? [yes]\n"
 "    --sim-hints=hint1,hint2,...  activate unusual sim behaviours [none] \n"
-"         where hint is one of no-inner-prefix lax-ioctls enable-outer\n"
-"                 fuse-compatible none\n"
+"         where hint is one of lax-ioctls fuse-compatible enable-outer\n"
+"           no-inner-prefix no-nptl-pthread-stackcache none\n"
 "    --fair-sched=no|yes|try   schedule threads fairly on multicore systems [no]\n"
 "    --kernel-variant=variant1,variant2,...  handle non-standard kernel"
                                                                " variants [none]\n"
@@ -380,7 +380,8 @@
       // as early as possible.
       else if VG_USETX_CLO (str, "--sim-hints",
                             "no-inner-prefix,fuse-compatible,"
-                            "lax-ioctls,enable-outer",
+                            "lax-ioctls,enable-outer,"
+                            "no-nptl-pthread-stackcache",
                             VG_(clo_sim_hints)) {}
    }
 }
diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c
index 9e6a10c..1fb3df2 100644
--- a/coregrind/m_redir.c
+++ b/coregrind/m_redir.c
@@ -403,6 +403,10 @@
    Bool         check_ppcTOCs = False;
    Bool         isText;
    const HChar* newdi_soname;
+   Bool         dehacktivate_pthread_stack_cache_var_search = False;
+   const HChar* const pthread_soname = "libpthread.so.0";
+   const HChar* const pthread_stack_cache_actsize_varname
+      = "stack_cache_actsize";
 
 #  if defined(VG_PLAT_USES_PPCTOC)
    check_ppcTOCs = True;
@@ -497,6 +501,10 @@
 
    specList = NULL; /* the spec list we're building up */
 
+   dehacktivate_pthread_stack_cache_var_search = 
+      SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
+      && 0 == VG_(strcmp)(newdi_soname, pthread_soname);
+
    nsyms = VG_(DebugInfo_syms_howmany)( newdi );
    for (i = 0; i < nsyms; i++) {
       VG_(DebugInfo_syms_getidx)( newdi, i, &sym_avmas,
@@ -513,8 +521,22 @@
                                      demangled_fnpatt, N_DEMANGLED,
                                      &isWrap, &becTag, &becPrio );
          /* ignore data symbols */
-         if (!isText)
+         if (!isText) {
+            /* But search for dehacktivate stack cache var if needed. */
+            if (dehacktivate_pthread_stack_cache_var_search
+                && 0 == VG_(strcmp)(*names,
+                                    pthread_stack_cache_actsize_varname)) {
+               if ( VG_(clo_verbosity) > 1 ) {
+                  VG_(message)( Vg_DebugMsg,
+                                "deactivate nptl pthread stackcache via kludge:"
+                                " found symbol %s at addr %p\n",
+                                *names, (void*) sym_avmas.main); 
+               }
+               VG_(client__stack_cache_actsize__addr) = (SizeT*) sym_avmas.main;
+               dehacktivate_pthread_stack_cache_var_search = False;
+            }
             continue;
+         }
          if (!ok) {
             /* It's not a full-scale redirect, but perhaps it is a load-notify
                fn?  Let the load-notify department see it. */
@@ -589,6 +611,13 @@
       }
       free_symname_array(names_init, &twoslots[0]);
    }
+   if (dehacktivate_pthread_stack_cache_var_search) {
+      VG_(message)(Vg_DebugMsg,
+                   "WARNING: could not find symbol for var %s in %s\n",
+                   pthread_stack_cache_actsize_varname, pthread_soname);
+      VG_(message)(Vg_DebugMsg,
+                   "=> pthread stack cache cannot be disabled!\n");
+   }
 
    if (check_ppcTOCs) {
       for (i = 0; i < nsyms; i++) {
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 64d45cb..3a85e52 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -61,14 +61,15 @@
 #include "pub_core_basics.h"
 #include "pub_core_debuglog.h"
 #include "pub_core_vki.h"
-#include "pub_core_vkiscnums.h"    // __NR_sched_yield
-#include "pub_core_libcsetjmp.h"   // to keep _threadstate.h happy
+#include "pub_core_vkiscnums.h"  // __NR_sched_yield
+#include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
 #include "pub_core_threadstate.h"
+#include "pub_core_clientstate.h"
 #include "pub_core_aspacemgr.h"
-#include "pub_core_clreq.h"         // for VG_USERREQ__*
+#include "pub_core_clreq.h"      // for VG_USERREQ__*
 #include "pub_core_dispatch.h"
-#include "pub_core_errormgr.h"      // For VG_(get_n_errs_found)()
-#include "pub_core_gdbserver.h"     // for VG_(gdbserver) and VG_(gdbserver_activity)
+#include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
+#include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
 #include "pub_core_libcbase.h"
 #include "pub_core_libcassert.h"
 #include "pub_core_libcprint.h"
@@ -1579,6 +1580,30 @@
    if (VG_(clo_trace_sched))
       print_sched_event(tid, "exiting VG_(scheduler)");
 
+   if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))) {
+      if (VG_(client__stack_cache_actsize__addr)) {
+         if (*VG_(client__stack_cache_actsize__addr) == 0) {
+            /* We disable the stack cache just before the first
+               thread exits. At this moment, we are sure the pthread lib
+               loading is done/variable was initialised by
+               pthread lib/... */
+            VG_(debugLog)(1,"sched",
+                          "pthread stack cache size disabling done"
+                          " via kludge\n");
+            *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
+            /* Set a value big enough to be above the hardcoded maximum stack
+               cache size in glibc, small enough to allow a pthread stack size
+               to be added without risk of overflow. */
+         }
+      } else {
+          VG_(debugLog)(0,"sched",
+                        "WARNING: pthread stack cache cannot be disabled!\n");
+          VG_(clo_sim_hints) &= !SimHint2S(SimHint_no_nptl_pthread_stackcache);
+          /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
+             to avoid having a msg for at all thread terminations. */
+      }
+   }
+
    vg_assert(VG_(is_exiting)(tid));
 
    return tst->exitreason;
diff --git a/coregrind/pub_core_clientstate.h b/coregrind/pub_core_clientstate.h
index fe95b82..64ec161 100644
--- a/coregrind/pub_core_clientstate.h
+++ b/coregrind/pub_core_clientstate.h
@@ -93,6 +93,24 @@
 extern Addr VG_(client__dl_sysinfo_int80);
 
 
+/* glibc nptl pthread systems only, when no-nptl-pthread-stackcache
+   was given in --sim-hints.
+   Used for a (kludgy) way to disable the cache of stacks as implemented in
+   nptl glibc. 
+   Based on internal knowledge of the pthread glibc nptl/allocatestack.c code:
+   a huge value in stack_cache_actsize (bigger than the constant
+   stack_cache_maxsize) makes glibc believes the cache is full
+   and so stacks are always released when a pthread terminates.
+   Several ugliness in this kludge:
+    * hardcodes private glibc var name "stack_cache_maxsize"
+    * based on knowledge of the code of the functions
+      queue_stack and __free_stacks
+    * static symbol for "stack_cache_maxsize" must be in
+      the debug info.
+   It would be much cleaner to have a documented and supported
+   way to disable the pthread stack cache. */
+extern SizeT* VG_(client__stack_cache_actsize__addr);
+
 #endif   // __PUB_CORE_CLIENTSTATE_H
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/pub_core_options.h b/coregrind/pub_core_options.h
index 1c1d285..9bf2d04 100644
--- a/coregrind/pub_core_options.h
+++ b/coregrind/pub_core_options.h
@@ -223,10 +223,11 @@
 /* Engage miscellaneous weird hacks needed for some progs. */
 typedef
    enum {
-      SimHint_no_inner_prefix,
-      SimHint_fuse_compatible,
       SimHint_lax_ioctls,
-      SimHint_enable_outer
+      SimHint_fuse_compatible,
+      SimHint_enable_outer,
+      SimHint_no_inner_prefix,
+      SimHint_no_nptl_pthread_stackcache
    }
    SimHint;
 
diff --git a/docs/xml/manual-core.xml b/docs/xml/manual-core.xml
index 4d60131..949874b 100644
--- a/docs/xml/manual-core.xml
+++ b/docs/xml/manual-core.xml
@@ -1956,6 +1956,7 @@
       the simulated behaviour in nonstandard or dangerous ways, possibly
       to help the simulation of strange features.  By default no hints
       are enabled.  Use with caution!  Currently known hints are:</para>
+
       <itemizedlist>
         <listitem>
           <para><option>lax-ioctls: </option> Be very lax about ioctl
@@ -1965,11 +1966,23 @@
           large number of strange ioctl commands becomes very
           tiresome.</para>
         </listitem>
+
+        <listitem>
+          <para><option>fuse-compatible: </option> Enable special
+            handling for certain system calls that may block in a FUSE
+            file-system.  This may be necessary when running Valgrind
+            on a multi-threaded program that uses one thread to manage
+            a FUSE file-system and another thread to access that
+            file-system.
+          </para>
+        </listitem>
+
         <listitem>
           <para><option>enable-outer: </option> Enable some special
           magic needed when the program being run is itself
           Valgrind.</para>
         </listitem>
+
         <listitem>
           <para><option>no-inner-prefix: </option> Disable printing
           a prefix <option>&gt;</option> in front of each stdout or
@@ -1980,13 +1993,39 @@
           front of the inner debug logging lines.</para>
         </listitem>
         <listitem>
-          <para><option>fuse-compatible: </option> Enable special
-            handling for certain system calls that may block in a FUSE
-            file-system.  This may be necessary when running Valgrind
-            on a multi-threaded program that uses one thread to manage
-            a FUSE file-system and another thread to access that
-            file-system.
-          </para>
+          <para><option>no-nptl-pthread-stackcache: </option>
+            This hint is only relevant when running Valgrind on Linux.</para>
+
+          <para>The GNU glibc pthread library
+            (<function>libpthread.so</function>), which is used by
+            pthread programs, maintains a cache of pthread stacks.
+            When a pthread terminates, the memory used for the pthread
+            stack and some thread local storage related data structure
+            are not always directly released.  This memory is kept in
+            a cache (up to a certain size), and is re-used if a new
+            thread is started.</para>
+
+          <para>This cache causes the helgrind tool to report some
+            false positive race condition errors on this cached
+            memory, as helgrind does not understand the internal glibc
+            cache synchronisation primitives. So, when using helgrind,
+            disabling the cache helps to avoid false positive race
+            conditions, in particular when using thread local storage
+            variables (e.g. variables using the
+            <function>__thread</function> qualifier).</para>
+
+          <para>When using the memcheck tool, disabling the cache
+            ensures the memory used by glibc to handle __thread
+            variables is directly released when a thread
+            terminates.</para>
+
+          <para>Note: Valgrind disables the cache using some internal
+            knowledge of the glibc stack cache implementation and by
+            examining the debug information of the pthread
+            library. This technique is thus somewhat fragile and might
+            not work for all glibc versions. This has been succesfully
+            tested with various glibc versions (e.g. 2.11, 2.16, 2.18)
+            on various platforms.</para>
         </listitem>
       </itemizedlist>
     </listitem>
diff --git a/helgrind/docs/hg-manual.xml b/helgrind/docs/hg-manual.xml
index 21328b4..bc9f49a 100644
--- a/helgrind/docs/hg-manual.xml
+++ b/helgrind/docs/hg-manual.xml
@@ -972,6 +972,16 @@
   </listitem>
 
   <listitem>
+    <para>If your application is using thread local variables,
+    helgrind might report false positive race conditions on these
+    variables, despite being very probably race free.  On Linux, you can
+    use <option>--sim-hints=deactivate-pthread-stack-cache-via-hack</option>
+    to avoid such false positive error messages
+    (see <xref linkend="opt.sim-hints"/>).
+    </para>
+  </listitem>
+
+  <listitem>
     <para>Round up all finished threads using
     <function>pthread_join</function>.  Avoid
     detaching threads: don't create threads in the detached state, and
diff --git a/helgrind/tests/Makefile.am b/helgrind/tests/Makefile.am
index 79dc945..1a8b1f8 100644
--- a/helgrind/tests/Makefile.am
+++ b/helgrind/tests/Makefile.am
@@ -102,7 +102,9 @@
 		tc23_bogus_condwait.stderr.exp \
 		tc23_bogus_condwait.stderr.exp-mips32 \
 	tc24_nonzero_sem.vgtest tc24_nonzero_sem.stdout.exp \
-		tc24_nonzero_sem.stderr.exp
+		tc24_nonzero_sem.stderr.exp \
+	tls_threads.vgtest tls_threads.stdout.exp \
+		tls_threads.stderr.exp
 
 # XXX: tc18_semabuse uses operations that are unsupported on Darwin.  It
 # should be conditionally compiled like tc20_verifywrap is.
@@ -144,7 +146,8 @@
 	tc19_shadowmem \
 	tc21_pthonce \
 	tc23_bogus_condwait \
-	tc24_nonzero_sem
+	tc24_nonzero_sem \
+	tls_threads
 
 # DDD: it seg faults, and then the Valgrind exit path hangs
 # JRS 29 July 09: it craps out in the stack unwinder, in
diff --git a/helgrind/tests/tls_threads.c b/helgrind/tests/tls_threads.c
new file mode 100644
index 0000000..def8567
--- /dev/null
+++ b/helgrind/tests/tls_threads.c
@@ -0,0 +1,109 @@
+#include <config.h>
+#include <pthread.h>
+#include <stdio.h>
+
+#ifdef HAVE_TLS
+
+static int only_touch_stackvar;
+
+/* We should have no error on local and global
+   as these are both thread local variables. */
+static __thread int local;
+__thread int global;
+
+/* We will wrongly share this variable indirectly  through a pointer
+   We should have an error for this. */
+static __thread int badly_shared_local;
+
+/* ptr_to_badly_shared_local allows to have multiple threads seeing
+   the same thread local storage. This is however really bad sharing
+   as this can cause SEGV or whatever, as when the thread disappears,
+   the badly_shared_local var pointed to can also disappear.
+   By default, the regtest does not test this really bad sharing. */
+pthread_mutex_t protect_ptr_to_badly_shared_local = PTHREAD_MUTEX_INITIALIZER;
+int *ptr_to_badly_shared_local;
+
+static void local_false_positive(void)
+{
+   local = local + 1; // no error is expected
+}
+
+static void global_false_positive(void)
+{
+   global = global + 1; // no error is expected
+}
+
+static void badly_shared_local_error_expected(void)
+{
+   *ptr_to_badly_shared_local = *ptr_to_badly_shared_local + 1; // an error is expected
+   // This can cause a SIGSEGV.
+}
+
+static void *level2(void *p)
+{
+   int stackvar = 0;
+
+   stackvar = stackvar + only_touch_stackvar;
+   
+   local_false_positive();
+   global_false_positive();
+   if (only_touch_stackvar != 0) {
+      badly_shared_local_error_expected();
+   }
+   return 0;
+}
+
+#define NLEVEL2 10
+static void *level1(void *p)
+{
+   pthread_t threads[NLEVEL2];
+   int curthread = 0;
+   int i;
+
+   pthread_mutex_lock(&protect_ptr_to_badly_shared_local);
+   if (ptr_to_badly_shared_local == NULL)
+      ptr_to_badly_shared_local = &badly_shared_local;
+   pthread_mutex_unlock(&protect_ptr_to_badly_shared_local);
+
+   for(i = 0; i < NLEVEL2; i++) {
+      pthread_create(&threads[curthread++], NULL, level2, NULL);
+   }
+
+   for(i = 0; i < curthread; i++)
+      pthread_join(threads[i], NULL);
+
+   return 0;
+}
+
+#define NLEVEL1 3
+int main(int argc, char*argv[])
+{
+   pthread_t threads[NLEVEL1];
+   int curthread = 0;
+   int i;
+
+   only_touch_stackvar = argc > 1;
+
+   for(i = 0; i < NLEVEL1; i++) {
+      pthread_create(&threads[curthread++], NULL, level1, NULL);
+   }
+
+   fprintf(stderr, "starting join in main\n");
+   fflush(stderr);
+   for(i = 0; i < curthread; i++)
+      pthread_join(threads[i], NULL);
+   fprintf(stderr, "finished join in main\n");
+   fflush(stderr);
+   return 0;
+}
+#else
+int main()
+{
+   fprintf(stderr, "starting join in main\n");
+   fflush(stderr);
+   /* do nothing */
+   fprintf(stderr, "finished join in main\n");
+   fflush(stderr);
+   return 0;
+}
+#endif
diff --git a/helgrind/tests/tls_threads.stderr.exp b/helgrind/tests/tls_threads.stderr.exp
new file mode 100644
index 0000000..be3b890
--- /dev/null
+++ b/helgrind/tests/tls_threads.stderr.exp
@@ -0,0 +1,2 @@
+starting join in main
+finished join in main
diff --git a/helgrind/tests/tls_threads.stdout.exp b/helgrind/tests/tls_threads.stdout.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/helgrind/tests/tls_threads.stdout.exp
diff --git a/helgrind/tests/tls_threads.vgtest b/helgrind/tests/tls_threads.vgtest
new file mode 100644
index 0000000..5fb4587
--- /dev/null
+++ b/helgrind/tests/tls_threads.vgtest
@@ -0,0 +1,2 @@
+prog: tls_threads
+vgopts: -q --sim-hints=no-nptl-pthread-stackcache
diff --git a/none/tests/cmdline1.stdout.exp b/none/tests/cmdline1.stdout.exp
index 122d7e5..02a0fe3 100644
--- a/none/tests/cmdline1.stdout.exp
+++ b/none/tests/cmdline1.stdout.exp
@@ -88,8 +88,8 @@
     --vgdb-prefix=<prefix>    prefix for vgdb FIFOs [/tmp/vgdb-pipe]
     --run-libc-freeres=no|yes free up glibc memory at exit on Linux? [yes]
     --sim-hints=hint1,hint2,...  activate unusual sim behaviours [none] 
-         where hint is one of no-inner-prefix lax-ioctls enable-outer
-                 fuse-compatible none
+         where hint is one of lax-ioctls fuse-compatible enable-outer
+           no-inner-prefix no-nptl-pthread-stackcache none
     --fair-sched=no|yes|try   schedule threads fairly on multicore systems [no]
     --kernel-variant=variant1,variant2,...  handle non-standard kernel variants [none]
          where variant is one of bproc none
diff --git a/none/tests/cmdline2.stdout.exp b/none/tests/cmdline2.stdout.exp
index a26abaf..7f4277c 100644
--- a/none/tests/cmdline2.stdout.exp
+++ b/none/tests/cmdline2.stdout.exp
@@ -88,8 +88,8 @@
     --vgdb-prefix=<prefix>    prefix for vgdb FIFOs [/tmp/vgdb-pipe]
     --run-libc-freeres=no|yes free up glibc memory at exit on Linux? [yes]
     --sim-hints=hint1,hint2,...  activate unusual sim behaviours [none] 
-         where hint is one of no-inner-prefix lax-ioctls enable-outer
-                 fuse-compatible none
+         where hint is one of lax-ioctls fuse-compatible enable-outer
+           no-inner-prefix no-nptl-pthread-stackcache none
     --fair-sched=no|yes|try   schedule threads fairly on multicore systems [no]
     --kernel-variant=variant1,variant2,...  handle non-standard kernel variants [none]
          where variant is one of bproc none