fix 338160: Implement qGetTLSAddr query so that GDB+V gdbsrv can print __thread variables.

To implement qGetTLSAddr, gdbsrv has to know how to get the glibc dtv
address and the module id from the link_map.
These 2 things are dependent on the internals of glibc.
The dependency is mostly isolated in a few lines of arch dependent
code or in an external utility that uses a hack + -ldl lib to find
the offset of the modid in the link_map structure.

Tested on x86/amd64/ppc64/s390x. Somewhat tested on ppc32 and arm64.
Untested/a few #ifdef-ed lines not compiled on arm/mips32/mips64
and darwin.

For more background info about thread local storage handling, see
'ELF Handling For Thread-Local Storage' http://www.akkadia.org/drepper/tls.pdf

Changes:
* auxprogs/getoff.c new auxiliary program to get platform specific offsets
  (currently only the offset for the module id in struct link_map).
* configure.ac : check for dlinfo(RTLD_DI_TLS_MODID) needed for getoff.c
* new gdbserver_tests/hgtls, testing various types of __thread variables
* various m_gdbserver files:
  - implement decoding of the qGetTLSAddr query
  - for each platform: platform specific code to get the dtv
  - call to external program getoff-<platform> the first time an
    __thread variable is printed.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@14283 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/NEWS b/NEWS
index a6f4b6a..60692f8 100644
--- a/NEWS
+++ b/NEWS
@@ -55,6 +55,8 @@
 
 * New and modified GDB server monitor features:
 
+  - thread local variables/storage (__thread) can now be displayed.
+
   - The GDB server monitor command 'v.info location <address>'
     outputs information about an address. The information produced depends
     on the tool and on the options given to valgrind.
@@ -204,6 +206,7 @@
 337528  leak check heuristic for block prefixed by length as 64bit number
 338024  inlined functions are not shown if DW_AT_ranges is used
 338115  DRD: computed conflict set differs from actual after fork
+338160  implement display of thread local storage in gdbsrv
 n-i-bz  Fix KVM_CREATE_IRQCHIP ioctl handling
 n-i-bz  s390x: Fix memory corruption for multithreaded applications
 n-i-bz  vex arm->IR: allow PC as basereg in some LDRD cases
diff --git a/auxprogs/Makefile.am b/auxprogs/Makefile.am
index 7cb0308..2195338 100644
--- a/auxprogs/Makefile.am
+++ b/auxprogs/Makefile.am
@@ -51,3 +51,44 @@
 if VGCONF_PLATFORMS_INCLUDE_X86_DARWIN
 valgrind_di_server_LDFLAGS   += -Wl,-read_only_relocs -Wl,suppress
 endif
+
+#----------------------------------------------------------------------------
+# getoff-<platform>
+# Used to retrieve various user space offsets, using user space libraries.
+#----------------------------------------------------------------------------
+noinst_PROGRAMS  = getoff-@VGCONF_ARCH_PRI@-@VGCONF_OS@
+if VGCONF_HAVE_PLATFORM_SEC
+noinst_PROGRAMS += getoff-@VGCONF_ARCH_SEC@-@VGCONF_OS@
+endif
+
+getoff_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES   = getoff.c
+getoff_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS  = $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+getoff_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS    = $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+getoff_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CCASFLAGS = $(AM_CCASFLAGS_PRI)
+getoff_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDFLAGS   = $(AM_CFLAGS_PRI)
+if HAVE_DLINFO_RTLD_DI_TLS_MODID
+getoff_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDADD = $(LDADD) -ldl
+endif
+
+if VGCONF_HAVE_PLATFORM_SEC
+getoff_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES   = getoff.c
+getoff_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS  = $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+getoff_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS    = $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+getoff_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CCASFLAGS = $(AM_CCASFLAGS_SEC)
+getoff_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDFLAGS   = $(AM_CFLAGS_SEC)
+if HAVE_DLINFO_RTLD_DI_TLS_MODID
+getoff_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDADD = $(LDADD) -ldl
+endif
+endif
+
+#----------------------------------------------------------------------------
+# General stuff
+#----------------------------------------------------------------------------
+
+all-local: inplace-noinst_PROGRAMS inplace-noinst_DSYMS
+
+clean-local: clean-noinst_DSYMS
+
+install-exec-local: install-noinst_PROGRAMS install-noinst_DSYMS
+
+uninstall-local: uninstall-noinst_PROGRAMS uninstall-noinst_DSYMS
diff --git a/auxprogs/getoff.c b/auxprogs/getoff.c
new file mode 100644
index 0000000..36f5751
--- /dev/null
+++ b/auxprogs/getoff.c
@@ -0,0 +1,139 @@
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <link.h>
+#include <dlfcn.h>
+
+/* True if arg equals option; an option ending in '=' matches as a prefix. */
+static
+int is_opt(char* arg, const char *option)
+{
+   int option_len = strlen(option);
+   if (option[option_len-1] == '=')
+      return (0 == strncmp(option, arg, option_len));
+   else
+      return (0 == strcmp(option, arg));
+}
+
+static int verbose = 0;
+
+static
+void usage (char* progname)
+{
+   fprintf(stderr,
+"Usage: %s [--help] [-h] [-v] [-o <outputfile>]\n"
+"Outputs various user space offsets\n"
+"By default, outputs on stdout.\n"
+"Use -o to output to <outputfile>\n"
+"-v : be more verbose\n",
+progname);
+
+}
+/* Currently, only computes and outputs lm_modid_offset in struct link_map
+   of the dynamic linker. In theory, we should also compute the offset needed
+   to get the dtv from the thread register/pointer/...
+   Currently, the various valgrind-low-xxxxxx.c files are hardcoding this
+   offset as it is deemed (?) stable, and there is no clear way how to
+   compute this dtv offset.
+   Exits with 0 on success, 1 on a usage, file or dlinfo error. */
+int main (int argc, char** argv)
+{
+   int i;
+   FILE *outputfile;
+   int nr_errors = 0;
+
+   outputfile = stdout;
+
+   i = 1;
+   while (i < argc) {
+      if (is_opt(argv[i], "--help") || is_opt(argv[i], "-h")) {
+         usage(argv[0]);
+         exit(0);
+      } else if (is_opt(argv[i], "-v")) {
+         verbose++;
+      } else if (is_opt(argv[i], "-o")) {
+         if (i+1 == argc) {
+            fprintf(stderr,
+                    "missing output file for -o option\n"
+                    "Use --help for more information.\n");
+            exit (1);
+         }
+         i++;
+         outputfile = fopen(argv[i], "w");
+         if (outputfile == NULL) {
+            fprintf(stderr, "Could not fopen %s in write mode\n", argv[i]);
+            perror ("fopen output file failed");
+            exit (1);
+         }
+      } else {
+         fprintf (stderr,
+                  "unknown or invalid argument %s\n"
+                  "Use --help for more information.\n",
+                  argv[i]);
+         exit(1);
+      }
+      i++;
+   }
+
+#ifdef HAVE_DLINFO_RTLD_DI_TLS_MODID
+   /* Compute offset of lm_modid in struct link_map.
+      This is needed to support qGetTLSAddr gdbsrv query.
+      Computation is done using an ugly hack, but less ugly than
+      hardcoding the offset depending on the glibc version and
+      platform.
+      The below works, based on the assumption that RTLD_DI_TLS_MODID
+      just accesses and returns directly the field in the dummy
+      link_map structure we have prepared.
+
+      If glibc debug info is installed on your system, you can
+      also find this offset by doing in GDB:
+          p &((struct link_map*)0x0)->l_tls_modid
+      (see also coregrind/m_gdbserver/valgrind_low.h target_get_dtv
+       comments).
+   */
+   {
+      #define MAX_LINKMAP_WORDS 10000
+      size_t dummy_link_map[MAX_LINKMAP_WORDS];
+      size_t off;
+      size_t modid_offset;
+      for (off = 0; off < MAX_LINKMAP_WORDS; off++)
+         dummy_link_map[off] = off;
+      if (dlinfo ((void*)dummy_link_map, RTLD_DI_TLS_MODID,
+                  &modid_offset) == 0) {
+         assert(modid_offset < MAX_LINKMAP_WORDS);
+         fprintf(outputfile,
+                 "lm_modid_offset 0x%zx\n", modid_offset*sizeof(size_t));
+      } else {
+         fprintf(stderr,
+                 "Error computing lm_modid_offset.\n"
+                 "dlinfo error %s\n", dlerror());
+         nr_errors++;
+      }
+      #undef MAX_LINKMAP_WORDS
+   }
+#else
+   if (verbose)
+      fprintf(stderr,
+              "cannot compute lm_modid_offset.\n"
+              "configure did not define HAVE_DLINFO_RTLD_DI_TLS_MODID.\n");
+#endif
+
+   if (outputfile != stdout)
+      if (fclose (outputfile) != 0) {
+         perror ("fclose output file failed");
+         nr_errors++;
+      }
+
+   if (nr_errors == 0)
+      exit(0);
+   else
+      exit(1);
+}
diff --git a/configure.ac b/configure.ac
index 949a989..21b7782 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1449,6 +1449,37 @@
 	       test x$ac_have_pthread_create_glibc_2_0 = xyes)
 
 
+# Check for dlinfo RTLD_DI_TLS_MODID
+AC_MSG_CHECKING([for dlinfo RTLD_DI_TLS_MODID])
+
+safe_LIBS="$LIBS"
+LIBS="-ldl"
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <link.h>
+#include <dlfcn.h>
+]], [[
+  size_t sizes[10000];
+  size_t modid_offset;
+  (void) dlinfo ((void*)sizes, RTLD_DI_TLS_MODID, &modid_offset);
+  return 0;
+]])], [
+ac_have_dlinfo_rtld_di_tls_modid=yes
+AC_MSG_RESULT([yes])
+AC_DEFINE([HAVE_DLINFO_RTLD_DI_TLS_MODID], 1,
+          [Define to 1 if you have a dlinfo that can do RTLD_DI_TLS_MODID.])
+], [
+ac_have_dlinfo_rtld_di_tls_modid=no
+AC_MSG_RESULT([no])
+])
+LIBS=$safe_LIBS
+
+AM_CONDITIONAL(HAVE_DLINFO_RTLD_DI_TLS_MODID,
+	       test x$ac_have_dlinfo_rtld_di_tls_modid = xyes)
+
+
 # Check for eventfd_t, eventfd() and eventfd_read()
 AC_MSG_CHECKING([for eventfd()])
 
diff --git a/coregrind/m_gdbserver/server.c b/coregrind/m_gdbserver/server.c
index 5346401..d8d3a19 100644
--- a/coregrind/m_gdbserver/server.c
+++ b/coregrind/m_gdbserver/server.c
@@ -649,6 +649,45 @@
 {
    static struct inferior_list_entry *thread_ptr;
 
+   /* thread local storage query: qGetTLSAddr:<thread id>,<offset>,<lm> */
+   if (strncmp ("qGetTLSAddr:", arg_own_buf, 12) == 0) {
+      char *from = arg_own_buf + 12;
+      char *end = arg_own_buf + strlen(arg_own_buf);
+      char *to = strchr(from, ',');
+      unsigned long gdb_id;
+      CORE_ADDR lm, offset;
+      struct thread_info *ti;
+
+      /* Malformed packet (a ',' separator is missing): report an error. */
+      if (to == NULL || strchr(to + 1, ',') == NULL) {
+         write_enn (arg_own_buf);
+         return;
+      }
+      *to = 0;
+      gdb_id = strtoul (from, NULL, 16);
+      from = to + 1;
+      to = strchr(from, ','); /* cannot be NULL, verified above */
+      decode_address (&offset, from, to - from);
+      from = to + 1;
+      decode_address (&lm, from, end - from);
+      dlog(2, "qGetTLSAddr thread %lu offset %p lm %p\n",
+           gdb_id, (void*)offset, (void*)lm);
+
+      ti = gdb_id_to_thread (gdb_id);
+      if (ti != NULL) {
+         ThreadState *tst = (ThreadState *) inferior_target_data (ti);
+         Addr tls_addr;
+         if (valgrind_get_tls_addr(tst, offset, lm, &tls_addr)) {
+            VG_(sprintf) (arg_own_buf, "%lx", tls_addr);
+            return;
+         }
+         // else we will report we do not support qGetTLSAddr
+      } else {
+         write_enn (arg_own_buf);
+         return;
+      }
+   }
+   
    /* qRcmd, monitor command handling.  */
    if (strncmp ("qRcmd,", arg_own_buf, 6) == 0) {
       char *p = arg_own_buf + 6;
@@ -706,7 +745,7 @@
          return;
       }
    }
-   
+
    if (strcmp ("qAttached", arg_own_buf) == 0) {
       /* tell gdb to always detach, never kill the process */
       arg_own_buf[0] = '1';
diff --git a/coregrind/m_gdbserver/target.c b/coregrind/m_gdbserver/target.c
index 2fde954..73dcf78 100644
--- a/coregrind/m_gdbserver/target.c
+++ b/coregrind/m_gdbserver/target.c
@@ -547,6 +547,144 @@
    return valgrind_point (/* insert*/ False, type, addr, len);
 }
 
+/* Returns the (platform specific) offset of lm_modid field in the link map
+   struct.
+   Stores the offset in *result and returns True if offset can be determined.
+   Returns False otherwise. *result is not to be used then. */
+static Bool getplatformoffset (SizeT *result)
+{
+   static Bool getplatformoffset_called = False;
+
+   static Bool lm_modid_offset_found = False;
+   static SizeT lm_modid_offset = 1<<31; // Rubbish initial value.
+   // lm_modid_offset is a magic offset, retrieved using an external program.
+
+   if (!getplatformoffset_called) {
+      const HChar *platform = VG_PLATFORM;
+      const HChar *cmdformat = "%s/%s-%s -o %s";
+      const HChar *getoff = "getoff";
+      HChar outfile[VG_(mkstemp_fullname_bufsz) (VG_(strlen)(getoff))];
+      Int fd = VG_(mkstemp) (getoff, outfile);
+      HChar cmd[ VG_(strlen)(cmdformat)
+                 + VG_(strlen)(VG_(libdir)) - 2
+                 + VG_(strlen)(getoff)      - 2
+                 + VG_(strlen)(platform)    - 2
+                 + VG_(strlen)(outfile)     - 2
+                 + 1];
+      UInt cmdlen;
+      struct vg_stat stat_buf;
+      Int ret;
+
+      cmdlen = VG_(snprintf)(cmd, sizeof(cmd),
+                             cmdformat, 
+                             VG_(libdir), getoff, platform, outfile);
+      vg_assert (cmdlen == sizeof(cmd) - 1);
+      ret = VG_(system) (cmd);
+      if (ret != 0 || VG_(debugLog_getLevel)() >= 1)
+         VG_(dmsg) ("command %s exit code %d\n", cmd, ret);
+      ret = VG_(fstat)( fd, &stat_buf );
+      if (ret != 0)
+         VG_(dmsg) ("error VG_(fstat) %d %s\n", fd, outfile);
+      else {
+         HChar *w;
+         HChar *ssaveptr;
+         HChar *os;
+         HChar *str;
+         HChar *endptr;
+
+         os = malloc (stat_buf.size+1);
+         vg_assert (os);
+         ret = VG_(read)(fd, os, stat_buf.size);
+         vg_assert(ret == stat_buf.size);
+         os[ret] = '\0';
+         str = os;
+         while ((w = VG_(strtok_r)(str, " \n", &ssaveptr)) != NULL) {
+            if (VG_(strcmp) (w, "lm_modid_offset") == 0) {
+               w = VG_(strtok_r)(NULL, " \n", &ssaveptr);
+               lm_modid_offset = (SizeT) VG_(strtoull16) ( w, &endptr );
+               if (endptr == w)
+                  VG_(dmsg) ("%s lm_modid_offset unexpected hex value %s\n",
+                             cmd, w);
+               else
+                  lm_modid_offset_found = True;
+            } else {
+               VG_(dmsg) ("%s produced unexpected %s\n", cmd, w);
+            }
+            str = NULL; // ensure next  VG_(strtok_r) continues the parsing.
+         }
+         VG_(free) (os);
+      }
+
+      VG_(close)(fd);
+      ret = VG_(unlink)( outfile );
+      if (ret != 0)
+         VG_(umsg) ("error: could not unlink %s\n", outfile);
+      getplatformoffset_called = True;
+   }
+
+   *result = lm_modid_offset;
+   return lm_modid_offset_found;
+}
+
+Bool valgrind_get_tls_addr (ThreadState *tst,
+                            CORE_ADDR offset,
+                            CORE_ADDR lm,
+                            CORE_ADDR *tls_addr)
+{
+   CORE_ADDR **dtv_loc;
+   CORE_ADDR *dtv;
+   SizeT lm_modid_offset;
+   unsigned long int modid;
+
+#define CHECK_DEREF(addr, len, name) \
+   if (!VG_(am_is_valid_for_client) ((Addr)(addr), (len), VKI_PROT_READ)) { \
+      dlog(0, "get_tls_addr: %s at %p len %lu not addressable\n",       \
+           name, (void*)(addr), (unsigned long)(len));                  \
+      return False;                                                     \
+   }
+
+   *tls_addr = 0;
+
+   if (the_low_target.target_get_dtv == NULL) {
+      dlog(1, "low level dtv support not available\n");
+      return False;
+   }
+
+   if (!getplatformoffset (&lm_modid_offset)) {
+      dlog(0, "link_map modid field offset not available\n");
+      return False;
+   }
+   dlog (2, "link_map modid offset %p\n", (void*)lm_modid_offset);
+   vg_assert (lm_modid_offset < 0x10000); // let's say
+   
+   dtv_loc = (*the_low_target.target_get_dtv)(tst);
+   if (dtv_loc == NULL) {
+      dlog(0, "low level dtv support returned NULL\n");
+      return False;
+   }
+
+   CHECK_DEREF(dtv_loc, sizeof(CORE_ADDR), "dtv_loc");
+   dtv = *dtv_loc;
+
+   // Check we can read at least 2 address at the beginning of dtv.
+   CHECK_DEREF(dtv, 2*sizeof(CORE_ADDR), "dtv 2 first entries");
+   dlog (2, "tid %d dtv %p\n", tst->tid, (void*)dtv);
+
+   // Check we can read the modid
+   CHECK_DEREF(lm+lm_modid_offset, sizeof(unsigned long int), "link_map modid");
+   modid = *(unsigned long int *)(lm+lm_modid_offset);
+
+   // Check we can access the dtv entry for modid
+   CHECK_DEREF(dtv + 2 * modid, sizeof(CORE_ADDR), "dtv[2*modid]");
+
+   // And finally compute the address of the tls variable.
+   *tls_addr = *(dtv + 2 * modid) + offset;
+   
+   return True;
+
+#undef CHECK_DEREF
+}
+
 /* returns a pointer to the architecture state corresponding to
    the provided register set: 0 => normal guest registers,
                               1 => shadow1
diff --git a/coregrind/m_gdbserver/target.h b/coregrind/m_gdbserver/target.h
index c98b976..1cacc1e 100644
--- a/coregrind/m_gdbserver/target.h
+++ b/coregrind/m_gdbserver/target.h
@@ -200,11 +200,29 @@
 extern int valgrind_insert_watchpoint (char type, CORE_ADDR addr, int len);
 extern int valgrind_remove_watchpoint (char type, CORE_ADDR addr, int len);
 
+/* Get the address of a thread local variable.
+   'tst' is the thread for which thread local address is searched for.
+   'offset' is the offset of the variable in the tls data of the load
+   module identified by 'lm'.
+   'lm' is the link_map address of the loaded  module : it is the address
+   of the data structure used by the dynamic linker to maintain various
+   information about a loaded object.
+   
+   Returns True if the address of the variable could be found.
+      *tls_addr is then set to this address.
+   Returns False if tls support is not available for this arch, or
+   if an error occurred. *tls_addr is then set to 0. */
+extern Bool valgrind_get_tls_addr (ThreadState *tst,
+                                   CORE_ADDR offset,
+                                   CORE_ADDR lm,
+                                   CORE_ADDR *tls_addr);
+
 
 /* -------------------------------------------------------------------------- */
 /* ----------- Utils functions for low level arch specific files ------------ */
 /* -------------------------------------------------------------------------- */
 
+
 /* returns a pointer to the architecture state corresponding to
    the provided register set: 0 => normal guest registers,
                               1 => shadow1
diff --git a/coregrind/m_gdbserver/valgrind-low-amd64.c b/coregrind/m_gdbserver/valgrind-low-amd64.c
index 82f85f0..c48afc2 100644
--- a/coregrind/m_gdbserver/valgrind-low-amd64.c
+++ b/coregrind/m_gdbserver/valgrind-low-amd64.c
@@ -315,6 +315,7 @@
    VG_(machine_get_VexArchInfo) (&va, &vai);
    return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX ? True : False);
 }
+
 static
 const char* target_xml (Bool shadow_mode)
 {
@@ -345,6 +346,15 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_amd64)
+   return (CORE_ADDR**)(tst->arch.vex.guest_FS_ZERO + 0x8);
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    -1, // Must be computed at init time.
    regs,
@@ -353,7 +363,8 @@
    get_pc,
    set_pc,
    "amd64",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void amd64_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-arm.c b/coregrind/m_gdbserver/valgrind-low-arm.c
index 255fe79..9516d00 100644
--- a/coregrind/m_gdbserver/valgrind-low-arm.c
+++ b/coregrind/m_gdbserver/valgrind-low-arm.c
@@ -286,6 +286,16 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_arm)
+   // arm dtv is pointed to by TPIDRURO
+   return (CORE_ADDR**)(tst->arch.vex.guest_TPIDRURO);
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -294,7 +304,8 @@
    get_pc,
    set_pc,
    "arm",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void arm_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-arm64.c b/coregrind/m_gdbserver/valgrind-low-arm64.c
index 32f19e1..56f4ba6 100644
--- a/coregrind/m_gdbserver/valgrind-low-arm64.c
+++ b/coregrind/m_gdbserver/valgrind-low-arm64.c
@@ -261,6 +261,16 @@
 #endif 
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_arm64)
+   // arm64 dtv is pointed to by TPIDR_EL0.
+   return (CORE_ADDR**)(tst->arch.vex.guest_TPIDR_EL0);
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -269,7 +279,8 @@
    get_pc,
    set_pc,
    "arm64",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void arm64_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-mips32.c b/coregrind/m_gdbserver/valgrind-low-mips32.c
index 47adf4e..1490867 100644
--- a/coregrind/m_gdbserver/valgrind-low-mips32.c
+++ b/coregrind/m_gdbserver/valgrind-low-mips32.c
@@ -354,6 +354,16 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_mips32)
+   // mips32 dtv location similar to ppc64
+   return (CORE_ADDR**)(tst->arch.vex.guest_ULR - 0x7000 - sizeof(CORE_ADDR));
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -362,7 +372,8 @@
    get_pc,
    set_pc,
    "mips",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void mips32_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-mips64.c b/coregrind/m_gdbserver/valgrind-low-mips64.c
index b1f3bbd..d35df41 100644
--- a/coregrind/m_gdbserver/valgrind-low-mips64.c
+++ b/coregrind/m_gdbserver/valgrind-low-mips64.c
@@ -355,6 +355,17 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_mips64)
+   // mips64 dtv location similar to ppc64: one CORE_ADDR before the
+   // address found 0x7000 below the thread register (ULR).
+   return (CORE_ADDR**)(tst->arch.vex.guest_ULR - 0x7000 - sizeof(CORE_ADDR));
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -363,7 +374,8 @@
    get_pc,
    set_pc,
    "mips64",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void mips64_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-ppc32.c b/coregrind/m_gdbserver/valgrind-low-ppc32.c
index 592e25b..6d2d44e 100644
--- a/coregrind/m_gdbserver/valgrind-low-ppc32.c
+++ b/coregrind/m_gdbserver/valgrind-low-ppc32.c
@@ -332,6 +332,17 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_ppc32)
+   // ppc32 dtv is located just before the tcb, which is 0x7000 before
+   // the thread id (r2)
+   return (CORE_ADDR**)(tst->arch.vex.guest_GPR2 - 0x7000 - sizeof(CORE_ADDR));
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -340,7 +351,8 @@
    get_pc,
    set_pc,
    "ppc32",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void ppc32_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-ppc64.c b/coregrind/m_gdbserver/valgrind-low-ppc64.c
index fa146c1..490322d 100644
--- a/coregrind/m_gdbserver/valgrind-low-ppc64.c
+++ b/coregrind/m_gdbserver/valgrind-low-ppc64.c
@@ -329,6 +329,17 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
+   // ppc64 dtv is located just before the tcb, which is 0x7000 before 
+   // the thread id (r13)
+   return (CORE_ADDR**)(tst->arch.vex.guest_GPR13 - 0x7000 - sizeof(CORE_ADDR));
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -337,7 +348,8 @@
    get_pc,
    set_pc,
    "ppc64",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void ppc64_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-s390x.c b/coregrind/m_gdbserver/valgrind-low-s390x.c
index b06b920..8ba5180 100644
--- a/coregrind/m_gdbserver/valgrind-low-s390x.c
+++ b/coregrind/m_gdbserver/valgrind-low-s390x.c
@@ -197,6 +197,18 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_s390x)
+   // Thread pointer is in a0 (high 32 bits) and a1. Dtv is the second word.
+   return (CORE_ADDR**)(((CORE_ADDR)tst->arch.vex.guest_a0 << 32 
+                         | (CORE_ADDR)tst->arch.vex.guest_a1)
+                        + sizeof(CORE_ADDR));
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -205,7 +217,8 @@
    get_pc,
    set_pc,
    "s390x",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void s390x_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind-low-x86.c b/coregrind/m_gdbserver/valgrind-low-x86.c
index f2715a0..43ac1eb 100644
--- a/coregrind/m_gdbserver/valgrind-low-x86.c
+++ b/coregrind/m_gdbserver/valgrind-low-x86.c
@@ -257,6 +257,29 @@
    }  
 }
 
+static CORE_ADDR** target_get_dtv (ThreadState *tst)
+{
+#if defined(VGA_x86)
+   // FIXME: should make the below formally visible from VEX.
+   extern ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
+                                        UInt seg_selector, UInt virtual_addr );
+
+   ULong dtv_loc_g = x86g_use_seg_selector (tst->arch.vex.guest_LDT,
+                                            tst->arch.vex.guest_GDT,
+                                            tst->arch.vex.guest_GS,
+                                            0x4);
+   if (dtv_loc_g == 1ULL << 32) {
+      dlog(0, "Error getting x86 dtv\n");
+      return NULL;
+   } else {
+      CORE_ADDR dtv_loc = dtv_loc_g;
+      return (CORE_ADDR**)dtv_loc;
+   }
+#else
+   vg_assert(0);
+#endif
+}
+
 static struct valgrind_target_ops low_target = {
    num_regs,
    regs,
@@ -265,7 +288,8 @@
    get_pc,
    set_pc,
    "i386",
-   target_xml
+   target_xml,
+   target_get_dtv
 };
 
 void x86_init_architecture (struct valgrind_target_ops *target)
diff --git a/coregrind/m_gdbserver/valgrind_low.h b/coregrind/m_gdbserver/valgrind_low.h
index e0def75..7b87856 100644
--- a/coregrind/m_gdbserver/valgrind_low.h
+++ b/coregrind/m_gdbserver/valgrind_low.h
@@ -68,6 +68,34 @@
       Returns NULL if there is no target xml file*/
    const char* (*target_xml) (Bool shadow_mode);
 
+   /* Returns the address in the thread control block where dtv is found.
+      Return NULL if an error occurs or no support for tls/dtv is available.
+      Note that the addressability of the returned result has not been
+      verified. In other words, target_get_dtv just adds some magic
+      offset to the arch specific thread register or thread pointer or ... 
+      
+      The implementation of this is of course depending on the arch
+      but also depends on the way pthread lib arranges its data.
+      For background info about tls handling, read
+      'ELF Handling For Thread-Local Storage'
+      http://www.akkadia.org/drepper/tls.pdf
+      (slightly obsolete e.g. the size of a dtv entry is 2 words now).
+      The reference is the glibc source, in particular the arch specific
+      file tls.h.
+
+      For platforms where the dtv is located in the tcb, the magic offset
+      to add to the thread pointer/register/... can be found by doing:
+        cd none/tests
+        gdb ./tls
+        set debug-file-directory /usr/lib/debug # or equivalent
+        start
+        p &((struct pthread*)0x0)->header.dtv
+      Currently the dtv offset is hardcoded, based on the assumption
+      that this is relatively stable. If that would be false, then
+      getoff-<platform> should be modified to output this offset e.g.
+      depending on the glibc version. */
+   CORE_ADDR** (*target_get_dtv)(ThreadState *tst);
+
 };
 
 extern void x86_init_architecture (struct valgrind_target_ops *target);
diff --git a/gdbserver_tests/Makefile.am b/gdbserver_tests/Makefile.am
index f2c7a0d..d5a0306 100644
--- a/gdbserver_tests/Makefile.am
+++ b/gdbserver_tests/Makefile.am
@@ -14,6 +14,11 @@
 	hginfo.stdinB.gdb \
 	hginfo.stdoutB.exp \
 	hginfo.vgtest \
+	hgtls.stderrB.exp \
+	hgtls.stderr.exp \
+	hgtls.stdinB.gdb \
+	hgtls.stdoutB.exp \
+	hgtls.vgtest \
 	mcblocklistsearch.stderr.exp \
 	mcblocklistsearch.stdinB.gdb \
 	mcblocklistsearch.vgtest \
diff --git a/gdbserver_tests/hgtls.stderr.exp b/gdbserver_tests/hgtls.stderr.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gdbserver_tests/hgtls.stderr.exp
diff --git a/gdbserver_tests/hgtls.stderrB.exp b/gdbserver_tests/hgtls.stderrB.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gdbserver_tests/hgtls.stderrB.exp
diff --git a/gdbserver_tests/hgtls.stdinB.gdb b/gdbserver_tests/hgtls.stdinB.gdb
new file mode 100644
index 0000000..2541e9e
--- /dev/null
+++ b/gdbserver_tests/hgtls.stdinB.gdb
@@ -0,0 +1,37 @@
+# connect gdb to Valgrind gdbserver:
+target remote | ./vgdb --wait=60 --vgdb-prefix=./vgdb-prefix-hgtls
+echo vgdb launched process attached\n
+monitor v.set vgdb-error 999999
+#
+#
+# insert break:
+break tls.c:55
+command
+set $tls_ip = main
+if test == &tests[0]
+  set $tls_ip = &race
+end
+if test == &tests[1]
+  set $tls_ip = &local
+end
+if test == &tests[2]
+  set $tls_ip = &global
+end
+if test == &tests[3]
+  set $tls_ip = &static_extern
+end
+if test == &tests[4]
+  set $tls_ip = &so_extern
+end
+if test == &tests[5]
+  set $tls_ip = &so_local
+end
+if test == &tests[6]
+  set $tls_ip = &global
+end
+printf "test %s tls_ip %p ip %p equal %d\n", test->name, $tls_ip, ip, $tls_ip == ip
+continue
+end
+# continue till the end
+continue
+quit
diff --git a/gdbserver_tests/hgtls.stdoutB.exp b/gdbserver_tests/hgtls.stdoutB.exp
new file mode 100644
index 0000000..c87dbc1
--- /dev/null
+++ b/gdbserver_tests/hgtls.stdoutB.exp
@@ -0,0 +1,59 @@
+Breakpoint 1 at 0x........: file tls.c, line 55.
+Continuing.
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests>) at tls.c:55
+55		int here = 0;
+test race tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests>) at tls.c:55
+55		int here = 0;
+test race tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+16>) at tls.c:55
+55		int here = 0;
+test local tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+16>) at tls.c:55
+55		int here = 0;
+test local tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+32>) at tls.c:55
+55		int here = 0;
+test global tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+32>) at tls.c:55
+55		int here = 0;
+test global tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+48>) at tls.c:55
+55		int here = 0;
+test static_extern tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+48>) at tls.c:55
+55		int here = 0;
+test static_extern tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+64>) at tls.c:55
+55		int here = 0;
+test so_extern tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+64>) at tls.c:55
+55		int here = 0;
+test so_extern tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+80>) at tls.c:55
+55		int here = 0;
+test so_local tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+80>) at tls.c:55
+55		int here = 0;
+test so_local tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+96>) at tls.c:55
+55		int here = 0;
+test so_global tls_ip 0x........ ip 0x........ equal 1
+[New Thread ....]
+Breakpoint 1, tls_ptr (p=0x........ <tests+96>) at tls.c:55
+55		int here = 0;
+test so_global tls_ip 0x........ ip 0x........ equal 1
+Program exited normally.
diff --git a/gdbserver_tests/hgtls.vgtest b/gdbserver_tests/hgtls.vgtest
new file mode 100644
index 0000000..60a73b9
--- /dev/null
+++ b/gdbserver_tests/hgtls.vgtest
@@ -0,0 +1,12 @@
+# test tls addresses
+prog: ../none/tests/tls
+vgopts: --tool=helgrind --vgdb=yes --vgdb-error=0 --vgdb-prefix=./vgdb-prefix-hgtls -q 
+prereq: test -e ../none/tests/tls
+stdout_filter: filter_make_empty
+stderr_filter: filter_make_empty
+progB: gdb
+argsB: --quiet -l 60 --nx ../none/tests/tls
+stdinB: hgtls.stdinB.gdb
+stdoutB_filter: filter_gdb
+stderrB_filter: filter_make_empty
+# stderrB_filter_args: hg01_all_ok.c
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am
index d11004a..031f99a 100644
--- a/none/tests/Makefile.am
+++ b/none/tests/Makefile.am
@@ -277,7 +277,7 @@
 threadederrno_LDADD	= -lpthread
 tls_SOURCES		= tls.c tls2.c
 tls_DEPENDENCIES	= tls.so tls2.so
-tls_LDFLAGS		= -Wl,-rpath,$(top_builddir)/none/tests
+tls_LDFLAGS		= -Wl,-rpath,$(abs_top_builddir)/none/tests
 tls_LDADD		= tls.so tls2.so -lpthread
 tls_so_SOURCES		= tls_so.c
 tls_so_DEPENDENCIES	= tls2.so
@@ -285,7 +285,7 @@
  tls_so_LDFLAGS		= -dynamic -dynamiclib -all_load -fpic
  tls_so_LDADD		= `pwd`/tls2.so
 else
- tls_so_LDFLAGS		= -Wl,-rpath,$(top_builddir)/none/tests -shared -fPIC
+ tls_so_LDFLAGS		= -Wl,-rpath,$(abs_top_builddir)/none/tests -shared -fPIC
  tls_so_LDADD		= tls2.so
 endif
 tls_so_CFLAGS		= $(AM_CFLAGS) -fPIC
diff --git a/none/tests/tls.c b/none/tests/tls.c
index 88722dc..1810790 100644
--- a/none/tests/tls.c
+++ b/none/tests/tls.c
@@ -46,8 +46,8 @@
 struct testcase {
 	const char *name;
 	func_t func;
+        char pad[2 * (8 - sizeof(void*))];
 };
-
 static void *tls_ptr(void *p)
 {
 	struct testcase *test = (struct testcase *)p;