fix over-alignment of TLS, insufficient builtin TLS on 64-bit archs
a conservative estimate of 4*sizeof(size_t) was used as the minimum
alignment for thread-local storage, even though the only actual
requirement is alignment suitable for struct pthread and void* (which
struct pthread already contains). any additional alignment required by
the application or libraries is encoded in their ELF program headers
(PT_TLS) and is already applied.
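over-alignment prevented the builtin_tls array from ever being used in
dynamic-linked programs on 64-bit archs, thereby requiring allocation
at startup even in programs with no TLS of their own.

builtin_tls is now a struct containing a char, a struct pthread, and
space for 16 pointers. the minimum alignment is taken as the offset of
the struct pthread member (i.e. its natural alignment), and the extra
space now scales with pointer size instead of being a fixed 64 bytes.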
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 0bdc988..ca10199 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -122,6 +122,13 @@
const char *__libc_get_version(void);
+static struct builtin_tls {
+ char c;
+ struct pthread pt;
+ void *space[16];
+} builtin_tls[1];
+#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
+
static struct dso *head, *tail, *ldso, *fini_head;
static char *env_path, *sys_path;
static unsigned long long gencnt;
@@ -132,10 +139,9 @@
static jmp_buf *rtld_fail;
static pthread_rwlock_t lock;
static struct debug debug;
-static size_t tls_cnt, tls_offset, tls_align = 4*sizeof(size_t);
+static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
static size_t static_tls_cnt;
static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
-static long long builtin_tls[(sizeof(struct pthread) + 64)/sizeof(long long)];
struct debug *_dl_debug_addr = &debug;