make dlerror state and message thread-local and dynamically allocated

this fixes truncation of error messages containing long pathnames or
symbol names.

the dlerror state was previously required by POSIX to be global. the
resolution of Austin Group bug 97 relaxed the requirements to allow
thread-safe implementations of dlerror with a thread-local state and
message buffer.
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 6963f0d..d7c0323 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -17,6 +17,7 @@
 weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
+weak_alias(dummy_0, __dl_thread_cleanup);
 
 _Noreturn void __pthread_exit(void *result)
 {
@@ -92,6 +93,7 @@
 	__vm_unlock();
 
 	__do_orphaned_stdio_locks();
+	__dl_thread_cleanup();
 
 	if (self->detached && self->map_base) {
 		/* Detached threads must avoid the kernel clear_child_tid