[libcxx] Fix PR15638 - Only allocate in parent when starting a thread to prevent calling terminate.

Summary:
Hi,

When creating a new thread, libc++ performs at least two allocations. The first allocates a tuple holding the functor and its arguments, which is passed to the new thread. The second allocates the thread-local storage that libc++ needs internally. Currently the second allocation happens in the child thread, meaning that if it throws, the program will terminate with an uncaught std::bad_alloc.

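The solution is to allocate ALL of the memory in the parent thread and then pass it to the child, so that an allocation failure is thrown from the thread constructor in the parent instead of inside the new thread.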

See https://llvm.org/bugs/show_bug.cgi?id=15638

Reviewers: mclow.lists, danalbert, jroelofs, EricWF

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D13748

llvm-svn: 266851
diff --git a/libcxx/include/thread b/libcxx/include/thread
index 6857e9e..c3eb437 100644
--- a/libcxx/include/thread
+++ b/libcxx/include/thread
@@ -339,21 +339,21 @@
 
 #ifndef _LIBCPP_HAS_NO_VARIADICS
 
-template <class _Fp, class ..._Args, size_t ..._Indices>
+template <class _TSp, class _Fp, class ..._Args, size_t ..._Indices>
 inline _LIBCPP_INLINE_VISIBILITY
 void
-__thread_execute(tuple<_Fp, _Args...>& __t, __tuple_indices<_Indices...>)
+__thread_execute(tuple<_TSp, _Fp, _Args...>& __t, __tuple_indices<_Indices...>)
 {
-    __invoke(_VSTD::move(_VSTD::get<0>(__t)), _VSTD::move(_VSTD::get<_Indices>(__t))...);
+    __invoke(_VSTD::move(_VSTD::get<1>(__t)), _VSTD::move(_VSTD::get<_Indices>(__t))...);
 }
 
 template <class _Fp>
-void*
-__thread_proxy(void* __vp)
+void* __thread_proxy(void* __vp)
 {
-    __thread_local_data().reset(new __thread_struct);
+    // _Fp = std::tuple< unique_ptr<__thread_struct>, Functor, Args...>
     std::unique_ptr<_Fp> __p(static_cast<_Fp*>(__vp));
-    typedef typename __make_tuple_indices<tuple_size<_Fp>::value, 1>::type _Index;
+    __thread_local_data().reset(_VSTD::get<0>(*__p).release());
+    typedef typename __make_tuple_indices<tuple_size<_Fp>::value, 2>::type _Index;
     __thread_execute(*__p, _Index());
     return nullptr;
 }
@@ -363,9 +363,13 @@
          >
 thread::thread(_Fp&& __f, _Args&&... __args)
 {
-    typedef tuple<typename decay<_Fp>::type, typename decay<_Args>::type...> _Gp;
-    _VSTD::unique_ptr<_Gp> __p(new _Gp(__decay_copy(_VSTD::forward<_Fp>(__f)),
-                                __decay_copy(_VSTD::forward<_Args>(__args))...));
+    typedef unique_ptr<__thread_struct> _TSPtr;
+    _TSPtr __tsp(new __thread_struct);
+    typedef tuple<_TSPtr, typename decay<_Fp>::type, typename decay<_Args>::type...> _Gp;
+    _VSTD::unique_ptr<_Gp> __p(
+            new _Gp(std::move(__tsp),
+                    __decay_copy(_VSTD::forward<_Fp>(__f)),
+                    __decay_copy(_VSTD::forward<_Args>(__args))...));
     int __ec = pthread_create(&__t_, 0, &__thread_proxy<_Gp>, __p.get());
     if (__ec == 0)
         __p.release();
@@ -376,22 +380,34 @@
 #else  // _LIBCPP_HAS_NO_VARIADICS
 
 template <class _Fp>
-void*
-__thread_proxy(void* __vp)
+struct __thread_invoke_pair {
+    // This type is used to pass memory for thread local storage and a functor
+    // to a newly created thread because std::pair doesn't work with
+    // std::unique_ptr in C++03.
+    __thread_invoke_pair(_Fp& __f) : __tsp_(new __thread_struct), __fn_(__f) {}
+    unique_ptr<__thread_struct> __tsp_;
+    _Fp __fn_;
+};
+
+template <class _Fp>
+void* __thread_proxy_cxx03(void* __vp)
 {
-    __thread_local_data().reset(new __thread_struct);
     std::unique_ptr<_Fp> __p(static_cast<_Fp*>(__vp));
-    (*__p)();
+    __thread_local_data().reset(__p->__tsp_.release());
+    (__p->__fn_)();
     return nullptr;
 }
 
 template <class _Fp>
 thread::thread(_Fp __f)
 {
-    std::unique_ptr<_Fp> __p(new _Fp(__f));
-    int __ec = pthread_create(&__t_, 0, &__thread_proxy<_Fp>, __p.get());
+
+    typedef __thread_invoke_pair<_Fp> _InvokePair;
+    typedef std::unique_ptr<_InvokePair> _PairPtr;
+    _PairPtr __pp(new _InvokePair(__f));
+    int __ec = pthread_create(&__t_, 0, &__thread_proxy_cxx03<_InvokePair>, __pp.get());
     if (__ec == 0)
-        __p.release();
+        __pp.release();
     else
         __throw_system_error(__ec, "thread constructor failed");
 }