[libcxx] Fix a data race in call_once

call_once is using relaxed atomic load to perform double-checked locking, which contains a data race. The fast-path load has to be an acquire atomic load.

Differential Revision: https://reviews.llvm.org/D24028



git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@280621 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/memory b/include/memory
index 31f58b7..8cb094e 100644
--- a/include/memory
+++ b/include/memory
@@ -663,6 +663,18 @@
 #endif
 }
 
+template <class _ValueType>
+inline _LIBCPP_ALWAYS_INLINE
+_ValueType __libcpp_acquire_load(_ValueType const* __value) {
+#if !defined(_LIBCPP_HAS_NO_THREADS) && \
+    defined(__ATOMIC_ACQUIRE) &&        \
+    (__has_builtin(__atomic_load_n) || _GNUC_VER >= 407)
+    return __atomic_load_n(__value, __ATOMIC_ACQUIRE);
+#else
+    return *__value;
+#endif
+}
+
 // addressof moved to <__functional_base>
 
 template <class _Tp> class allocator;
diff --git a/include/mutex b/include/mutex
index c047cf9..79befbe 100644
--- a/include/mutex
+++ b/include/mutex
@@ -574,7 +574,7 @@
 void
 call_once(once_flag& __flag, _Callable&& __func, _Args&&... __args)
 {
-    if (__libcpp_relaxed_load(&__flag.__state_) != ~0ul)
+    if (__libcpp_acquire_load(&__flag.__state_) != ~0ul)
     {
         typedef tuple<_Callable&&, _Args&&...> _Gp;
         _Gp __f(_VSTD::forward<_Callable>(__func), _VSTD::forward<_Args>(__args)...);
@@ -590,7 +590,7 @@
 void
 call_once(once_flag& __flag, _Callable& __func)
 {
-    if (__libcpp_relaxed_load(&__flag.__state_) != ~0ul)
+    if (__libcpp_acquire_load(&__flag.__state_) != ~0ul)
     {
         __call_once_param<_Callable> __p(__func);
         __call_once(__flag.__state_, &__p, &__call_once_proxy<_Callable>);