Despite my pathological distrust of spin locks, the numbers just don't lie.  I've put a small spin in __sp_mut::lock() on std::mutex::try_lock(), which is testing quite well.  In my experience, putting in a yield for every failed iteration is also a major performance booster.  This change makes one of the performance tests I was using (a highly contended one) run about 20 times faster.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@160967 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/src/memory.cpp b/src/memory.cpp
index 7caab26..1c108b8 100644
--- a/src/memory.cpp
+++ b/src/memory.cpp
@@ -10,6 +10,7 @@
 #define _LIBCPP_BUILDING_MEMORY
 #include "memory"
 #include "mutex"
+#include "thread"
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
@@ -129,13 +130,23 @@
 void
 __sp_mut::lock() _NOEXCEPT
 {
-    reinterpret_cast<mutex*>(_)->lock();
+    mutex& m = *static_cast<mutex*>(_);
+    unsigned count = 0;
+    while (!m.try_lock())
+    {
+        if (++count > 16)
+        {
+            m.lock();
+            break;
+        }
+        this_thread::yield();
+    }
 }
 
 void
 __sp_mut::unlock() _NOEXCEPT
 {
-    reinterpret_cast<mutex*>(_)->unlock();
+    static_cast<mutex*>(_)->unlock();
 }
 
 __sp_mut&