Revert of SkOnce: 2 bytes -> 1 byte (patchset #4 id:60001 of https://codereview.chromium.org/1904483003/ ) Reason for revert: bust the roll Original issue's description: > SkOnce: 2 bytes -> 1 byte > > This uses the same logic we worked out for SkOncePtr to reduce > the memory footprint of SkOnce from a done byte and lock byte > to a single 3-state byte: > > - NotStarted: no thread has tried to run fn() yet > - Active: a thread is running fn() > - Done: fn() is complete > > Threads which see Done return immediately. > Threads which see NotStarted try to move to Active, run fn(), then move to Done. > Threads which see Active spin until the active thread moves to Done. > > This additionally fixes a too-weak memory order bug in SkOncePtr, > and adds a big note to explain. > > BUG=skia: > GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1904483003 > > Committed: https://skia.googlesource.com/skia/+/df02d338be8e3c1c50b48a3a9faa582703a39c07 TBR=herb@google.com # Skipping CQ checks because original CL landed less than 1 days ago. NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=skia: Review URL: https://codereview.chromium.org/1898413004

commit: 9134686fd9ce384e9163b44942538d033b5bff11 [log] [tgz]
author: mtklein <mtklein@chromium.org> Wed Apr 20 13:02:08 2016 -0700
committer: Commit bot <commit-bot@chromium.org> Wed Apr 20 13:02:09 2016 -0700
tree: 6de38cd425016f9411e5dc20985dee795364ac30
parent: c30c418f4eb75f365c7d7a32d5419b41ca780ba8 [diff]
diff --git a/include/private/SkOnce.h b/include/private/SkOnce.h
index 20bf427..d83b63f 100644
--- a/include/private/SkOnce.h
+++ b/include/private/SkOnce.h

@@ -8,9 +8,9 @@
 #ifndef SkOnce_DEFINED
 #define SkOnce_DEFINED
 
+#include "../private/SkSpinlock.h"
 #include <atomic>
 #include <utility>
-#include "SkTypes.h"
 
 // SkOnce provides call-once guarantees for Skia, much like std::once_flag/std::call_once().
 //
@@ -21,50 +21,20 @@
 public:
     template <typename Fn, typename... Args>
     void operator()(Fn&& fn, Args&&... args) {
-        auto state = fState.load(std::memory_order_acquire);
-
-        if (state == Done) {
-            return;
-        }
-
-        if (state == NotStarted) {
-            // Try to claim the job of calling fn() by swapping from NotStarted to Calling.
-            // See [1] below for why we use std::memory_order_acquire instead of relaxed.
-            if (fState.compare_exchange_strong(state, Calling, std::memory_order_acquire)) {
-                // Claimed!  Call fn(), then mark this SkOnce as Done.
+        // Vanilla double-checked locking.
+        if (!fDone.load(std::memory_order_acquire)) {
+            fLock.acquire();
+            if (!fDone.load(std::memory_order_relaxed)) {
                 fn(std::forward<Args>(args)...);
-                return fState.store(Done, std::memory_order_release);
+                fDone.store(true, std::memory_order_release);
             }
+            fLock.release();
         }
-
-        while (state == Calling) {
-            // Some other thread is calling fn().  Wait for them to finish.
-            state = fState.load(std::memory_order_acquire);
-        }
-        SkASSERT(state == Done);
     }
 
 private:
-    enum State : uint8_t { NotStarted, Calling, Done};
-    std::atomic<State> fState{NotStarted};
+    std::atomic<bool> fDone{false};
+    SkSpinlock        fLock;
 };
 
-/* [1]  Why do we compare_exchange_strong() with std::memory_order_acquire instead of relaxed?
- *
- * If we succeed, we really only need a relaxed compare_exchange_strong()... we're the ones
- * who are about to do a release store, so there's certainly nothing yet for an acquire to
- * synchronize with.
- *
- * If that compare_exchange_strong() fails, we're either in Calling or Done state.
- * Again, if we're in Calling state, relaxed would have been fine: the spin loop will
- * acquire up to the Calling thread's release store.
- *
- * But if that compare_exchange_strong() fails and we find ourselves in the Done state,
- * we've never done an acquire load to sync up to the store of that Done state.
- *
- * So on failure we need an acquire load.  Generally the failure memory order cannot be
- * stronger than the success memory order, so we need acquire on success too.  The single
- * memory order version of compare_exchange_strong() uses the same acquire order for both.
- */
-
 #endif  // SkOnce_DEFINED

diff --git a/include/private/SkOncePtr.h b/include/private/SkOncePtr.h
index b60d968..3c1ab63 100644
--- a/include/private/SkOncePtr.h
+++ b/include/private/SkOncePtr.h

@@ -66,9 +66,8 @@
             if (state == 0) {
                 // It looks like no one has tried to create our pointer yet.
                 // We try to claim that task by atomically swapping our state from '0' to '1'.
-                // See SkOnce.h for why we use an acquire memory order here rather than relaxed.
                 if (sk_atomic_compare_exchange(
-                    &fState, &state, (uintptr_t)1, sk_memory_order_acquire, sk_memory_order_acquire)) {
+                    &fState, &state, (uintptr_t)1, sk_memory_order_relaxed, sk_memory_order_relaxed)) {
                     // We've claimed it.  Create our pointer and store it into fState.
                     state = (uintptr_t)f();
                     SkASSERT(state > 1);
commit	9134686fd9ce384e9163b44942538d033b5bff11	[log] [tgz]
author	mtklein <mtklein@chromium.org>	Wed Apr 20 13:02:08 2016 -0700
committer	Commit bot <commit-bot@chromium.org>	Wed Apr 20 13:02:09 2016 -0700
tree	6de38cd425016f9411e5dc20985dee795364ac30
parent	c30c418f4eb75f365c7d7a32d5419b41ca780ba8 [diff]