Modernize SkMutex and SkSemaphore.

   - use <atomic>
   - fuse SkMutex and SkBaseMutex
   - fuse SkSemaphore and SkBaseSemaphore

Still TODO:
   - replace SK_DECLARE_STATIC_MUTEX(name) with static SkMutex name

That call-site cleanup is deferred until this CL has stuck, to avoid churn if it gets reverted.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1947153002

No public API changes.
TBR=reed@google.com

Review-Url: https://codereview.chromium.org/1947153002
diff --git a/include/core/SkPixelRef.h b/include/core/SkPixelRef.h
index 90e0de5..d836b2f 100644
--- a/include/core/SkPixelRef.h
+++ b/include/core/SkPixelRef.h
@@ -342,7 +342,7 @@
     /** Return the mutex associated with this pixelref. This value is assigned
         in the constructor, and cannot change during the lifetime of the object.
     */
-    SkBaseMutex* mutex() const { return &fMutex; }
+    SkMutex* mutex() const { return &fMutex; }
 
     // only call from constructor. Flags this to always be locked, removing
     // the need to grab the mutex and call onLockPixels/onUnlockPixels.
diff --git a/include/ports/SkFontConfigInterface.h b/include/ports/SkFontConfigInterface.h
index 72cad0a..9f98e35 100644
--- a/include/ports/SkFontConfigInterface.h
+++ b/include/ports/SkFontConfigInterface.h
@@ -13,7 +13,6 @@
 #include "SkRefCnt.h"
 #include "SkTypeface.h"
 
-struct SkBaseMutex;
 class SkFontMgr;
 
 /**
diff --git a/include/private/SkMutex.h b/include/private/SkMutex.h
index 8c78e12..b9af00f 100644
--- a/include/private/SkMutex.h
+++ b/include/private/SkMutex.h
@@ -8,33 +8,20 @@
 #ifndef SkMutex_DEFINED
 #define SkMutex_DEFINED
 
-// This file is not part of the public Skia API.
 #include "../private/SkSemaphore.h"
+#include "../private/SkThreadID.h"
 #include "SkTypes.h"
 
-#ifdef SK_DEBUG
-    #include "../private/SkThreadID.h"
-#endif
+// TODO: remove this macro; callers can declare `static SkMutex name;` directly.
+#define SK_DECLARE_STATIC_MUTEX(name) static SkMutex name;
 
-#define SK_MUTEX_SEMAPHORE_INIT {1, {0}}
+class SkMutex {
+public:
+    constexpr SkMutex() = default;
 
-#ifdef SK_DEBUG
-    #define SK_BASE_MUTEX_INIT {SK_MUTEX_SEMAPHORE_INIT, 0}
-#else
-    #define SK_BASE_MUTEX_INIT {SK_MUTEX_SEMAPHORE_INIT}
-#endif
+    SkMutex(const SkMutex&)            = delete;
+    SkMutex& operator=(const SkMutex&) = delete;
 
-// Using POD-style initialization prevents the generation of a static initializer.
-//
-// Without magic statics there are no thread safety guarantees on initialization
-// of local statics (even POD). As a result, it is illegal to use
-// SK_DECLARE_STATIC_MUTEX in a function.
-//
-// Because SkBaseMutex is not a primitive, a static SkBaseMutex cannot be
-// initialized in a class with this macro.
-#define SK_DECLARE_STATIC_MUTEX(name) namespace {} static SkBaseMutex name = SK_BASE_MUTEX_INIT;
-
-struct SkBaseMutex {
     void acquire() {
         fSemaphore.wait();
         SkDEBUGCODE(fOwner = SkGetThreadID();)
@@ -50,20 +37,9 @@
         SkASSERT(fOwner == SkGetThreadID());
     }
 
-    SkBaseSemaphore fSemaphore;
-    SkDEBUGCODE(SkThreadID fOwner;)
-};
-
-// This needs to use subclassing instead of encapsulation to make SkAutoMutexAcquire to work.
-class SkMutex : public SkBaseMutex {
-public:
-    SkMutex () {
-        fSemaphore = SK_MUTEX_SEMAPHORE_INIT;
-        SkDEBUGCODE(fOwner = kIllegalThreadID);
-    }
-    ~SkMutex () { fSemaphore.deleteSemaphore(); }
-    SkMutex(const SkMutex&) = delete;
-    SkMutex& operator=(const SkMutex&) = delete;
+private:
+    SkSemaphore fSemaphore{1};
+    SkDEBUGCODE(SkThreadID fOwner{kIllegalThreadID};)
 };
 
 template <typename Lock>
@@ -116,10 +92,10 @@
     Lock &fLock;
 };
 
-typedef SkAutoTAcquire<SkBaseMutex> SkAutoMutexAcquire;
+typedef SkAutoTAcquire<SkMutex> SkAutoMutexAcquire;
 #define SkAutoMutexAcquire(...) SK_REQUIRE_LOCAL_VAR(SkAutoMutexAcquire)
 
-typedef SkAutoTExclusive<SkBaseMutex> SkAutoMutexExclusive;
+typedef SkAutoTExclusive<SkMutex> SkAutoMutexExclusive;
 #define SkAutoMutexExclusive(...) SK_REQUIRE_LOCAL_VAR(SkAutoMutexExclusive)
 
 #endif//SkMutex_DEFINED
diff --git a/include/private/SkSemaphore.h b/include/private/SkSemaphore.h
index cb2f58d..adee574 100644
--- a/include/private/SkSemaphore.h
+++ b/include/private/SkSemaphore.h
@@ -8,43 +8,26 @@
 #ifndef SkSemaphore_DEFINED
 #define SkSemaphore_DEFINED
 
+#include "../private/SkOnce.h"
 #include "SkTypes.h"
-#include "../private/SkAtomics.h"
-#include "../private/SkOncePtr.h"
+#include <atomic>
 
-struct SkBaseSemaphore {
+class SkSemaphore {
+public:
+    constexpr SkSemaphore(int count = 0)
+        : fCount(count), fOSSemaphore(nullptr) {}
 
-    // Increment the counter by 1.
-    // This is a specialization for supporting SkMutex.
-    void signal() {
-        // Since this fetches the value before the add, 0 indicates that this thread is running and
-        // no threads are waiting, -1 and below means that threads are waiting, but only signal 1
-        // thread to run.
-        if (sk_atomic_fetch_add(&fCount, 1, sk_memory_order_release) < 0) {
-           this->osSignal(1);
-        }
-    }
+    ~SkSemaphore();
 
-    // Increment the counter N times.
-    // Generally it's better to call signal(N) instead of signal() N times.
-    void signal(int N);
+    // Increment the counter n times.
+    // Generally it's better to call signal(n) instead of signal() n times.
+    void signal(int n = 1);
 
     // Decrement the counter by 1,
     // then if the counter is <= 0, sleep this thread until the counter is > 0.
-    void wait() {
-        // Since this fetches the value before the subtract, zero and below means that there are no
-        // resources left, so the thread needs to wait.
-        if (sk_atomic_fetch_sub(&fCount, 1, sk_memory_order_acquire) <= 0) {
-            this->osWait();
-        }
-    }
+    void wait();
 
-    struct OSSemaphore;
-
-    void osSignal(int n);
-    void osWait();
-    void deleteSemaphore();
-
+private:
     // This implementation follows the general strategy of
     //     'A Lightweight Semaphore with Partial Spinning'
     // found here
@@ -54,33 +37,40 @@
     // We wrap an OS-provided semaphore with a user-space atomic counter that
     // lets us avoid interacting with the OS semaphore unless strictly required:
     // moving the count from >0 to <=0 or vice-versa, i.e. sleeping or waking threads.
-    int                        fCount;
-    SkBaseOncePtr<OSSemaphore> fOSSemaphore;
+    struct OSSemaphore;
+
+    void osSignal(int n);
+    void osWait();
+
+    std::atomic<int> fCount;
+    SkOnce           fOSSemaphoreOnce;
+    OSSemaphore*     fOSSemaphore;
 };
 
-/**
- * SkSemaphore is a fast mostly-user-space semaphore.
- *
- * A semaphore is logically an atomic integer with a few special properties:
- *   - The integer always starts at 0.
- *   - You can only increment or decrement it, never read or write it.
- *   - Increment is spelled 'signal()'; decrement is spelled 'wait()'.
- *   - If a call to wait() decrements the counter to <= 0,
- *     the calling thread sleeps until another thread signal()s it back above 0.
- */
-class SkSemaphore : SkNoncopyable {
-public:
-    // Initializes the counter to 0.
-    // (Though all current implementations could start from an arbitrary value.)
-    SkSemaphore();
-    ~SkSemaphore();
+inline void SkSemaphore::signal(int n) {
+    int prev = fCount.fetch_add(n, std::memory_order_release);
 
-    void wait();
+    // We only want to call the OS semaphore when our logical count crosses
+    // from <= 0 to >0 (when we need to wake sleeping threads).
+    //
+    // This is easiest to think about with specific examples of prev and n.
+    // If n == 5 and prev == -3, there are 3 threads sleeping and we signal
+    // SkTMin(-(-3), 5) == 3 times on the OS semaphore, leaving the count at 2.
+    //
+    // If prev >= 0, no threads are waiting, SkTMin(-prev, n) is always <= 0,
+    // so we don't call the OS semaphore, leaving the count at (prev + n).
+    int toSignal = SkTMin(-prev, n);
+    if (toSignal > 0) {
+        this->osSignal(toSignal);
+    }
+}
 
-    void signal(int n = 1);
-
-private:
-    SkBaseSemaphore fBaseSemaphore;
-};
+inline void SkSemaphore::wait() {
+    // Since this fetches the value before the subtract, zero and below means that there are no
+    // resources left, so the thread needs to wait.
+    if (fCount.fetch_sub(1, std::memory_order_acquire) <= 0) {
+        this->osWait();
+    }
+}
 
 #endif//SkSemaphore_DEFINED
diff --git a/src/core/SkSemaphore.cpp b/src/core/SkSemaphore.cpp
index da422e2..0646b15 100644
--- a/src/core/SkSemaphore.cpp
+++ b/src/core/SkSemaphore.cpp
@@ -9,7 +9,7 @@
 
 #if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS)
     #include <mach/mach.h>
-    struct SkBaseSemaphore::OSSemaphore {
+    struct SkSemaphore::OSSemaphore {
         semaphore_t fSemaphore;
 
         OSSemaphore()  {
@@ -21,7 +21,7 @@
         void wait() { semaphore_wait(fSemaphore); }
     };
 #elif defined(SK_BUILD_FOR_WIN32)
-    struct SkBaseSemaphore::OSSemaphore {
+    struct SkSemaphore::OSSemaphore {
         HANDLE fSemaphore;
 
         OSSemaphore()  {
@@ -41,7 +41,7 @@
     // It's important we test for Mach before this.  This code will compile but not work there.
     #include <errno.h>
     #include <semaphore.h>
-    struct SkBaseSemaphore::OSSemaphore {
+    struct SkSemaphore::OSSemaphore {
         sem_t fSemaphore;
 
         OSSemaphore()  { sem_init(&fSemaphore, 0/*cross process?*/, 0/*initial count*/); }
@@ -57,43 +57,16 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-void SkBaseSemaphore::signal(int n) {
-    SkASSERT(n >= 0);
-
-    // We only want to call the OS semaphore when our logical count crosses
-    // from <= 0 to >0 (when we need to wake sleeping threads).
-    //
-    // This is easiest to think about with specific examples of prev and n.
-    // If n == 5 and prev == -3, there are 3 threads sleeping and we signal
-    // SkTMin(-(-3), 5) == 3 times on the OS semaphore, leaving the count at 2.
-    //
-    // If prev >= 0, no threads are waiting, SkTMin(-prev, n) is always <= 0,
-    // so we don't call the OS semaphore, leaving the count at (prev + n).
-    int prev = sk_atomic_fetch_add(&fCount, n, sk_memory_order_release);
-    int toSignal = SkTMin(-prev, n);
-    if (toSignal > 0) {
-        this->osSignal(toSignal);
-    }
+void SkSemaphore::osSignal(int n) {
+    fOSSemaphoreOnce([this] { fOSSemaphore = new OSSemaphore; });
+    fOSSemaphore->signal(n);
 }
 
-static SkBaseSemaphore::OSSemaphore* semaphore(SkBaseSemaphore* semaphore) {
-    return semaphore->fOSSemaphore.get([](){ return new SkBaseSemaphore::OSSemaphore(); });
+void SkSemaphore::osWait() {
+    fOSSemaphoreOnce([this] { fOSSemaphore = new OSSemaphore; });
+    fOSSemaphore->wait();
 }
 
-void SkBaseSemaphore::osSignal(int n) { semaphore(this)->signal(n); }
-
-void SkBaseSemaphore::osWait() { semaphore(this)->wait(); }
-
-void SkBaseSemaphore::deleteSemaphore() {
-    delete (OSSemaphore*) fOSSemaphore;
+SkSemaphore::~SkSemaphore() {
+    delete fOSSemaphore;
 }
-
-///////////////////////////////////////////////////////////////////////////////
-
-SkSemaphore::SkSemaphore(){ fBaseSemaphore = {0, {0}}; }
-
-SkSemaphore::~SkSemaphore() { fBaseSemaphore.deleteSemaphore(); }
-
-void SkSemaphore::wait() { fBaseSemaphore.wait(); }
-
-void SkSemaphore::signal(int n) {fBaseSemaphore.signal(n); }
diff --git a/src/lazy/SkDiscardableMemoryPool.cpp b/src/lazy/SkDiscardableMemoryPool.cpp
index 1f3bcf9..2be4c75 100644
--- a/src/lazy/SkDiscardableMemoryPool.cpp
+++ b/src/lazy/SkDiscardableMemoryPool.cpp
@@ -29,7 +29,7 @@
     /**
      *  Without mutex, will be not be thread safe.
      */
-    DiscardableMemoryPool(size_t budget, SkBaseMutex* mutex = nullptr);
+    DiscardableMemoryPool(size_t budget, SkMutex* mutex = nullptr);
     virtual ~DiscardableMemoryPool();
 
     SkDiscardableMemory* create(size_t bytes) override;
@@ -52,9 +52,9 @@
     #endif  // SK_LAZY_CACHE_STATS
 
 private:
-    SkBaseMutex* fMutex;
-    size_t       fBudget;
-    size_t       fUsed;
+    SkMutex* fMutex;
+    size_t   fBudget;
+    size_t   fUsed;
     SkTInternalLList<PoolDiscardableMemory> fList;
 
     /** Function called to free memory if needed */
@@ -128,8 +128,7 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 
-DiscardableMemoryPool::DiscardableMemoryPool(size_t budget,
-                                             SkBaseMutex* mutex)
+DiscardableMemoryPool::DiscardableMemoryPool(size_t budget, SkMutex* mutex)
     : fMutex(mutex)
     , fBudget(budget)
     , fUsed(0) {
@@ -241,7 +240,7 @@
 
 }  // namespace
 
-SkDiscardableMemoryPool* SkDiscardableMemoryPool::Create(size_t size, SkBaseMutex* mutex) {
+SkDiscardableMemoryPool* SkDiscardableMemoryPool::Create(size_t size, SkMutex* mutex) {
     return new DiscardableMemoryPool(size, mutex);
 }
 
diff --git a/src/lazy/SkDiscardableMemoryPool.h b/src/lazy/SkDiscardableMemoryPool.h
index 92ba48b..ad8d796 100644
--- a/src/lazy/SkDiscardableMemoryPool.h
+++ b/src/lazy/SkDiscardableMemoryPool.h
@@ -52,8 +52,7 @@
      *  the pool works.
      *  Without mutex, will be not be thread safe.
      */
-    static SkDiscardableMemoryPool* Create(
-            size_t size, SkBaseMutex* mutex = nullptr);
+    static SkDiscardableMemoryPool* Create(size_t size, SkMutex* mutex = nullptr);
 };
 
 /**