Revert "replace SkNVRefCnt with SkRefCnt"

This reverts commit 0fb1ee98cf7dfb87a3b376de8e0dff28c1b95619.

Reason for revert: looks like this increased size by ~8K.

Original change's description:
> replace SkNVRefCnt with SkRefCnt
> 
> SkNVRefCnt trades a small amount of code size (vtable) and runtime
> (vptr) memory usage for a larger amount of code size (templating).  It
> was written back in a time when all we were really thinking about was
> runtime memory usage, so I'm curious to see where performance, code
> size, and memory usage all move if it's removed.
> 
> Looking at the types I've changed here, my guess is that performance and
> memory usage will be basically unchanged, and that code size will drop a
> bit.  Nothing else it's nicer to have only one ref-counting base class.
> 
> Change-Id: I7d56a2b9e2b9fb000ff97792159ea1ff4f5e6f13
> Reviewed-on: https://skia-review.googlesource.com/c/166203
> Reviewed-by: Brian Salomon <bsalomon@google.com>
> Commit-Queue: Mike Klein <mtklein@google.com>

TBR=mtklein@google.com,bsalomon@google.com,mtklein@chromium.org

Change-Id: Ibcfcc4b523c466a535bea5ffa30d0fe2574c5bd7
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/166360
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/include/core/SkColorSpace.h b/include/core/SkColorSpace.h
index 5ca9b45..26771f3 100644
--- a/include/core/SkColorSpace.h
+++ b/include/core/SkColorSpace.h
@@ -62,7 +62,7 @@
     float fF;
 };
 
-class SK_API SkColorSpace : public SkRefCnt {
+class SK_API SkColorSpace : public SkNVRefCnt<SkColorSpace> {
 public:
     /**
      *  Create the sRGB color space.
diff --git a/include/core/SkData.h b/include/core/SkData.h
index c279d52..931749a 100644
--- a/include/core/SkData.h
+++ b/include/core/SkData.h
@@ -19,7 +19,7 @@
  *  but the actual ptr that is returned (by data() or bytes()) is guaranteed
  *  to always be the same for the life of this instance.
  */
-class SK_API SkData final : public SkRefCnt {
+class SK_API SkData final : public SkNVRefCnt<SkData> {
 public:
     /**
      *  Returns the number of bytes stored.
@@ -158,7 +158,7 @@
     static sk_sp<SkData> MakeEmpty();
 
 private:
-    friend class SkRefCnt;
+    friend class SkNVRefCnt<SkData>;
     ReleaseProc fReleaseProc;
     void*       fReleaseProcContext;
     void*       fPtr;
diff --git a/include/core/SkRefCnt.h b/include/core/SkRefCnt.h
index ee2710d..bc6ccd8 100644
--- a/include/core/SkRefCnt.h
+++ b/include/core/SkRefCnt.h
@@ -163,6 +163,44 @@
     }
 }
 
+///////////////////////////////////////////////////////////////////////////////
+
+// This is a variant of SkRefCnt that's Not Virtual, so weighs 4 bytes instead of 8 or 16.
+// There's only benefit to using this if the deriving class does not otherwise need a vtable.
+template <typename Derived>
+class SkNVRefCnt {
+public:
+    SkNVRefCnt() : fRefCnt(1) {}
+    ~SkNVRefCnt() { SkASSERTF(1 == getRefCnt(), "NVRefCnt was %d", getRefCnt()); }
+
+    // Implementation is pretty much the same as SkRefCntBase. All required barriers are the same:
+    //   - unique() needs acquire when it returns true, and no barrier if it returns false;
+    //   - ref() doesn't need any barrier;
+    //   - unref() needs a release barrier, and an acquire if it's going to call delete.
+
+    bool unique() const { return 1 == fRefCnt.load(std::memory_order_acquire); }
+    void ref() const { (void)fRefCnt.fetch_add(+1, std::memory_order_relaxed); }
+    void  unref() const {
+        if (1 == fRefCnt.fetch_add(-1, std::memory_order_acq_rel)) {
+            // restore the 1 for our destructor's assert
+            SkDEBUGCODE(fRefCnt.store(1, std::memory_order_relaxed));
+            delete (const Derived*)this;
+        }
+    }
+    void  deref() const { this->unref(); }
+
+private:
+    mutable std::atomic<int32_t> fRefCnt;
+    int32_t getRefCnt() const {
+        return fRefCnt.load(std::memory_order_relaxed);
+    }
+
+    SkNVRefCnt(SkNVRefCnt&&) = delete;
+    SkNVRefCnt(const SkNVRefCnt&) = delete;
+    SkNVRefCnt& operator=(SkNVRefCnt&&) = delete;
+    SkNVRefCnt& operator=(const SkNVRefCnt&) = delete;
+};
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/include/core/SkTextBlob.h b/include/core/SkTextBlob.h
index 81c44fc..907241c 100644
--- a/include/core/SkTextBlob.h
+++ b/include/core/SkTextBlob.h
@@ -36,7 +36,7 @@
     run consists of glyphs, SkPaint, and position. Only parts of SkPaint related to
     fonts and text rendering are used by run.
 */
-class SK_API SkTextBlob final : public SkRefCnt {
+class SK_API SkTextBlob final : public SkNVRefCnt<SkTextBlob> {
 public:
 
     /** Returns conservative bounding box. Uses SkPaint associated with each glyph to
@@ -138,7 +138,7 @@
                                          const SkDeserialProcs& procs);
 
 private:
-    friend class SkRefCnt;
+    friend class SkNVRefCnt<SkTextBlob>;
     class RunRecord;
 
     enum GlyphPositioning : uint8_t;
diff --git a/include/core/SkVertices.h b/include/core/SkVertices.h
index f36a877..63a27a0 100644
--- a/include/core/SkVertices.h
+++ b/include/core/SkVertices.h
@@ -17,7 +17,7 @@
 /**
  * An immutable set of vertex data that can be used with SkCanvas::drawVertices.
  */
-class SK_API SkVertices : public SkRefCnt {
+class SK_API SkVertices : public SkNVRefCnt<SkVertices> {
 public:
     // BoneIndices indicates which (of a maximum of 4 bones) a given vertex will interpolate
     // between. To indicate that a slot is not used, the convention is to assign the bone index
@@ -250,7 +250,7 @@
     SkVertices() {}
 
     // these are needed since we've manually sized our allocation (see Builder::init)
-    friend class SkRefCnt;
+    friend class SkNVRefCnt<SkVertices>;
     void operator delete(void* p);
 
     static sk_sp<SkVertices> Alloc(int vCount, int iCount, uint32_t builderFlags,
diff --git a/include/private/GrSkSLFPFactoryCache.h b/include/private/GrSkSLFPFactoryCache.h
index 7440791..40e001a 100644
--- a/include/private/GrSkSLFPFactoryCache.h
+++ b/include/private/GrSkSLFPFactoryCache.h
@@ -19,7 +19,7 @@
 // For thread safety, it is important that GrSkSLFP only interact with the cache from methods that
 // are only called from within the rendering thread, like onCreateGLSLInstance and
 // onGetGLSLProcessorKey.
-class GrSkSLFPFactoryCache : public SkRefCnt {
+class GrSkSLFPFactoryCache : public SkNVRefCnt<GrSkSLFPFactoryCache> {
 public:
     // Returns a factory by its numeric index, or null if no such factory exists. Indices are
     // allocated by GrSkSLFP::NewIndex().
diff --git a/include/private/SkPathRef.h b/include/private/SkPathRef.h
index 630fdfb..d250fc3 100644
--- a/include/private/SkPathRef.h
+++ b/include/private/SkPathRef.h
@@ -39,7 +39,7 @@
  * logical verb or the last verb in memory).
  */
 
-class SK_API SkPathRef final : public SkRefCnt {
+class SK_API SkPathRef final : public SkNVRefCnt<SkPathRef> {
 public:
     class Editor {
     public:
diff --git a/modules/skottie/include/Skottie.h b/modules/skottie/include/Skottie.h
index b0a2d24..2f832bc 100644
--- a/modules/skottie/include/Skottie.h
+++ b/modules/skottie/include/Skottie.h
@@ -125,7 +125,7 @@
     virtual void onAnnotation(const char key[], const char value[]) = 0;
 };
 
-class SK_API Animation : public SkRefCnt {
+class SK_API Animation : public SkNVRefCnt<Animation> {
 public:
 
     class Builder final {
@@ -240,7 +240,7 @@
                                  fOutPoint,
                                  fDuration;
 
-    typedef SkRefCnt INHERITED;
+    typedef SkNVRefCnt<Animation> INHERITED;
 };
 
 } // namespace skottie
diff --git a/src/gpu/GrBackendTextureImageGenerator.h b/src/gpu/GrBackendTextureImageGenerator.h
index 5f1afae..9d76e02 100644
--- a/src/gpu/GrBackendTextureImageGenerator.h
+++ b/src/gpu/GrBackendTextureImageGenerator.h
@@ -50,7 +50,7 @@
 
     static void ReleaseRefHelper_TextureReleaseProc(void* ctx);
 
-    class RefHelper : public SkRefCnt {
+    class RefHelper : public SkNVRefCnt<RefHelper> {
     public:
         RefHelper(GrTexture* texture, uint32_t owningContextID)
             : fOriginalTexture(texture)
diff --git a/src/gpu/effects/GrSkSLFP.h b/src/gpu/effects/GrSkSLFP.h
index 619c568..c4acfd0 100644
--- a/src/gpu/effects/GrSkSLFP.h
+++ b/src/gpu/effects/GrSkSLFP.h
@@ -126,7 +126,7 @@
  * upon the inputs to the SkSL (static if's, etc.) we first create a factory for a given SkSL
  * string, then use that to create the actual GrFragmentProcessor.
  */
-class GrSkSLFPFactory : public SkRefCnt {
+class GrSkSLFPFactory : public SkNVRefCnt<GrSkSLFPFactory> {
 public:
     /**
      * Constructs a GrSkSLFPFactory for a given SkSL source string. Creating a factory will
diff --git a/src/gpu/text/GrDistanceFieldAdjustTable.h b/src/gpu/text/GrDistanceFieldAdjustTable.h
index 03e4bab..fca9a8c 100644
--- a/src/gpu/text/GrDistanceFieldAdjustTable.h
+++ b/src/gpu/text/GrDistanceFieldAdjustTable.h
@@ -14,7 +14,7 @@
 // Distance field text needs this table to compute a value for use in the fragment shader.
 // Because the GrTextContext can go out of scope before the final flush, this needs to be
 // refcnted and malloced
-struct GrDistanceFieldAdjustTable : public SkRefCnt {
+struct GrDistanceFieldAdjustTable : public SkNVRefCnt<GrDistanceFieldAdjustTable> {
     GrDistanceFieldAdjustTable() { this->buildDistanceAdjustTables(); }
     ~GrDistanceFieldAdjustTable() {
         delete[] fTable;
diff --git a/src/gpu/text/GrGlyphCache.h b/src/gpu/text/GrGlyphCache.h
index ec4ca2b..c38ba78 100644
--- a/src/gpu/text/GrGlyphCache.h
+++ b/src/gpu/text/GrGlyphCache.h
@@ -26,7 +26,7 @@
  *  of it's SkDescriptor as a key to access (or regenerate) the SkGlyphCache. GrTextStrikes are
  *  created by and owned by a GrGlyphCache.
  */
-class GrTextStrike : public SkRefCnt {
+class GrTextStrike : public SkNVRefCnt<GrTextStrike> {
 public:
     GrTextStrike(const SkDescriptor& fontScalerKey);
     ~GrTextStrike();
diff --git a/src/gpu/text/GrTextBlob.h b/src/gpu/text/GrTextBlob.h
index c5f34cd..8451d48 100644
--- a/src/gpu/text/GrTextBlob.h
+++ b/src/gpu/text/GrTextBlob.h
@@ -47,7 +47,7 @@
  *
  * *WARNING* If you add new fields to this struct, then you may need to to update AssertEqual
  */
-class GrTextBlob : public SkRefCnt {
+class GrTextBlob : public SkNVRefCnt<GrTextBlob> {
 public:
     SK_DECLARE_INTERNAL_LLIST_INTERFACE(GrTextBlob);
 
diff --git a/src/image/SkImage_Lazy.cpp b/src/image/SkImage_Lazy.cpp
index b7da07d..bd219c6 100644
--- a/src/image/SkImage_Lazy.cpp
+++ b/src/image/SkImage_Lazy.cpp
@@ -28,7 +28,7 @@
 #endif
 
 // Ref-counted tuple(SkImageGenerator, SkMutex) which allows sharing one generator among N images
-class SharedGenerator final : public SkRefCnt {
+class SharedGenerator final : public SkNVRefCnt<SharedGenerator> {
 public:
     static sk_sp<SharedGenerator> Make(std::unique_ptr<SkImageGenerator> gen) {
         return gen ? sk_sp<SharedGenerator>(new SharedGenerator(std::move(gen))) : nullptr;