Simplify SkInstCnt

This code requires fewer macros to use it (just one), has less code in macro
definitions, and has simpler synchronization code (just atomic ints, no SkOnce,
no SkMutex, etc.)

A minor downside: we lose indentation, and the order of the final report is reversed:
  Leaked SkRefCntBase: 7
     Leaked SkFontMgr: 1
     Leaked SkWeakRefCnt: 1
         Leaked SkTypeface: 1
     Leaked SkFlattenable: 3
         Leaked SkXfermode: 3
     Leaked SkPathRef: 1
     Leaked SkPixelRef: 1
         Leaked SkMallocPixelRef: 1
becomes
  Leaked SkXfermode: 3
  Leaked SkMallocPixelRef: 1
  Leaked SkPixelRef: 1
  Leaked SkPathRef: 1
  Leaked SkFlattenable: 3
  Leaked SkTypeface: 1
  Leaked SkWeakRefCnt: 1
  Leaked SkFontMgr: 1
  Leaked SkRefCntBase: 7

This is motivated by wanting to land https://codereview.chromium.org/806473006/,
which makes sure all static uses of SkOnce are in global scope.  The current
implementation of SkInstCnt uses them in function scope, which isn't safe.
BUG=skia:

No public API changes.
TBR=reed@google.com

Review URL: https://codereview.chromium.org/841263004
diff --git a/include/core/SkInstCnt.h b/include/core/SkInstCnt.h
index e4b43d1..8aeb14e 100644
--- a/include/core/SkInstCnt.h
+++ b/include/core/SkInstCnt.h
@@ -5,143 +5,76 @@
  * found in the LICENSE file.
  */
 
-
 #ifndef SkInstCnt_DEFINED
 #define SkInstCnt_DEFINED
 
-/*
- * The instance counting system consists of three macros that create the
- * instance counting machinery. A class is added to the system by adding:
- *   SK_DECLARE_INST_COUNT at the top of its declaration for derived classes
- *   SK_DECLARE_INST_COUNT_ROOT at the top of its declaration for a root class
- * At the end of an application a call to all the "root" objects'
- * CheckInstanceCount methods should be made
+/* To count all instances of T, including all subclasses of T,
+ * add SK_DECLARE_INST_COUNT(T) to T's class definition.
+ * If you want to print out counts of leaked instances, set gPrintInstCount to true in main().
+ *
+ * E.g.
+ *   struct Base { SK_DECLARE_INST_COUNT(Base) };
+ *   struct A : public Base {};
+ *   struct SubBase : public Base { SK_DECLARE_INST_COUNT(SubBase) };
+ *   struct B : public SubBase {};
+ *
+ * If gPrintInstCount is true, at the program exit you will see something like:
+ *   Leaked Base: <N>
+ *   Leaked SubBase: <M>
+ * where N >= M.  Leaked instances of A count against Base; leaked instances of B count against
+ * both SubBase and Base.
+ *
+ * If SK_ENABLE_INST_COUNT is not defined or defined to 0, or we're in a shared library build,
+ * this entire system is compiled away to a noop.
  */
+
 #include "SkTypes.h"
 
-#if SK_ENABLE_INST_COUNT
-// Static variables inside member functions below may be defined multiple times
-// if Skia is being used as a dynamic library. Instance counting should be on
-// only for static builds. See bug skia:2058.
-#if defined(SKIA_DLL)
-#error Instance counting works only when Skia is built as a static library.
-#endif
+#if SK_ENABLE_INST_COUNT && !defined(SKIA_DLL) // See skia:2058 for why we noop on shared builds.
+    #include "SkThread.h"
+    #include <stdlib.h>
 
-#include "SkOnce.h"
-#include "SkTArray.h"
-#include "SkThread.h"
-extern bool gPrintInstCount;
+    #define SK_DECLARE_INST_COUNT(T)                           \
+        static const char* InstCountClassName() { return #T; } \
+        SkInstCount<T, T::InstCountClassName> fInstCnt;        \
+        static int32_t GetInstanceCount() { return SkInstCount<T, InstCountClassName>::Count(); }
 
-// The non-root classes just register themselves with their parent
-#define SK_DECLARE_INST_COUNT(className)                                    \
-    SK_DECLARE_INST_COUNT_INTERNAL(className,                               \
-                                   INHERITED::AddInstChild(CheckInstanceCount);)
+    extern bool gPrintInstCount;
 
-// The root classes registers a function to print out the memory stats when
-// the app ends
-#define SK_DECLARE_INST_COUNT_ROOT(className)                               \
-    SK_DECLARE_INST_COUNT_INTERNAL(className, atexit(exitPrint);)
+    template <typename T, const char*(Name)()>
+    class SkInstCount {
+    public:
+        SkInstCount()                   { Inc(); }
+        SkInstCount(const SkInstCount&) { Inc(); }
+        ~SkInstCount()                  { sk_atomic_dec(&gCount); }
 
-#define SK_DECLARE_INST_COUNT_INTERNAL(className, initStep)                 \
-    class SkInstanceCountHelper {                                           \
-    public:                                                                 \
-        SkInstanceCountHelper() {                                           \
-            SK_DECLARE_STATIC_ONCE(once);                                   \
-            SkOnce(&once, init);                                            \
-            sk_atomic_inc(GetInstanceCountPtr());                           \
-        }                                                                   \
-                                                                            \
-        static void init() {                                                \
-            initStep                                                        \
-        }                                                                   \
-                                                                            \
-        SkInstanceCountHelper(const SkInstanceCountHelper&) {               \
-            sk_atomic_inc(GetInstanceCountPtr());                           \
-        }                                                                   \
-                                                                            \
-        ~SkInstanceCountHelper() {                                          \
-            sk_atomic_dec(GetInstanceCountPtr());                           \
-        }                                                                   \
-                                                                            \
-        static int32_t* GetInstanceCountPtr() {                             \
-            static int32_t gInstanceCount;                                  \
-            return &gInstanceCount;                                         \
-        }                                                                   \
-                                                                            \
-        static SkTArray<int (*)(int, bool)>*& GetChildren() {               \
-            static SkTArray<int (*)(int, bool)>* gChildren;                 \
-            return gChildren;                                               \
-        }                                                                   \
-                                                                            \
-        static void create_mutex(SkMutex** mutex) {                         \
-            *mutex = SkNEW(SkMutex);                                        \
-        }                                                                   \
-        static SkBaseMutex& GetChildrenMutex() {                            \
-            static SkMutex* childrenMutex;                                  \
-            SK_DECLARE_STATIC_ONCE(once);                                   \
-            SkOnce(&once, className::SkInstanceCountHelper::create_mutex, &childrenMutex);\
-            return *childrenMutex;                                          \
-        }                                                                   \
-                                                                            \
-    } fInstanceCountHelper;                                                 \
-                                                                            \
-    static int32_t GetInstanceCount() {                                     \
-        return *SkInstanceCountHelper::GetInstanceCountPtr();               \
-    }                                                                       \
-                                                                            \
-    static void exitPrint() {                                               \
-        CheckInstanceCount(0, true);                                        \
-    }                                                                       \
-                                                                            \
-    static int CheckInstanceCount(int level = 0, bool cleanUp = false) {    \
-        if (gPrintInstCount && 0 != GetInstanceCount()) {                   \
-            SkDebugf("%*c Leaked %s: %d\n",                                 \
-                     4*level, ' ', #className,                              \
-                     GetInstanceCount());                                   \
-        }                                                                   \
-        if (NULL == SkInstanceCountHelper::GetChildren()) {                 \
-            return GetInstanceCount();                                      \
-        }                                                                   \
-        SkTArray<int (*)(int, bool)>* children = \
-            SkInstanceCountHelper::GetChildren();                           \
-        int childCount = children->count();                                 \
-        int count = GetInstanceCount();                                     \
-        for (int i = 0; i < childCount; ++i) {                              \
-            count -= (*(*children)[i])(level+1, cleanUp);                   \
-        }                                                                   \
-        SkASSERT(count >= 0);                                               \
-        if (gPrintInstCount && childCount > 0 && count > 0) {               \
-            SkDebugf("%*c Leaked ???: %d\n", 4*(level + 1), ' ', count);    \
-        }                                                                   \
-        if (cleanUp) {                                                      \
-            delete children;                                                \
-            SkInstanceCountHelper::GetChildren() = NULL;                    \
-        }                                                                   \
-        return GetInstanceCount();                                          \
-    }                                                                       \
-                                                                            \
-    static void AddInstChild(int (*childCheckInstCnt)(int, bool)) {         \
-        if (CheckInstanceCount != childCheckInstCnt) {                      \
-            SkAutoMutexAcquire ama(SkInstanceCountHelper::GetChildrenMutex()); \
-            if (NULL == SkInstanceCountHelper::GetChildren()) {             \
-                SkInstanceCountHelper::GetChildren() =                      \
-                    new SkTArray<int (*)(int, bool)>;                       \
-            }                                                               \
-            SkInstanceCountHelper::GetChildren()->push_back(childCheckInstCnt); \
-        }                                                                   \
-    }
+        SkInstCount& operator=(const SkInstCount&) { return *this; } // = can't change the count.
 
+        static void Inc() {
+            // If it's the first time we go from 0 to 1, register to print leaks at process exit.
+            if (0 == sk_atomic_inc(&gCount) && sk_atomic_cas(&gRegistered, 0, 1)) {
+                atexit(PrintAtExit);
+            }
+        }
+
+        static void PrintAtExit() {
+            int32_t leaks = Count();
+            if (gPrintInstCount && leaks > 0) {
+                SkDebugf("Leaked %s: %d\n", Name(), leaks);
+            }
+        }
+
+        // FIXME: Used publicly by unit tests.  Seems like a bad idea in a DM world.
+        static int32_t Count() { return sk_acquire_load(&gCount); }
+
+    private:
+        static int32_t gCount, gRegistered;
+    };
+    // As template values, these will be deduplicated.  (No one-definition rule problems.)
+    template <typename T, const char*(Name)()> int32_t SkInstCount<T, Name>::gCount      = 0;
+    template <typename T, const char*(Name)()> int32_t SkInstCount<T, Name>::gRegistered = 0;
 #else
-// Typically SK_ENABLE_INST_COUNT=0. Make sure the class declares public typedef INHERITED by
-// causing a compile-time error if the typedef is missing. This way SK_ENABLE_INST_COUNT=1 stays
-// compiling.
-#define SK_DECLARE_INST_COUNT(className) static void AddInstChild() { INHERITED::AddInstChild(); }
-#define SK_DECLARE_INST_COUNT_ROOT(className) static void AddInstChild() { }
+    #define SK_DECLARE_INST_COUNT(T)
 #endif
 
-// Following are deprecated. They are defined only for backwards API compatibility.
-#define SK_DECLARE_INST_COUNT_TEMPLATE(className) SK_DECLARE_INST_COUNT(className)
-#define SK_DEFINE_INST_COUNT(className)
-#define SK_DEFINE_INST_COUNT_TEMPLATE(templateInfo, className)
-
 #endif // SkInstCnt_DEFINED
diff --git a/include/core/SkPath.h b/include/core/SkPath.h
index f895f7f..2fe332f 100644
--- a/include/core/SkPath.h
+++ b/include/core/SkPath.h
@@ -30,7 +30,7 @@
 */
 class SK_API SkPath {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(SkPath);
+    SK_DECLARE_INST_COUNT(SkPath);
 
     SkPath();
     SkPath(const SkPath&);
diff --git a/include/core/SkRefCnt.h b/include/core/SkRefCnt.h
index 5d394f0..cf3e385 100644
--- a/include/core/SkRefCnt.h
+++ b/include/core/SkRefCnt.h
@@ -27,7 +27,7 @@
 */
 class SK_API SkRefCntBase : SkNoncopyable {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(SkRefCntBase)
+    SK_DECLARE_INST_COUNT(SkRefCntBase)
 
     /** Default construct, initializing the reference count to 1.
     */
diff --git a/include/core/SkStream.h b/include/core/SkStream.h
index 2d59698..abd9f92 100644
--- a/include/core/SkStream.h
+++ b/include/core/SkStream.h
@@ -170,7 +170,7 @@
 
 class SK_API SkWStream : SkNoncopyable {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(SkWStream)
+    SK_DECLARE_INST_COUNT(SkWStream)
 
     virtual ~SkWStream();
 
diff --git a/include/gpu/GrGpuResource.h b/include/gpu/GrGpuResource.h
index 8c88841..4340f71 100644
--- a/include/gpu/GrGpuResource.h
+++ b/include/gpu/GrGpuResource.h
@@ -22,7 +22,7 @@
  * Base class for GrGpuResource. Handles the various types of refs we need. Separated out as a base
  * class to isolate the ref-cnting behavior and provide friendship without exposing all of
  * GrGpuResource.
- * 
+ *
  * Gpu resources can have three types of refs:
  *   1) Normal ref (+ by ref(), - by unref()): These are used by code that is issuing draw calls
  *      that read and write the resource via GrDrawTarget and by any object that must own a
@@ -40,7 +40,7 @@
  */
 template <typename DERIVED> class GrIORef : public SkNoncopyable {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(GrIORef)
+    SK_DECLARE_INST_COUNT(GrIORef)
 
     // Some of the signatures are written to mirror SkRefCnt so that GrGpuResource can work with
     // templated helper classes (e.g. SkAutoTUnref). However, we have different categories of
diff --git a/include/gpu/GrGpuResourceRef.h b/include/gpu/GrGpuResourceRef.h
index 0e23eea..b8ba334 100644
--- a/include/gpu/GrGpuResourceRef.h
+++ b/include/gpu/GrGpuResourceRef.h
@@ -36,7 +36,7 @@
  */
 class GrGpuResourceRef : SkNoncopyable {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(GrGpuResourceRef);
+    SK_DECLARE_INST_COUNT(GrGpuResourceRef);
 
     ~GrGpuResourceRef();
 
@@ -87,7 +87,7 @@
     typedef SkNoncopyable INHERITED;
 };
 
-/** 
+/**
  * Templated version of GrGpuResourceRef to enforce type safety.
  */
 template <typename T> class GrTGpuResourceRef : public GrGpuResourceRef {
diff --git a/include/gpu/GrProgramElement.h b/include/gpu/GrProgramElement.h
index e1adcc3..5ebf150 100644
--- a/include/gpu/GrProgramElement.h
+++ b/include/gpu/GrProgramElement.h
@@ -25,7 +25,7 @@
  */
 class GrProgramElement : public SkNoncopyable {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(GrProgramElement)
+    SK_DECLARE_INST_COUNT(GrProgramElement)
 
     virtual ~GrProgramElement() {
         // fRefCnt can be one when an effect is created statically using GR_CREATE_STATIC_EFFECT
diff --git a/include/gpu/GrTextureAccess.h b/include/gpu/GrTextureAccess.h
index 5055e10..db6f752 100644
--- a/include/gpu/GrTextureAccess.h
+++ b/include/gpu/GrTextureAccess.h
@@ -113,7 +113,7 @@
  */
 class GrTextureAccess : public SkNoncopyable {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(GrTextureAccess);
+    SK_DECLARE_INST_COUNT(GrTextureAccess);
 
     /**
      * A default GrTextureAccess must have reset() called on it in a GrProcessor subclass's
diff --git a/src/core/SkInstCnt.cpp b/src/core/SkInstCnt.cpp
index 2f9a57d..0211b12 100644
--- a/src/core/SkInstCnt.cpp
+++ b/src/core/SkInstCnt.cpp
@@ -7,6 +7,6 @@
 
 #include "SkInstCnt.h"
 
-#if SK_ENABLE_INST_COUNT
-bool gPrintInstCount = false;
+#if SK_ENABLE_INST_COUNT && !defined(SKIA_DLL)  // See SkInstCnt.h
+    bool gPrintInstCount = false;
 #endif
diff --git a/src/gpu/gl/GrGLCreateNullInterface.cpp b/src/gpu/gl/GrGLCreateNullInterface.cpp
index 6275ced..14919bb 100644
--- a/src/gpu/gl/GrGLCreateNullInterface.cpp
+++ b/src/gpu/gl/GrGLCreateNullInterface.cpp
@@ -14,7 +14,7 @@
 
 class BufferObj {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(BufferObj);
+    SK_DECLARE_INST_COUNT(BufferObj);
 
     BufferObj(GrGLuint id) : fID(id), fDataPtr(NULL), fSize(0), fMapped(false) {
     }
@@ -47,7 +47,7 @@
 // This class maintains a sparsely populated array of buffer pointers.
 class BufferManager {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(BufferManager);
+    SK_DECLARE_INST_COUNT(BufferManager);
 
     BufferManager() : fFreeListHead(kFreeListEnd) {}
 
@@ -116,7 +116,7 @@
  */
 struct ThreadContext {
 public:
-    SK_DECLARE_INST_COUNT_ROOT(ThreadContext);
+    SK_DECLARE_INST_COUNT(ThreadContext);
 
     BufferManager   fBufferManager;
     GrGLuint        fCurrArrayBuffer;
diff --git a/src/gpu/gl/GrGLGpuProgramCache.cpp b/src/gpu/gl/GrGLGpuProgramCache.cpp
index 688b22e..a920221 100644
--- a/src/gpu/gl/GrGLGpuProgramCache.cpp
+++ b/src/gpu/gl/GrGLGpuProgramCache.cpp
@@ -23,7 +23,7 @@
 typedef GrGLProgramDataManager::UniformHandle UniformHandle;
 
 struct GrGLGpu::ProgramCache::Entry {
-    SK_DECLARE_INST_COUNT_ROOT(Entry);
+    SK_DECLARE_INST_COUNT(Entry);
     Entry() : fProgram(NULL), fLRUStamp(0) {}
 
     SkAutoTUnref<GrGLProgram>   fProgram;
diff --git a/tests/GrMemoryPoolTest.cpp b/tests/GrMemoryPoolTest.cpp
index 0848d97..1ba184b 100644
--- a/tests/GrMemoryPoolTest.cpp
+++ b/tests/GrMemoryPoolTest.cpp
@@ -44,7 +44,7 @@
         }
     }
 
-    SK_DECLARE_INST_COUNT_ROOT(A);
+    SK_DECLARE_INST_COUNT(A);
 
     static A* Create(SkRandom* r);
 
diff --git a/tests/LListTest.cpp b/tests/LListTest.cpp
index 8fb0117..9826072 100644
--- a/tests/LListTest.cpp
+++ b/tests/LListTest.cpp
@@ -24,7 +24,7 @@
     int fID;
 
 private:
-    SK_DECLARE_INST_COUNT_ROOT(ListElement);
+    SK_DECLARE_INST_COUNT(ListElement);
     SK_DECLARE_INTERNAL_LLIST_INTERFACE(ListElement);
 };