Move Google3-specific stack limitation logic to template classes.

Remove #ifdefs in other files.

Reapplies https://codereview.chromium.org/1656143003; deleting the implicitly generated copy constructors of GLPtr and GLPtrAlias, and making the GLPtrAlias constructor explicit, resolves the build issue on Android.

Also reverts https://codereview.chromium.org/1663013004

Does not change the public API.

TBR=reed
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1666203002

Review URL: https://codereview.chromium.org/1666203002
diff --git a/include/core/SkTypes.h b/include/core/SkTypes.h
index 0d31efc..4592168 100644
--- a/include/core/SkTypes.h
+++ b/include/core/SkTypes.h
@@ -606,17 +606,16 @@
 #define SkAutoMalloc(...) SK_REQUIRE_LOCAL_VAR(SkAutoMalloc)
 
 /**
- *  Manage an allocated block of memory. If the requested size is <= kSize, then
- *  the allocation will come from the stack rather than the heap. This object
- *  is the sole manager of the lifetime of the block, so the caller must not
- *  call sk_free() or delete on the block.
+ *  Manage an allocated block of memory. If the requested size is <= kSizeRequested (or slightly
+ *  more), then the allocation will come from the stack rather than the heap. This object is the
+ *  sole manager of the lifetime of the block, so the caller must not call sk_free() or delete on
+ *  the block.
  */
-template <size_t kSize> class SkAutoSMalloc : SkNoncopyable {
+template <size_t kSizeRequested> class SkAutoSMalloc : SkNoncopyable {
 public:
     /**
-     *  Creates initially empty storage. get() returns a ptr, but it is to
-     *  a zero-byte allocation. Must call reset(size) to return an allocated
-     *  block.
+     *  Creates initially empty storage. get() returns a ptr, but it is to a zero-byte allocation.
+     *  Must call reset(size) to return an allocated block.
      */
     SkAutoSMalloc() {
         fPtr = fStorage;
@@ -624,9 +623,8 @@
     }
 
     /**
-     *  Allocate a block of the specified size. If size <= kSize, then the
-     *  allocation will come from the stack, otherwise it will be dynamically
-     *  allocated.
+     *  Allocate a block of the specified size. If size <= kSizeRequested (or slightly more), then
+     *  the allocation will come from the stack, otherwise it will be dynamically allocated.
      */
     explicit SkAutoSMalloc(size_t size) {
         fPtr = fStorage;
@@ -635,8 +633,8 @@
     }
 
     /**
-     *  Free the allocated block (if any). If the block was small enought to
-     *  have been allocated on the stack (size <= kSize) then this does nothing.
+     *  Free the allocated block (if any). If the block was small enough to have been allocated on
+     *  the stack, then this does nothing.
      */
     ~SkAutoSMalloc() {
         if (fPtr != (void*)fStorage) {
@@ -645,18 +643,16 @@
     }
 
     /**
-     *  Return the allocated block. May return non-null even if the block is
-     *  of zero size. Since this may be on the stack or dynamically allocated,
-     *  the caller must not call sk_free() on it, but must rely on SkAutoSMalloc
-     *  to manage it.
+     *  Return the allocated block. May return non-null even if the block is of zero size. Since
+     *  this may be on the stack or dynamically allocated, the caller must not call sk_free() on it,
+     *  but must rely on SkAutoSMalloc to manage it.
      */
     void* get() const { return fPtr; }
 
     /**
-     *  Return a new block of the requested size, freeing (as necessary) any
-     *  previously allocated block. As with the constructor, if size <= kSize
-     *  then the return block may be allocated locally, rather than from the
-     *  heap.
+     *  Return a new block of the requested size, freeing (as necessary) any previously allocated
+     *  block. As with the constructor, if size <= kSizeRequested (or slightly more) then the return
+     *  block may be allocated locally, rather than from the heap.
      */
     void* reset(size_t size,
                 SkAutoMalloc::OnShrink shrink = SkAutoMalloc::kAlloc_OnShrink,
@@ -686,9 +682,20 @@
     }
 
 private:
+    // Round the requested size up to a multiple of 4 bytes; fStorage is made of uint32_t.
+    static const size_t kSizeAlign4 = SkAlign4(kSizeRequested);
+#if defined(GOOGLE3)
+    // Stack frame size is limited for GOOGLE3. 4k is less than the actual max, but some functions
+    // have multiple large stack allocations.
+    static const size_t kMaxBytes = 4 * 1024;
+    static const size_t kSize = kSizeRequested > kMaxBytes ? kMaxBytes : kSizeAlign4;
+#else
+    static const size_t kSize = kSizeAlign4;
+#endif
+
     void*       fPtr;
     size_t      fSize;  // can be larger than the requested size (see kReuse)
-    uint32_t    fStorage[(kSize + 3) >> 2];
+    uint32_t    fStorage[kSize >> 2];
 };
 // Can't guard the constructor because it's a template class.
 
diff --git a/include/gpu/gl/GrGLInterface.h b/include/gpu/gl/GrGLInterface.h
index 31429a8..5fa31ed 100644
--- a/include/gpu/gl/GrGLInterface.h
+++ b/include/gpu/gl/GrGLInterface.h
@@ -109,7 +109,8 @@
     template <typename FNPTR_TYPE> class GLPtr {
     public:
         GLPtr() : fPtr(NULL) {}
-        GLPtr operator=(FNPTR_TYPE ptr) { fPtr = ptr; return *this; }
+        GLPtr(const GLPtr&) = delete;
+        GLPtr& operator=(FNPTR_TYPE ptr) { fPtr = ptr; return *this; }
         operator FNPTR_TYPE() const { return fPtr; }
     private:
         FNPTR_TYPE fPtr;
@@ -119,7 +120,8 @@
     // they're updated to use the Functions struct.
     template <typename FNPTR_TYPE> class GLPtrAlias {
     public:
-        GLPtrAlias(GLPtr<FNPTR_TYPE>* base) : fBase(base) {}
+        explicit GLPtrAlias(GLPtr<FNPTR_TYPE>* base) : fBase(base) {}
+        GLPtrAlias(const GLPtrAlias&) = delete;
         void operator=(FNPTR_TYPE ptr) { *fBase = ptr; }
     private:
         GLPtr<FNPTR_TYPE>* fBase;
diff --git a/include/private/SkTemplates.h b/include/private/SkTemplates.h
index 496cf42..e36910e 100644
--- a/include/private/SkTemplates.h
+++ b/include/private/SkTemplates.h
@@ -164,9 +164,9 @@
     SkDEBUGCODE(int fCount;)
 };
 
-/** Wraps SkAutoTArray, with room for up to N elements preallocated
+/** Wraps SkAutoTArray, with room for up to kCountRequested elements preallocated.
  */
-template <int N, typename T> class SkAutoSTArray : SkNoncopyable {
+template <int kCountRequested, typename T> class SkAutoSTArray : SkNoncopyable {
 public:
     /** Initialize with no objects */
     SkAutoSTArray() {
@@ -195,13 +195,13 @@
         }
 
         if (fCount != count) {
-            if (fCount > N) {
+            if (fCount > kCount) {
                 // 'fArray' was allocated last time so free it now
                 SkASSERT((T*) fStorage != fArray);
                 sk_free(fArray);
             }
 
-            if (count > N) {
+            if (count > kCount) {
                 const uint64_t size64 = sk_64_mul(count, sizeof(T));
                 const size_t size = static_cast<size_t>(size64);
                 if (size != size64) {
@@ -240,10 +240,21 @@
     }
 
 private:
+#if defined(GOOGLE3)
+    // Stack frame size is limited for GOOGLE3. 4k is less than the actual max, but some functions
+    // have multiple large stack allocations.
+    static const int kMaxBytes = 4 * 1024;
+    static const int kCount = kCountRequested * sizeof(T) > kMaxBytes
+        ? kMaxBytes / sizeof(T)
+        : kCountRequested;
+#else
+    static const int kCount = kCountRequested;
+#endif
+
     int     fCount;
     T*      fArray;
     // since we come right after fArray, fStorage should be properly aligned
-    char    fStorage[N * sizeof(T)];
+    char    fStorage[kCount * sizeof(T)];
 };
 
 /** Manages an array of T elements, freeing the array in the destructor.
@@ -317,12 +328,12 @@
     T* fPtr;
 };
 
-template <size_t N, typename T> class SkAutoSTMalloc : SkNoncopyable {
+template <size_t kCountRequested, typename T> class SkAutoSTMalloc : SkNoncopyable {
 public:
     SkAutoSTMalloc() : fPtr(fTStorage) {}
 
     SkAutoSTMalloc(size_t count) {
-        if (count > N) {
+        if (count > kCount) {
             fPtr = (T*)sk_malloc_flags(count * sizeof(T), SK_MALLOC_THROW | SK_MALLOC_TEMP);
         } else {
             fPtr = fTStorage;
@@ -340,7 +351,7 @@
         if (fPtr != fTStorage) {
             sk_free(fPtr);
         }
-        if (count > N) {
+        if (count > kCount) {
             fPtr = (T*)sk_malloc_throw(count * sizeof(T));
         } else {
             fPtr = fTStorage;
@@ -368,10 +379,10 @@
 
     // Reallocs the array, can be used to shrink the allocation.  Makes no attempt to be intelligent
     void realloc(size_t count) {
-        if (count > N) {
+        if (count > kCount) {
             if (fPtr == fTStorage) {
                 fPtr = (T*)sk_malloc_throw(count * sizeof(T));
-                memcpy(fPtr, fTStorage, N * sizeof(T));
+                memcpy(fPtr, fTStorage, kCount * sizeof(T));
             } else {
                 fPtr = (T*)sk_realloc_throw(fPtr, count * sizeof(T));
             }
@@ -381,9 +392,22 @@
     }
 
 private:
+    // Since we use uint32_t storage, we might be able to get more elements for free.
+    static const size_t kCountWithPadding = SkAlign4(kCountRequested*sizeof(T)) / sizeof(T);
+#if defined(GOOGLE3)
+    // Stack frame size is limited for GOOGLE3. 4k is less than the actual max, but some functions
+    // have multiple large stack allocations.
+    static const size_t kMaxBytes = 4 * 1024;
+    static const size_t kCount = kCountRequested * sizeof(T) > kMaxBytes
+        ? kMaxBytes / sizeof(T)
+        : kCountWithPadding;
+#else
+    static const size_t kCount = kCountWithPadding;
+#endif
+
     T*          fPtr;
     union {
-        uint32_t    fStorage32[(N*sizeof(T) + 3) >> 2];
+        uint32_t    fStorage32[SkAlign4(kCount*sizeof(T)) >> 2];
         T           fTStorage[1];   // do NOT want to invoke T::T()
     };
 };
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index e0ee801..187a3ca 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -286,12 +286,7 @@
     }
 
     // temp buffer for doing sw premul conversion, if needed.
-#if defined(GOOGLE3)
-    // Stack frame size is limited in GOOGLE3.
-    SkAutoSTMalloc<48 * 48, uint32_t> tmpPixels(0);
-#else
     SkAutoSTMalloc<128 * 128, uint32_t> tmpPixels(0);
-#endif
     if (tempTexture) {
         SkAutoTUnref<const GrFragmentProcessor> fp;
         SkMatrix textureMatrix;
diff --git a/src/gpu/GrTextureToYUVPlanes.cpp b/src/gpu/GrTextureToYUVPlanes.cpp
index 19283ee..6a8d7b6 100644
--- a/src/gpu/GrTextureToYUVPlanes.cpp
+++ b/src/gpu/GrTextureToYUVPlanes.cpp
@@ -170,12 +170,7 @@
             SkASSERT(sizes[0] == sizes[1] && sizes[1] == sizes[2]);
             SkISize yuvSize = sizes[0];
             // We have no kRGB_888 pixel format, so readback rgba and then copy three channels.
-#if defined(GOOGLE3)
-            // Stack frame size is limited in GOOGLE3.
-            SkAutoSTMalloc<48 * 48, uint32_t> tempYUV(yuvSize.fWidth * yuvSize.fHeight);
-#else
             SkAutoSTMalloc<128 * 128, uint32_t> tempYUV(yuvSize.fWidth * yuvSize.fHeight);
-#endif
             if (!yuvTex->readPixels(0, 0, yuvSize.fWidth, yuvSize.fHeight,
                                     kRGBA_8888_GrPixelConfig, tempYUV.get(), 0)) {
                 return false;
@@ -212,12 +207,7 @@
                 SkASSERT(sizes[1].fWidth == sizes[2].fWidth);
                 SkISize uvSize = sizes[1];
                 // We have no kRG_88 pixel format, so readback rgba and then copy two channels.
-#if defined(GOOGLE3)
-                // Stack frame size is limited in GOOGLE3.
-                SkAutoSTMalloc<48 * 48, uint32_t> tempUV(uvSize.fWidth * uvSize.fHeight);
-#else
                 SkAutoSTMalloc<128 * 128, uint32_t> tempUV(uvSize.fWidth * uvSize.fHeight);
-#endif
                 if (!uvTex->readPixels(0, 0, uvSize.fWidth, uvSize.fHeight,
                                        kRGBA_8888_GrPixelConfig, tempUV.get(), 0)) {
                     return false;
diff --git a/src/gpu/batches/GrDrawPathBatch.cpp b/src/gpu/batches/GrDrawPathBatch.cpp
index ff62539..a99f4eb 100644
--- a/src/gpu/batches/GrDrawPathBatch.cpp
+++ b/src/gpu/batches/GrDrawPathBatch.cpp
@@ -137,14 +137,8 @@
                                                  instances.count());
     } else {
         int floatsPerTransform = GrPathRendering::PathTransformSize(this->transformType());
-#if defined(GOOGLE3)
-        //Stack frame size is limited in GOOGLE3.
-        SkAutoSTMalloc<512, float> transformStorage(floatsPerTransform * fTotalPathCount);
-        SkAutoSTMalloc<256, uint16_t> indexStorage(fTotalPathCount);
-#else
         SkAutoSTMalloc<4096, float> transformStorage(floatsPerTransform * fTotalPathCount);
         SkAutoSTMalloc<2048, uint16_t> indexStorage(fTotalPathCount);
-#endif
         int idx = 0;
         for (DrawList::Iter iter(fDraws); iter.get(); iter.next()) {
             const Draw& draw = *iter.get();
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index 3b7ccd3..001cd48 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -5,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-
 #include "GrGLGpu.h"
 #include "GrGLGLSL.h"
 #include "GrGLStencilAttachment.h"
@@ -857,12 +856,7 @@
     size_t trimRowBytes = width * bpp;
 
     // in case we need a temporary, trimmed copy of the src pixels
-#if defined(GOOGLE3)
-    // Stack frame size is limited in GOOGLE3.
-    SkAutoSMalloc<64 * 128> tempStorage;
-#else
     SkAutoSMalloc<128 * 128> tempStorage;
-#endif
 
     // Internal format comes from the texture desc.
     GrGLenum internalFormat;