Reland "Redesign program key construction"

This is a reland of bbbf1a7f50a303bd76163793bd5968c72f5f4432

Original change's description:
> Redesign program key construction
>
> This does two things:
> 1) Moves responsibility for bit-packing portions of the key into the key
>    itself. A new GrKeyBuilder type manages adding bits, with asserts to
>    ensure a value always fits in the requested number of bits. In theory this
>    will let us generate smaller keys overall, at the expense of slightly
>    more complex code during construction.
> 2) Adds a string label parameter for key methods that fold in data. For
>    new methods, the label is required. To ease migration, the old add32
>    does not require a label (yet). This will let us generate detailed,
>    human readable keys, either based on SK_DEBUG, or a runtime option
>    (if we're comfortable paying the cost).
>
> Bug: skia:11372
> Change-Id: Ib0f941551e0dbadabbd2a7de912b00e9e766b166
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/377876
> Commit-Queue: Brian Osman <brianosman@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>

Bug: skia:11372
Cq-Include-Trybots: luci.skia.skia.primary:Test-Win10-MSVC-Golo-GPU-QuadroP400-x86_64-Debug-All-Vulkan
Change-Id: I179ed581bc9ba772191e727274ac0ac6979ebdf3
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/378778
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h
index 7eaf880..c691e79 100644
--- a/src/gpu/GrProgramDesc.h
+++ b/src/gpu/GrProgramDesc.h
@@ -8,15 +8,174 @@
 #ifndef GrProgramDesc_DEFINED
 #define GrProgramDesc_DEFINED
 
+#include "include/core/SkString.h"
 #include "include/private/GrTypesPriv.h"
 #include "include/private/SkTArray.h"
 #include "include/private/SkTo.h"
 
+#include <limits.h>
+
 class GrCaps;
 class GrProgramInfo;
 class GrRenderTarget;
 class GrShaderCaps;
 
+/**
+ *  Packs a program key as a sequence of bit fields, low bits first, into 32-bit words.
+ *
+ *  Fields are appended with addBits()/addBytes(). Bits that do not yet fill a whole word are
+ *  held in a working value, so flush() must be called once construction is complete; data()
+ *  asserts that no unflushed bits remain. In SK_DEBUG builds a human-readable description of
+ *  every field is recorded alongside the packed data.
+ */
+class GrKeyBuilder {
+public:
+    GrKeyBuilder() = default;
+    GrKeyBuilder(const GrKeyBuilder& other) = default;
+
+    /** Discards all stored words, pending bits and (in debug builds) the description. */
+    void reset() { *this = GrKeyBuilder{}; }
+
+    /**
+     *  Appends 'val' to the key as a field that is 'numBits' wide.
+     *
+     *  @param numBits  width of the field; must be in [1, 32]
+     *  @param val      value to store; must be representable in 'numBits' bits
+     *  @param label    name of the field, recorded in the debug description
+     */
+    void addBits(uint32_t numBits, uint32_t val, const char* label) {
+        SkASSERT(numBits > 0 && numBits <= 32);
+        SkASSERT(numBits == 32 || (val < (1u << numBits)));
+
+        SkDEBUGCODE(fDescription.appendf("%s: %u\n", label, val);)
+
+        // fBitsUsed is always < 32 here, so the shift is well defined. High bits of 'val' that
+        // do not fit in the working word fall off the top and are recovered below.
+        fCurValue |= (val << fBitsUsed);
+        fBitsUsed += numBits;
+
+        if (fBitsUsed >= 32) {
+            // The working word is full: store it and start a new one with the spilled bits.
+            fData.push_back(fCurValue);
+            const uint32_t excess = fBitsUsed - 32;
+            // A nonzero excess means the word was already partly filled, so the shift amount
+            // below is in [1, 31]. The zero case is split out to avoid a shift by 32.
+            fCurValue = excess ? (val >> (numBits - excess)) : 0;
+            fBitsUsed = excess;
+        }
+
+        SkASSERT(fCurValue < (1u << fBitsUsed));
+    }
+
+    /**
+     *  Appends 'numBytes' bytes starting at 'data', one 8-bit field per byte, in memory order.
+     *  Every byte is recorded under the same 'label' in the debug description.
+     */
+    void addBytes(uint32_t numBytes, const void* data, const char* label) {
+        // TODO: Make this smarter/faster?
+        const uint8_t* bytes = static_cast<const uint8_t*>(data);
+        for (uint32_t i = 0; i < numBytes; ++i) {
+            this->addBits(8, bytes[i], label);
+        }
+    }
+
+    /**
+     *  Adds free-form text to the debug description; the packed key is not affected.
+     *  'sf' is only invoked in SK_DEBUG builds, so release builds never build the string.
+     */
+    template <typename StringFunc>
+    void addString(StringFunc&& sf) {
+        #ifdef SK_DEBUG
+            fDescription.append(sf());
+            fDescription.append("\n");
+        #endif
+    }
+
+    /** Stores the pending partial word, if any, leaving its unused high bits zero. */
+    void flush() {
+        if (fBitsUsed) {
+            fData.push_back(fCurValue);
+            fCurValue = 0;
+            fBitsUsed = 0;
+        }
+    }
+
+    /** True when nothing has been added: no stored words and no pending bits. */
+    bool empty() const { return fData.empty() && !fBitsUsed; }
+
+    /** Returns the packed words. Only valid after flush(). */
+    const uint32_t* data() const {
+        SkASSERT(fBitsUsed == 0);  // flush() must be called when construction is complete
+        return fData.begin();
+    }
+
+    /** Key size in bytes, always a multiple of 4. A pending partial word counts as a word. */
+    size_t size() const {
+        return (fData.count() + (fBitsUsed ? 1 : 0)) * sizeof(uint32_t);
+    }
+
+    /** Number of bits added so far, without rounding the pending word up. */
+    size_t sizeInBits() const {
+        return (fData.count() * sizeof(uint32_t) * CHAR_BIT) + fBitsUsed;
+    }
+
+    GrKeyBuilder& operator=(const GrKeyBuilder& other) = default;
+
+    /** Compares the packed contents, including pending bits. The description is ignored. */
+    bool operator==(const GrKeyBuilder& that) const {
+        return fBitsUsed == that.fBitsUsed &&
+               fCurValue == that.fCurValue &&
+               fData == that.fData;
+    }
+
+    bool operator!= (const GrKeyBuilder& other) const {
+        return !(*this == other);
+    }
+
+    /**
+     *  Replaces the contents with 'length' bytes of already packed key data. Pending bits and
+     *  the debug description are dropped so the builder holds exactly the supplied words.
+     *
+     *  'length' must be a multiple of 4. That is only asserted, so in builds without asserts
+     *  a trailing partial word is ignored rather than copied past the end of the storage.
+     */
+    void setData(const void* data, size_t length) {
+        SkASSERT(SkIsAlign4(length));
+        this->reset();
+
+        const size_t numWords = length / sizeof(uint32_t);
+        fData.reset(SkToInt(numWords));
+        if (numWords > 0) {
+            memcpy(fData.begin(), data, numWords * sizeof(uint32_t));
+        }
+    }
+
+    /** The accumulated field descriptions; always empty unless SK_DEBUG is defined. */
+    SkString description() const {
+        #ifdef SK_DEBUG
+            return fDescription;
+        #else
+            return SkString{};
+        #endif
+    }
+
+private:
+    // Preallocation sizing for fData, counted in uint32_t elements.
+    enum {
+        kHeaderSize            = 1,    // "header" in ::Build
+        kMaxPreallocProcessors = 8,
+        kIntsPerProcessor      = 4,    // This is an overestimate of the average effect key size.
+        kPreAllocSize = kHeaderSize +
+                        kMaxPreallocProcessors * kIntsPerProcessor,
+    };
+
+    SkSTArray<kPreAllocSize, uint32_t, true> fData;
+    uint32_t fCurValue = 0;
+    uint32_t fBitsUsed = 0;  // ... in current value
+
+    SkDEBUGCODE(SkString fDescription;)
+};
+
 /** This class is used to generate a generic program cache key. The Dawn, Metal and Vulkan
  *  backends derive backend-specific versions which add additional information.
  */
@@ -28,38 +139,21 @@
 
     // Returns this as a uint32_t array to be used as a key in the program cache.
     const uint32_t* asKey() const {
-        return reinterpret_cast<const uint32_t*>(fKey.begin());
+        return fKey.data();  // GrKeyBuilder::data() asserts that no unflushed bits remain
     }
 
     // Gets the number of bytes in asKey(). It will be a 4-byte aligned value.
     uint32_t keyLength() const {
-        SkASSERT(0 == (fKey.count() % 4));
-        return fKey.count();
+        SkASSERT(0 == (fKey.size() % 4));
+        return fKey.size();  // bytes; a pending (unflushed) partial word counts as a full word
     }
 
-    GrProgramDesc& operator= (const GrProgramDesc& other) {
-        uint32_t keyLength = other.keyLength();
-        fKey.reset(SkToInt(keyLength));
-        memcpy(fKey.begin(), other.fKey.begin(), keyLength);
-        fInitialKeyLength = other.fInitialKeyLength;
-        return *this;
-    }
+    SkString description() const { return fKey.description(); }  // empty unless SK_DEBUG
+
+    GrProgramDesc& operator= (const GrProgramDesc& other) = default;  // memberwise copy
 
     bool operator== (const GrProgramDesc& that) const {
-        if (this->keyLength() != that.keyLength()) {
-            return false;
-        }
-
-        SkASSERT(SkIsAlign4(this->keyLength()));
-        int l = this->keyLength() >> 2;
-        const uint32_t* aKey = this->asKey();
-        const uint32_t* bKey = that.asKey();
-        for (int i = 0; i < l; ++i) {
-            if (aKey[i] != bKey[i]) {
-                return false;
-            }
-        }
-        return true;
+        return this->fKey == that.fKey;  // fInitialKeyLength is not part of the comparison
     }
 
     bool operator!= (const GrProgramDesc& other) const {
@@ -96,23 +190,14 @@
         if (!SkTFitsIn<int>(keyLength)) {
             return false;
         }
-        desc->fKey.reset(SkToInt(keyLength));
-        memcpy(desc->fKey.begin(), keyData, keyLength);
+        desc->fKey.setData(keyData, keyLength);
         return true;
     }
 
-    enum {
-        kHeaderSize            = 4,    // "header" in ::Build
-        kMaxPreallocProcessors = 8,
-        kIntsPerProcessor      = 4,    // This is an overestimate of the average effect key size.
-        kPreAllocSize = kHeaderSize +
-                        kMaxPreallocProcessors * sizeof(uint32_t) * kIntsPerProcessor,
-    };
-
-    SkSTArray<kPreAllocSize, uint8_t, true>& key() { return fKey; }
+    GrKeyBuilder& key() { return fKey; }  // mutable access to the underlying key builder
 
 private:
-    SkSTArray<kPreAllocSize, uint8_t, true> fKey;
+    GrKeyBuilder fKey;
     uint32_t fInitialKeyLength = 0;
 };