Make program key descriptions available in release builds

Previously, key descriptions were only recorded in debug builds. Now,
whenever we need the description (because we're putting something in
the persistent cache), we go back and re-compute the key with a
string-building key builder to get the description. This pattern means
that we pay a (very) small overhead for virtual dispatch in the common
case, and only pay for the expensive string work when we're already
doing expensive work (compiling).

Bug: skia:11372
Change-Id: I3d4dc19f2d8883f8117f5f6489fc852cf9503eb3
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/380359
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h
index 2ab878d..3289cf4 100644
--- a/src/gpu/GrProgramDesc.h
+++ b/src/gpu/GrProgramDesc.h
@@ -22,23 +22,23 @@
 
 class GrProcessorKeyBuilder {
 public:
-    GrProcessorKeyBuilder() = default;
-    GrProcessorKeyBuilder(const GrProcessorKeyBuilder& other) = default;
+    GrProcessorKeyBuilder(SkTArray<uint32_t, true>* data) : fData(data) {}
 
-    void reset() { *this = GrProcessorKeyBuilder{}; }
+    virtual ~GrProcessorKeyBuilder() {
+        // Ensure that flush was called before we went out of scope
+        SkASSERT(fBitsUsed == 0);
+    }
 
-    void addBits(uint32_t numBits, uint32_t val, const char* label) {
+    virtual void addBits(uint32_t numBits, uint32_t val, const char* label) {
         SkASSERT(numBits > 0 && numBits <= 32);
         SkASSERT(numBits == 32 || (val < (1u << numBits)));
 
-        SkDEBUGCODE(fDescription.appendf("%s: %u\n", label, val);)
-
         fCurValue |= (val << fBitsUsed);
         fBitsUsed += numBits;
 
         if (fBitsUsed >= 32) {
             // Overflow, start a new working value
-            fData.push_back(fCurValue);
+            fData->push_back(fCurValue);
             uint32_t excess = fBitsUsed - 32;
             fCurValue = excess ? (val >> (numBits - excess)) : 0;
             fBitsUsed = excess;
@@ -48,7 +48,6 @@
     }
 
     void addBytes(uint32_t numBytes, const void* data, const char* label) {
-        // TODO: Make this smarter/faster?
         const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
         for (; numBytes --> 0; bytes++) {
             this->addBits(8, *bytes, label);
@@ -63,75 +62,42 @@
         this->addBits(32, v, label);
     }
 
-    template <typename StringFunc>
-    void appendComment(StringFunc&& sf) {
-        #ifdef SK_DEBUG
-            fDescription.append(sf());
-            fDescription.append("\n");
-        #endif
-    }
+    virtual void appendComment(const char* comment) {}
 
     // Introduces a word-boundary in the key. Must be called before using the key with any cache,
     // but can also be called to create a break between generic data and backend-specific data.
     void flush() {
         if (fBitsUsed) {
-            fData.push_back(fCurValue);
+            fData->push_back(fCurValue);
             fCurValue = 0;
             fBitsUsed = 0;
         }
     }
 
-    bool empty() const { return fData.empty() && !fBitsUsed; }
-
-    const uint32_t* data() const {
-        SkASSERT(fBitsUsed == 0);  // flush() must be called when construction is complete
-        return fData.begin();
-    }
-
-    size_t size() const {
-        return (fData.count() + (fBitsUsed ? 1 : 0)) * sizeof(uint32_t);
-    }
-
-    GrProcessorKeyBuilder& operator=(const GrProcessorKeyBuilder& other) = default;
-
-    bool operator==(const GrProcessorKeyBuilder& that) const {
-        return fBitsUsed == that.fBitsUsed &&
-               fCurValue == that.fCurValue &&
-               fData == that.fData;
-    }
-
-    bool operator!= (const GrProcessorKeyBuilder& other) const {
-        return !(*this == other);
-    }
-
-    void setData(const void* data, size_t length) {
-        SkASSERT(SkIsAlign4(length));
-        fData.reset(length / 4);
-        memcpy(fData.begin(), data, length);
-    }
-
-    SkString description() const {
-        #ifdef SK_DEBUG
-            return fDescription;
-        #else
-            return SkString{};
-        #endif
-    }
-
 private:
-    enum {
-        kHeaderSize            = 1,    // "header" in ::Build
-        kMaxPreallocProcessors = 8,
-        kIntsPerProcessor      = 4,    // This is an overestimate of the average effect key size.
-        kPreAllocSize = kHeaderSize +
-                        kMaxPreallocProcessors * kIntsPerProcessor,
-    };
-
-    SkSTArray<kPreAllocSize, uint32_t, true> fData;
+    SkTArray<uint32_t, true>* fData;
     uint32_t fCurValue = 0;
     uint32_t fBitsUsed = 0;  // ... in current value
+};
 
-    SkDEBUGCODE(SkString fDescription;)
+class GrProcessorStringKeyBuilder : public GrProcessorKeyBuilder {
+public:
+    GrProcessorStringKeyBuilder(SkTArray<uint32_t, true>* data) : INHERITED(data) {}
+
+    void addBits(uint32_t numBits, uint32_t val, const char* label) override {
+        INHERITED::addBits(numBits, val, label);
+        fDescription.appendf("%s: %u\n", label, val);
+    }
+
+    void appendComment(const char* comment) override {
+        fDescription.appendf("%s\n", comment);
+    }
+
+    SkString description() const { return fDescription; }
+
+private:
+    using INHERITED = GrProcessorKeyBuilder;
+    SkString fDescription;
 };
 
 /** This class is used to generate a generic program cache key. The Dawn, Metal and Vulkan
@@ -142,6 +108,7 @@
     GrProgramDesc(const GrProgramDesc& other) = default;
 
     bool isValid() const { return !fKey.empty(); }
+    void reset() { *this = GrProgramDesc{}; }
 
     // Returns this as a uint32_t array to be used as a key in the program cache.
     const uint32_t* asKey() const {
@@ -150,14 +117,9 @@
 
     // Gets the number of bytes in asKey(). It will be a 4-byte aligned value.
     uint32_t keyLength() const {
-        SkASSERT(0 == (fKey.size() % 4));
-        return fKey.size();
+        return fKey.size() * sizeof(uint32_t);
     }
 
-    SkString description() const { return fKey.description(); }
-
-    GrProgramDesc& operator= (const GrProgramDesc& other) = default;
-
     bool operator== (const GrProgramDesc& that) const {
         return this->fKey == that.fKey;
     }
@@ -168,6 +130,10 @@
 
     uint32_t initialKeyLength() const { return fInitialKeyLength; }
 
+    // TODO(skia:11372): Incorporate this into caps interface (part of makeDesc, or a parallel
+    // function), so other backends can include their information in the description.
+    static SkString Describe(GrRenderTarget*, const GrProgramInfo&, const GrCaps&);
+
 protected:
     friend class GrDawnCaps;
     friend class GrD3DCaps;
@@ -189,21 +155,32 @@
      * @param programInfo   Program information need to build the key
      * @param caps          the caps
      **/
-    static bool Build(GrProgramDesc*, GrRenderTarget*, const GrProgramInfo&, const GrCaps&);
+    static void Build(GrProgramDesc*, GrRenderTarget*, const GrProgramInfo&, const GrCaps&);
 
     // This is strictly an OpenGL call since the other backends have additional data in their keys.
     static bool BuildFromData(GrProgramDesc* desc, const void* keyData, size_t keyLength) {
-        if (!SkTFitsIn<int>(keyLength)) {
+        if (!SkTFitsIn<int>(keyLength) || !SkIsAlign4(keyLength)) {
             return false;
         }
-        desc->fKey.setData(keyData, keyLength);
+        desc->fKey.reset(keyLength / 4);
+        memcpy(desc->fKey.begin(), keyData, keyLength);
         return true;
     }
 
-    GrProcessorKeyBuilder* key() { return &fKey; }
+    enum {
+        kHeaderSize            = 1,    // "header" in ::Build
+        kMaxPreallocProcessors = 8,
+        kIntsPerProcessor      = 4,    // This is an overestimate of the average effect key size.
+        kPreAllocSize = kHeaderSize +
+                        kMaxPreallocProcessors * kIntsPerProcessor,
+    };
+
+    using KeyType = SkSTArray<kPreAllocSize, uint32_t, true>;
+
+    KeyType* key() { return &fKey; }
 
 private:
-    GrProcessorKeyBuilder fKey;
+    SkSTArray<kPreAllocSize, uint32_t, true> fKey;
     uint32_t fInitialKeyLength = 0;
 };