Add support for pre-compiling cached SkSL shaders

The client can do a test run of their application with
a persistent cache set to SkSL mode. They store the key
and data blobs that are produced.

Ship those blobs with the application. At startup, call
GrContext::precompileShader for each key/data pair. This
compiles the shaders and stores the GL program ID, plus a
small amount of metadata, in our runtime program cache.

Caveats:
* Currently only implemented for the GL backend. Other
  backends will require more metadata to do any useful
  amount of work. Metal may need a more drastic workflow
  change, involving offline compilation of the shaders.
* Currently only implemented for cached SkSL (not GLSL
  or program binaries). Supporting other formats again
  requires more metadata, and the cached shaders become
  increasingly specialized to GPU and driver versions.
* Reusing the cached SkSL on different hardware is not
  supported. Many driver workarounds are implemented in
  the SkSL -> GLSL transformation, but some are higher
  level. Limiting device variance by artificially hiding
  extensions may help, but there are no guarantees.

Testing:
* The 'gltestprecompile' DM config exercises this code
  similarly to 'gltestpersistentcache', ensuring that
  results are visually identical when precompiling, and
  that no cache misses occur after precompiling.

Change-Id: Id314c5d5f5a58fe503a0505a613bd4a540cc3589
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239438
Reviewed-by: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 78e302f..5f7c231 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -508,6 +508,10 @@
     fGpu->deleteBackendTexture(backendTex);
 }
 
+bool GrContext::precompileShader(const SkData& key, const SkData& data) {
+    return fGpu->precompileShader(key, data);
+}
+
 #ifdef SK_ENABLE_DUMP_GPU
 #include "src/utils/SkJSONWriter.h"
 SkString GrContext::dump() const {
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index e4ebed3..6b8a45e 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -454,6 +454,8 @@
      */
     virtual void deleteBackendTexture(const GrBackendTexture&) = 0;
 
+    virtual bool precompileShader(const SkData& key, const SkData& data) { return false; }
+
 #if GR_TEST_UTILS
     /** Check a handle represents an actual texture in the backend API that has not been freed. */
     virtual bool isTestingOnlyBackendTexture(const GrBackendTexture&) const = 0;
diff --git a/src/gpu/GrPersistentCacheUtils.h b/src/gpu/GrPersistentCacheUtils.h
index 79abb6a..e96f5a0 100644
--- a/src/gpu/GrPersistentCacheUtils.h
+++ b/src/gpu/GrPersistentCacheUtils.h
@@ -23,7 +23,8 @@
 static inline sk_sp<SkData> PackCachedShaders(SkFourByteTag shaderType,
                                               const SkSL::String shaders[],
                                               const SkSL::Program::Inputs inputs[],
-                                              int numInputs) {
+                                              int numInputs,
+                                              const SkSL::Program::Settings* settings) {
     // For consistency (so tools can blindly pack and unpack cached shaders), we always write
     // kGrShaderTypeCount inputs. If the backend gives us fewer, we just replicate the last one.
     SkASSERT(numInputs >= 1 && numInputs <= kGrShaderTypeCount);
@@ -34,13 +35,20 @@
         writer.writeString(shaders[i].c_str(), shaders[i].size());
         writer.writePad(&inputs[SkTMin(i, numInputs - 1)], sizeof(SkSL::Program::Inputs));
     }
+    writer.writeBool(SkToBool(settings));
+    if (settings) {
+        writer.writeBool(settings->fFlipY);
+        writer.writeBool(settings->fFragColorIsInOut);
+        writer.writeBool(settings->fForceHighPrecision);
+    }
     return writer.snapshotAsData();
 }
 
 static inline void UnpackCachedShaders(SkReader32* reader,
                                        SkSL::String shaders[],
                                        SkSL::Program::Inputs inputs[],
-                                       int numInputs) {
+                                       int numInputs,
+                                       SkSL::Program::Settings* settings = nullptr) {
     for (int i = 0; i < kGrShaderTypeCount; ++i) {
         size_t stringLen = 0;
         const char* string = reader->readString(&stringLen);
@@ -53,6 +61,11 @@
             reader->skip(sizeof(SkSL::Program::Inputs));
         }
     }
+    if (reader->readBool() && settings) {
+        settings->fFlipY = reader->readBool();
+        settings->fFragColorIsInOut = reader->readBool();
+        settings->fForceHighPrecision = reader->readBool();
+    }
 }
 
 }
diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h
index ed919ac..4854646 100644
--- a/src/gpu/GrProgramDesc.h
+++ b/src/gpu/GrProgramDesc.h
@@ -42,6 +42,15 @@
     static bool Build(GrProgramDesc*, const GrRenderTarget*, const GrPrimitiveProcessor&,
                       bool hasPointSize, const GrPipeline&, GrGpu*);
 
+    static bool BuildFromData(GrProgramDesc* desc, const void* keyData, size_t keyLength) {
+        if (!SkTFitsIn<int>(keyLength)) {
+            return false;
+        }
+        desc->fKey.reset(SkToInt(keyLength));
+        memcpy(desc->fKey.begin(), keyData, keyLength);
+        return true;
+    }
+
     // Returns this as a uint32_t array to be used as a key in the program cache.
     const uint32_t* asKey() const {
         return reinterpret_cast<const uint32_t*>(fKey.begin());
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 5193452..efacc32 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -136,6 +136,10 @@
                                           GrProtected isProtected) override;
     void deleteBackendTexture(const GrBackendTexture&) override;
 
+    bool precompileShader(const SkData& key, const SkData& data) override {
+        return fProgramCache->precompileShader(key, data);
+    }
+
 #if GR_TEST_UTILS
     bool isTestingOnlyBackendTexture(const GrBackendTexture&) const override;
 
@@ -317,6 +321,7 @@
                                 const GrPrimitiveProcessor&,
                                 const GrTextureProxy* const primProcProxies[],
                                 const GrPipeline&, bool hasPointSize);
+        bool precompileShader(const SkData& key, const SkData& data);
 
     private:
         struct Entry;
diff --git a/src/gpu/gl/GrGLGpuProgramCache.cpp b/src/gpu/gl/GrGLGpuProgramCache.cpp
index 279dd03..d9efe08 100644
--- a/src/gpu/gl/GrGLGpuProgramCache.cpp
+++ b/src/gpu/gl/GrGLGpuProgramCache.cpp
@@ -15,9 +15,14 @@
 #include "src/gpu/glsl/GrGLSLFragmentProcessor.h"
 
 struct GrGLGpu::ProgramCache::Entry {
-    Entry(sk_sp<GrGLProgram> program) : fProgram(std::move(program)) {}
+    Entry(sk_sp<GrGLProgram> program)
+        : fProgram(std::move(program)) {}
+
+    Entry(const GrGLPrecompiledProgram& precompiledProgram)
+        : fPrecompiledProgram(precompiledProgram) {}
 
     sk_sp<GrGLProgram> fProgram;
+    GrGLPrecompiledProgram fPrecompiledProgram;
 };
 
 GrGLGpu::ProgramCache::ProgramCache(GrGLGpu* gpu)
@@ -28,7 +33,9 @@
 
 void GrGLGpu::ProgramCache::abandon() {
     fMap.foreach([](std::unique_ptr<Entry>* e) {
-        (*e)->fProgram->abandon();
+        if ((*e)->fProgram) {
+            (*e)->fProgram->abandon();
+        }
     });
 
     this->reset();
@@ -56,7 +63,21 @@
     desc.setSurfaceOriginKey(GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(origin));
 
     std::unique_ptr<Entry>* entry = fMap.find(desc);
-    if (!entry) {
+    if (entry && !(*entry)->fProgram) {
+        // We've pre-compiled the GL program, but don't have the GrGLProgram scaffolding
+        const GrGLPrecompiledProgram* precompiledProgram = &((*entry)->fPrecompiledProgram);
+        SkASSERT(precompiledProgram->fProgramID != 0);
+        GrGLProgram* program = GrGLProgramBuilder::CreateProgram(renderTarget, origin,
+                                                                 primProc, primProcProxies,
+                                                                 pipeline, &desc, fGpu,
+                                                                 precompiledProgram);
+        if (nullptr == program) {
+            // Should we purge the program ID from the cache at this point?
+            SkDEBUGFAIL("Couldn't create program from precompiled program");
+            return nullptr;
+        }
+        (*entry)->fProgram.reset(program);
+    } else if (!entry) {
         // We have a cache miss
         GrGLProgram* program = GrGLProgramBuilder::CreateProgram(renderTarget, origin,
                                                                  primProc, primProcProxies,
@@ -69,3 +90,24 @@
 
     return SkRef((*entry)->fProgram.get());
 }
+
+bool GrGLGpu::ProgramCache::precompileShader(const SkData& key, const SkData& data) {
+    GrProgramDesc desc;
+    if (!GrProgramDesc::BuildFromData(&desc, key.data(), key.size())) {
+        return false;
+    }
+
+    std::unique_ptr<Entry>* entry = fMap.find(desc);
+    if (entry) {
+        // We've already seen/compiled this shader
+        return true;
+    }
+
+    GrGLPrecompiledProgram precompiledProgram;
+    if (!GrGLProgramBuilder::PrecompileProgram(&precompiledProgram, fGpu, data)) {
+        return false;
+    }
+
+    fMap.insert(desc, std::unique_ptr<Entry>(new Entry(precompiledProgram)));
+    return true;
+}
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.cpp b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
index b08eda3..98b6a88 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
@@ -33,12 +33,25 @@
 #define GL_CALL(X) GR_GL_CALL(this->gpu()->glInterface(), X)
 #define GL_CALL_RET(R, X) GR_GL_CALL_RET(this->gpu()->glInterface(), R, X)
 
+static void cleanup_shaders(GrGLGpu* gpu, const SkTDArray<GrGLuint>& shaderIDs) {
+    for (int i = 0; i < shaderIDs.count(); ++i) {
+        GR_GL_CALL(gpu->glInterface(), DeleteShader(shaderIDs[i]));
+    }
+}
+
+static void cleanup_program(GrGLGpu* gpu, GrGLuint programID,
+                            const SkTDArray<GrGLuint>& shaderIDs) {
+    GR_GL_CALL(gpu->glInterface(), DeleteProgram(programID));
+    cleanup_shaders(gpu, shaderIDs);
+}
+
 GrGLProgram* GrGLProgramBuilder::CreateProgram(GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
                                                const GrPrimitiveProcessor& primProc,
                                                const GrTextureProxy* const primProcProxies[],
                                                const GrPipeline& pipeline,
                                                GrProgramDesc* desc,
-                                               GrGLGpu* gpu) {
+                                               GrGLGpu* gpu,
+                                               const GrGLPrecompiledProgram* precompiledProgram) {
     SkASSERT(!pipeline.isBad());
 
     ATRACE_ANDROID_FRAMEWORK("Shader Compile");
@@ -50,7 +63,7 @@
                                pipeline, primProc, primProcProxies, desc);
 
     auto persistentCache = gpu->getContext()->priv().getPersistentCache();
-    if (persistentCache) {
+    if (persistentCache && !precompiledProgram) {
         sk_sp<SkData> key = SkData::MakeWithoutCopy(desc->asKey(), desc->keyLength());
         builder.fCached = persistentCache->load(*key);
         // the eventual end goal is to completely skip emitAndInstallProcs on a cache hit, but it's
@@ -60,7 +73,7 @@
     if (!builder.emitAndInstallProcs()) {
         return nullptr;
     }
-    return builder.finalize();
+    return builder.finalize(precompiledProgram);
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -149,7 +162,8 @@
 static constexpr SkFourByteTag kGLPB_Tag = SkSetFourByteTag('G', 'L', 'P', 'B');
 
 void GrGLProgramBuilder::storeShaderInCache(const SkSL::Program::Inputs& inputs, GrGLuint programID,
-                                            const SkSL::String shaders[], bool isSkSL) {
+                                            const SkSL::String shaders[], bool isSkSL,
+                                            const SkSL::Program::Settings& settings) {
     if (!this->gpu()->getContext()->priv().getPersistentCache()) {
         return;
     }
@@ -176,24 +190,29 @@
     } else {
         // source cache
         auto data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kGLSL_Tag,
-                                                              shaders, &inputs, 1);
+                                                              shaders, &inputs, 1, &settings);
         this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
     }
 }
 
-GrGLProgram* GrGLProgramBuilder::finalize() {
+GrGLProgram* GrGLProgramBuilder::finalize(const GrGLPrecompiledProgram* precompiledProgram) {
     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
 
     // verify we can get a program id
     GrGLuint programID;
-    GL_CALL_RET(programID, CreateProgram());
+    if (precompiledProgram) {
+        programID = precompiledProgram->fProgramID;
+    } else {
+        GL_CALL_RET(programID, CreateProgram());
+    }
     if (0 == programID) {
         return nullptr;
     }
 
     if (this->gpu()->glCaps().programBinarySupport() &&
         this->gpu()->glCaps().programParameterSupport() &&
-        this->gpu()->getContext()->priv().getPersistentCache()) {
+        this->gpu()->getContext()->priv().getPersistentCache() &&
+        !precompiledProgram) {
         GL_CALL(ProgramParameteri(programID, GR_GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GR_GL_TRUE));
     }
 
@@ -225,7 +244,13 @@
         &fFS.fCompilerString,
     };
     SkSL::String cached_sksl[kGrShaderTypeCount];
-    if (cached) {
+    if (precompiledProgram) {
+        // This is very similar to when we get program binaries. We even set that flag, as it's
+        // used to prevent other compile work later, and to force re-querying uniform locations.
+        this->addInputVars(precompiledProgram->fInputs);
+        this->computeCountsAndStrides(programID, primProc, false);
+        usedProgramBinaries = true;
+    } else if (cached) {
         SkReader32 reader(fCached->data(), fCached->size());
         SkFourByteTag shaderType = reader.readU32();
 
@@ -288,7 +313,7 @@
                                                              &glsl[kFragment_GrShaderType],
                                                              errorHandler);
             if (!fs) {
-                this->cleanupProgram(programID, shadersToDelete);
+                cleanup_program(fGpu, programID, shadersToDelete);
                 return nullptr;
             }
             inputs = fs->fInputs;
@@ -300,7 +325,7 @@
         }
         if (!this->compileAndAttachShaders(glsl[kFragment_GrShaderType], programID,
                                            GR_GL_FRAGMENT_SHADER, &shadersToDelete, errorHandler)) {
-            this->cleanupProgram(programID, shadersToDelete);
+            cleanup_program(fGpu, programID, shadersToDelete);
             return nullptr;
         }
 
@@ -313,13 +338,13 @@
                                                              &glsl[kVertex_GrShaderType],
                                                              errorHandler);
             if (!vs) {
-                this->cleanupProgram(programID, shadersToDelete);
+                cleanup_program(fGpu, programID, shadersToDelete);
                 return nullptr;
             }
         }
         if (!this->compileAndAttachShaders(glsl[kVertex_GrShaderType], programID,
                                            GR_GL_VERTEX_SHADER, &shadersToDelete, errorHandler)) {
-            this->cleanupProgram(programID, shadersToDelete);
+            cleanup_program(fGpu, programID, shadersToDelete);
             return nullptr;
         }
 
@@ -340,14 +365,14 @@
                                   &glsl[kGeometry_GrShaderType],
                                   errorHandler);
                 if (!gs) {
-                    this->cleanupProgram(programID, shadersToDelete);
+                    cleanup_program(fGpu, programID, shadersToDelete);
                     return nullptr;
                 }
             }
             if (!this->compileAndAttachShaders(glsl[kGeometry_GrShaderType], programID,
                                                GR_GL_GEOMETRY_SHADER, &shadersToDelete,
                                                errorHandler)) {
-                this->cleanupProgram(programID, shadersToDelete);
+                cleanup_program(fGpu, programID, shadersToDelete);
                 return nullptr;
             }
         }
@@ -363,13 +388,15 @@
     }
     this->resolveProgramResourceLocations(programID, usedProgramBinaries);
 
-    this->cleanupShaders(shadersToDelete);
+    cleanup_shaders(fGpu, shadersToDelete);
 
     // With ANGLE, we can't cache path-rendering programs. We use ProgramPathFragmentInputGen,
     // and ANGLE's deserialized program state doesn't restore enough state to handle that.
     // The native NVIDIA drivers do, but this is such an edge case that it's easier to just
     // black-list caching these programs in all cases. See: anglebug.com/3619
-    if (!cached && !primProc.isPathRendering()) {
+    // We also can't cache SkSL or GLSL if we were given a precompiled program, but there's not
+    // much point in doing so.
+    if (!cached && !primProc.isPathRendering() && !precompiledProgram) {
         bool isSkSL = false;
         if (fGpu->getContext()->priv().options().fShaderCacheStrategy ==
                 GrContextOptions::ShaderCacheStrategy::kSkSL) {
@@ -378,7 +405,7 @@
             }
             isSkSL = true;
         }
-        this->storeShaderInCache(inputs, programID, glsl, isSkSL);
+        this->storeShaderInCache(inputs, programID, glsl, isSkSL, settings);
     }
     return this->createProgram(programID);
 }
@@ -463,16 +490,6 @@
     }
 }
 
-void GrGLProgramBuilder::cleanupProgram(GrGLuint programID, const SkTDArray<GrGLuint>& shaderIDs) {
-    GL_CALL(DeleteProgram(programID));
-    this->cleanupShaders(shaderIDs);
-}
-void GrGLProgramBuilder::cleanupShaders(const SkTDArray<GrGLuint>& shaderIDs) {
-    for (int i = 0; i < shaderIDs.count(); ++i) {
-        GL_CALL(DeleteShader(shaderIDs[i]));
-    }
-}
-
 GrGLProgram* GrGLProgramBuilder::createProgram(GrGLuint programID) {
     return new GrGLProgram(fGpu,
                            fUniformHandles,
@@ -490,3 +507,76 @@
                            fVertexStride,
                            fInstanceStride);
 }
+
+bool GrGLProgramBuilder::PrecompileProgram(GrGLPrecompiledProgram* precompiledProgram,
+                                           GrGLGpu* gpu,
+                                           const SkData& cachedData) {
+    SkReader32 reader(cachedData.data(), cachedData.size());
+    SkFourByteTag shaderType = reader.readU32();
+    if (shaderType != kSKSL_Tag) {
+        // TODO: Support GLSL, and maybe even program binaries, too?
+        return false;
+    }
+
+    const GrGLInterface* gl = gpu->glInterface();
+    auto errorHandler = gpu->getContext()->priv().getShaderErrorHandler();
+    GrGLuint programID;
+    GR_GL_CALL_RET(gl, programID, CreateProgram());
+    if (0 == programID) {
+        return false;
+    }
+
+    SkTDArray<GrGLuint> shadersToDelete;
+
+    SkSL::Program::Settings settings;
+    settings.fCaps = gpu->glCaps().shaderCaps();
+    settings.fSharpenTextures = gpu->getContext()->priv().options().fSharpenMipmappedTextures;
+
+    SkSL::String shaders[kGrShaderTypeCount];
+    SkSL::Program::Inputs inputs;
+    GrPersistentCacheUtils::UnpackCachedShaders(&reader, shaders, &inputs, 1, &settings);
+
+    auto compileShader = [&](SkSL::Program::Kind kind, const SkSL::String& sksl, GrGLenum type) {
+        SkSL::String glsl;
+        auto program = GrSkSLtoGLSL(gpu->glContext(), kind, sksl, settings, &glsl, errorHandler);
+        if (!program) {
+            return false;
+        }
+
+        if (GrGLuint shaderID = GrGLCompileAndAttachShader(gpu->glContext(), programID, type, glsl,
+                                                           gpu->stats(), errorHandler)) {
+            shadersToDelete.push_back(shaderID);
+            return true;
+        } else {
+            return false;
+        }
+    };
+
+    if (!compileShader(SkSL::Program::kFragment_Kind,
+                       shaders[kFragment_GrShaderType],
+                       GR_GL_FRAGMENT_SHADER) ||
+        !compileShader(SkSL::Program::kVertex_Kind,
+                       shaders[kVertex_GrShaderType],
+                       GR_GL_VERTEX_SHADER) ||
+        (!shaders[kGeometry_GrShaderType].empty() &&
+         !compileShader(SkSL::Program::kGeometry_Kind,
+                       shaders[kGeometry_GrShaderType],
+                       GR_GL_GEOMETRY_SHADER))) {
+        cleanup_program(gpu, programID, shadersToDelete);
+        return false;
+    }
+
+    GR_GL_CALL(gpu->glInterface(), LinkProgram(programID));
+    GrGLint linked = GR_GL_INIT_ZERO;
+    GR_GL_CALL(gpu->glInterface(), GetProgramiv(programID, GR_GL_LINK_STATUS, &linked));
+    if (!linked) {
+        cleanup_program(gpu, programID, shadersToDelete);
+        return false;
+    }
+
+    cleanup_shaders(gpu, shadersToDelete);
+
+    precompiledProgram->fProgramID = programID;
+    precompiledProgram->fInputs = inputs;
+    return true;
+}
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.h b/src/gpu/gl/builders/GrGLProgramBuilder.h
index 680dfd0..de97d42 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.h
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.h
@@ -23,6 +23,16 @@
 class GrGLSLShaderBuilder;
 class GrShaderCaps;
 
+struct GrGLPrecompiledProgram {
+    GrGLPrecompiledProgram(GrGLuint programID = 0,
+                           SkSL::Program::Inputs inputs = SkSL::Program::Inputs())
+        : fProgramID(programID)
+        , fInputs(inputs) {}
+
+    GrGLuint fProgramID;
+    SkSL::Program::Inputs fInputs;
+};
+
 class GrGLProgramBuilder : public GrGLSLProgramBuilder {
 public:
     /** Generates a shader program.
@@ -33,6 +43,8 @@
      * This function may modify the GrProgramDesc by setting the surface origin
      * key to 0 (unspecified) if it turns out the program does not care about
      * the surface origin.
+     * If a GL program has already been created, the program ID and inputs can
+     * be supplied to skip the shader compilation.
      * @return true if generation was successful.
      */
     static GrGLProgram* CreateProgram(GrRenderTarget*, GrSurfaceOrigin,
@@ -40,7 +52,10 @@
                                       const GrTextureProxy* const primProcProxies[],
                                       const GrPipeline&,
                                       GrProgramDesc*,
-                                      GrGLGpu*);
+                                      GrGLGpu*,
+                                      const GrGLPrecompiledProgram* = nullptr);
+
+    static bool PrecompileProgram(GrGLPrecompiledProgram*, GrGLGpu*, const SkData&);
 
     const GrCaps* caps() const override;
 
@@ -61,14 +76,13 @@
     void computeCountsAndStrides(GrGLuint programID, const GrPrimitiveProcessor& primProc,
                                  bool bindAttribLocations);
     void storeShaderInCache(const SkSL::Program::Inputs& inputs, GrGLuint programID,
-                            const SkSL::String shaders[], bool isSkSL);
-    GrGLProgram* finalize();
+                            const SkSL::String shaders[], bool isSkSL,
+                            const SkSL::Program::Settings& settings);
+    GrGLProgram* finalize(const GrGLPrecompiledProgram*);
     void bindProgramResourceLocations(GrGLuint programID);
     bool checkLinkStatus(GrGLuint programID, GrContextOptions::ShaderErrorHandler* errorHandler,
                          SkSL::String* sksl[], const SkSL::String glsl[]);
     void resolveProgramResourceLocations(GrGLuint programID, bool force);
-    void cleanupProgram(GrGLuint programID, const SkTDArray<GrGLuint>& shaderIDs);
-    void cleanupShaders(const SkTDArray<GrGLuint>& shaderIDs);
 
     // Subclasses create different programs
     GrGLProgram* createProgram(GrGLuint programID);
diff --git a/src/gpu/vk/GrVkPipelineStateBuilder.cpp b/src/gpu/vk/GrVkPipelineStateBuilder.cpp
index 8428924..0919827 100644
--- a/src/gpu/vk/GrVkPipelineStateBuilder.cpp
+++ b/src/gpu/vk/GrVkPipelineStateBuilder.cpp
@@ -138,12 +138,14 @@
 
 void GrVkPipelineStateBuilder::storeShadersInCache(const SkSL::String shaders[],
                                                    const SkSL::Program::Inputs inputs[],
-                                                   bool isSkSL) {
+                                                   bool isSkSL,
+                                                   const SkSL::Program::Settings& settings) {
     Desc* desc = static_cast<Desc*>(this->desc());
     sk_sp<SkData> key = SkData::MakeWithoutCopy(desc->asKey(), desc->shaderKeyLength());
     sk_sp<SkData> data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kSPIRV_Tag,
                                                                    shaders,
-                                                                   inputs, kGrShaderTypeCount);
+                                                                   inputs, kGrShaderTypeCount,
+                                                                   &settings);
     this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
 }
 
@@ -288,7 +290,7 @@
                 }
                 isSkSL = true;
             }
-            this->storeShadersInCache(shaders, inputs, isSkSL);
+            this->storeShadersInCache(shaders, inputs, isSkSL, settings);
         }
     }
     GrVkPipeline* pipeline = resourceProvider.createPipeline(
diff --git a/src/gpu/vk/GrVkPipelineStateBuilder.h b/src/gpu/vk/GrVkPipelineStateBuilder.h
index 980e6ac..4cb22d0 100644
--- a/src/gpu/vk/GrVkPipelineStateBuilder.h
+++ b/src/gpu/vk/GrVkPipelineStateBuilder.h
@@ -96,7 +96,7 @@
                              VkPipelineShaderStageCreateInfo* outStageInfo);
 
     void storeShadersInCache(const SkSL::String shaders[], const SkSL::Program::Inputs inputs[],
-                             bool isSkSL);
+                             bool isSkSL, const SkSL::Program::Settings& settings);
 
     bool createVkShaderModule(VkShaderStageFlagBits stage,
                               const SkSL::String& sksl,