Add support for pre-compiling cached SkSL shaders

The client can do a test run of their application with
a persistent cache set to SkSL mode. They store the key
and data blobs that are produced.

Ship those blobs with the application. At startup, call
GrContext::precompileShader for each key/data pair. This
compiles the shaders, and stores the GL program ID, plus
a small amount of metadata in our runtime program cache.

Caveats:
* Currently only implemented for the GL backend. Other
  backends will require more metadata to do any useful
  amount of work. Metal may need a more drastic workflow
  change, involving offline compilation of the shaders.
* Currently only implemented for cached SkSL (not GLSL
  or program binaries). Supporting other formats again
  requires more metadata, and the cached shaders become
  increasingly specialized to GPU and driver versions.
* Reusing the cached SkSL on different hardware is not
  supported. Many driver workarounds are implemented in
  the SkSL -> GLSL transformation, but some are higher
  level. Limiting device variance by artificially hiding
  extensions may help, but there are no guarantees.

Testing:
* The 'gltestprecompile' DM config exercises this code
  similarly to 'gltestpersistentcache', ensuring that
  results are visually identical when precompiling, and
  that no cache misses occur after precompiling.

Change-Id: Id314c5d5f5a58fe503a0505a613bd4a540cc3589
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239438
Reviewed-by: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.cpp b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
index b08eda3..98b6a88 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
@@ -33,12 +33,25 @@
 #define GL_CALL(X) GR_GL_CALL(this->gpu()->glInterface(), X)
 #define GL_CALL_RET(R, X) GR_GL_CALL_RET(this->gpu()->glInterface(), R, X)
 
+static void cleanup_shaders(GrGLGpu* gpu, const SkTDArray<GrGLuint>& shaderIDs) {
+    for (int i = 0; i < shaderIDs.count(); ++i) {  // issue DeleteShader for every listed ID
+        GR_GL_CALL(gpu->glInterface(), DeleteShader(shaderIDs[i]));
+    }
+}
+
+static void cleanup_program(GrGLGpu* gpu, GrGLuint programID,
+                            const SkTDArray<GrGLuint>& shaderIDs) {
+    GR_GL_CALL(gpu->glInterface(), DeleteProgram(programID));  // delete the program object
+    cleanup_shaders(gpu, shaderIDs);  // then delete every shader in the list
+}
+
 GrGLProgram* GrGLProgramBuilder::CreateProgram(GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
                                                const GrPrimitiveProcessor& primProc,
                                                const GrTextureProxy* const primProcProxies[],
                                                const GrPipeline& pipeline,
                                                GrProgramDesc* desc,
-                                               GrGLGpu* gpu) {
+                                               GrGLGpu* gpu,
+                                               const GrGLPrecompiledProgram* precompiledProgram) {
     SkASSERT(!pipeline.isBad());
 
     ATRACE_ANDROID_FRAMEWORK("Shader Compile");
@@ -50,7 +63,7 @@
                                pipeline, primProc, primProcProxies, desc);
 
     auto persistentCache = gpu->getContext()->priv().getPersistentCache();
-    if (persistentCache) {
+    if (persistentCache && !precompiledProgram) {
         sk_sp<SkData> key = SkData::MakeWithoutCopy(desc->asKey(), desc->keyLength());
         builder.fCached = persistentCache->load(*key);
         // the eventual end goal is to completely skip emitAndInstallProcs on a cache hit, but it's
@@ -60,7 +73,7 @@
     if (!builder.emitAndInstallProcs()) {
         return nullptr;
     }
-    return builder.finalize();
+    return builder.finalize(precompiledProgram);
 }
 
 /////////////////////////////////////////////////////////////////////////////
@@ -149,7 +162,8 @@
 static constexpr SkFourByteTag kGLPB_Tag = SkSetFourByteTag('G', 'L', 'P', 'B');
 
 void GrGLProgramBuilder::storeShaderInCache(const SkSL::Program::Inputs& inputs, GrGLuint programID,
-                                            const SkSL::String shaders[], bool isSkSL) {
+                                            const SkSL::String shaders[], bool isSkSL,
+                                            const SkSL::Program::Settings& settings) {
     if (!this->gpu()->getContext()->priv().getPersistentCache()) {
         return;
     }
@@ -176,24 +190,29 @@
     } else {
         // source cache
         auto data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kGLSL_Tag,
-                                                              shaders, &inputs, 1);
+                                                              shaders, &inputs, 1, &settings);
         this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
     }
 }
 
-GrGLProgram* GrGLProgramBuilder::finalize() {
+GrGLProgram* GrGLProgramBuilder::finalize(const GrGLPrecompiledProgram* precompiledProgram) {
     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
 
     // verify we can get a program id
     GrGLuint programID;
-    GL_CALL_RET(programID, CreateProgram());
+    if (precompiledProgram) {
+        programID = precompiledProgram->fProgramID;
+    } else {
+        GL_CALL_RET(programID, CreateProgram());
+    }
     if (0 == programID) {
         return nullptr;
     }
 
     if (this->gpu()->glCaps().programBinarySupport() &&
         this->gpu()->glCaps().programParameterSupport() &&
-        this->gpu()->getContext()->priv().getPersistentCache()) {
+        this->gpu()->getContext()->priv().getPersistentCache() &&
+        !precompiledProgram) {
         GL_CALL(ProgramParameteri(programID, GR_GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GR_GL_TRUE));
     }
 
@@ -225,7 +244,13 @@
         &fFS.fCompilerString,
     };
     SkSL::String cached_sksl[kGrShaderTypeCount];
-    if (cached) {
+    if (precompiledProgram) {
+        // This is very similar to when we get program binaries. We even set that flag, as it's
+        // used to prevent other compile work later, and to force re-querying uniform locations.
+        this->addInputVars(precompiledProgram->fInputs);
+        this->computeCountsAndStrides(programID, primProc, false);
+        usedProgramBinaries = true;
+    } else if (cached) {
         SkReader32 reader(fCached->data(), fCached->size());
         SkFourByteTag shaderType = reader.readU32();
 
@@ -288,7 +313,7 @@
                                                              &glsl[kFragment_GrShaderType],
                                                              errorHandler);
             if (!fs) {
-                this->cleanupProgram(programID, shadersToDelete);
+                cleanup_program(fGpu, programID, shadersToDelete);
                 return nullptr;
             }
             inputs = fs->fInputs;
@@ -300,7 +325,7 @@
         }
         if (!this->compileAndAttachShaders(glsl[kFragment_GrShaderType], programID,
                                            GR_GL_FRAGMENT_SHADER, &shadersToDelete, errorHandler)) {
-            this->cleanupProgram(programID, shadersToDelete);
+            cleanup_program(fGpu, programID, shadersToDelete);
             return nullptr;
         }
 
@@ -313,13 +338,13 @@
                                                              &glsl[kVertex_GrShaderType],
                                                              errorHandler);
             if (!vs) {
-                this->cleanupProgram(programID, shadersToDelete);
+                cleanup_program(fGpu, programID, shadersToDelete);
                 return nullptr;
             }
         }
         if (!this->compileAndAttachShaders(glsl[kVertex_GrShaderType], programID,
                                            GR_GL_VERTEX_SHADER, &shadersToDelete, errorHandler)) {
-            this->cleanupProgram(programID, shadersToDelete);
+            cleanup_program(fGpu, programID, shadersToDelete);
             return nullptr;
         }
 
@@ -340,14 +365,14 @@
                                   &glsl[kGeometry_GrShaderType],
                                   errorHandler);
                 if (!gs) {
-                    this->cleanupProgram(programID, shadersToDelete);
+                    cleanup_program(fGpu, programID, shadersToDelete);
                     return nullptr;
                 }
             }
             if (!this->compileAndAttachShaders(glsl[kGeometry_GrShaderType], programID,
                                                GR_GL_GEOMETRY_SHADER, &shadersToDelete,
                                                errorHandler)) {
-                this->cleanupProgram(programID, shadersToDelete);
+                cleanup_program(fGpu, programID, shadersToDelete);
                 return nullptr;
             }
         }
@@ -363,13 +388,15 @@
     }
     this->resolveProgramResourceLocations(programID, usedProgramBinaries);
 
-    this->cleanupShaders(shadersToDelete);
+    cleanup_shaders(fGpu, shadersToDelete);
 
     // With ANGLE, we can't cache path-rendering programs. We use ProgramPathFragmentInputGen,
     // and ANGLE's deserialized program state doesn't restore enough state to handle that.
     // The native NVIDIA drivers do, but this is such an edge case that it's easier to just
     // black-list caching these programs in all cases. See: anglebug.com/3619
-    if (!cached && !primProc.isPathRendering()) {
+    // We also can't cache SkSL or GLSL if we were given a precompiled program, but there's not
+    // much point in doing so.
+    if (!cached && !primProc.isPathRendering() && !precompiledProgram) {
         bool isSkSL = false;
         if (fGpu->getContext()->priv().options().fShaderCacheStrategy ==
                 GrContextOptions::ShaderCacheStrategy::kSkSL) {
@@ -378,7 +405,7 @@
             }
             isSkSL = true;
         }
-        this->storeShaderInCache(inputs, programID, glsl, isSkSL);
+        this->storeShaderInCache(inputs, programID, glsl, isSkSL, settings);
     }
     return this->createProgram(programID);
 }
@@ -463,16 +490,6 @@
     }
 }
 
-void GrGLProgramBuilder::cleanupProgram(GrGLuint programID, const SkTDArray<GrGLuint>& shaderIDs) {
-    GL_CALL(DeleteProgram(programID));
-    this->cleanupShaders(shaderIDs);
-}
-void GrGLProgramBuilder::cleanupShaders(const SkTDArray<GrGLuint>& shaderIDs) {
-    for (int i = 0; i < shaderIDs.count(); ++i) {
-        GL_CALL(DeleteShader(shaderIDs[i]));
-    }
-}
-
 GrGLProgram* GrGLProgramBuilder::createProgram(GrGLuint programID) {
     return new GrGLProgram(fGpu,
                            fUniformHandles,
@@ -490,3 +507,76 @@
                            fVertexStride,
                            fInstanceStride);
 }
+
+bool GrGLProgramBuilder::PrecompileProgram(GrGLPrecompiledProgram* precompiledProgram,
+                                           GrGLGpu* gpu,
+                                           const SkData& cachedData) {  // false on any failure
+    SkReader32 reader(cachedData.data(), cachedData.size());
+    SkFourByteTag shaderType = reader.readU32();  // first word of the blob is a format tag
+    if (shaderType != kSKSL_Tag) {
+        // TODO: Support GLSL, and maybe even program binaries, too?
+        return false;
+    }
+
+    const GrGLInterface* gl = gpu->glInterface();
+    auto errorHandler = gpu->getContext()->priv().getShaderErrorHandler();
+    GrGLuint programID;
+    GR_GL_CALL_RET(gl, programID, CreateProgram());
+    if (0 == programID) {
+        return false;
+    }
+
+    SkTDArray<GrGLuint> shadersToDelete;  // attached shader IDs, deleted on each later exit
+
+    SkSL::Program::Settings settings;  // filled in below, then passed to the unpack call
+    settings.fCaps = gpu->glCaps().shaderCaps();
+    settings.fSharpenTextures = gpu->getContext()->priv().options().fSharpenMipmappedTextures;
+
+    SkSL::String shaders[kGrShaderTypeCount];
+    SkSL::Program::Inputs inputs;
+    GrPersistentCacheUtils::UnpackCachedShaders(&reader, shaders, &inputs, 1, &settings);
+
+    auto compileShader = [&](SkSL::Program::Kind kind, const SkSL::String& sksl, GrGLenum type) {
+        SkSL::String glsl;
+        auto program = GrSkSLtoGLSL(gpu->glContext(), kind, sksl, settings, &glsl, errorHandler);
+        if (!program) {
+            return false;
+        }
+
+        if (GrGLuint shaderID = GrGLCompileAndAttachShader(gpu->glContext(), programID, type, glsl,
+                                                           gpu->stats(), errorHandler)) {
+            shadersToDelete.push_back(shaderID);  // track it for later deletion
+            return true;
+        } else {
+            return false;
+        }
+    };
+
+    if (!compileShader(SkSL::Program::kFragment_Kind,
+                       shaders[kFragment_GrShaderType],
+                       GR_GL_FRAGMENT_SHADER) ||
+        !compileShader(SkSL::Program::kVertex_Kind,
+                       shaders[kVertex_GrShaderType],
+                       GR_GL_VERTEX_SHADER) ||
+        (!shaders[kGeometry_GrShaderType].empty() &&  // geometry stage is optional
+         !compileShader(SkSL::Program::kGeometry_Kind,
+                       shaders[kGeometry_GrShaderType],
+                       GR_GL_GEOMETRY_SHADER))) {
+        cleanup_program(gpu, programID, shadersToDelete);
+        return false;
+    }
+
+    GR_GL_CALL(gpu->glInterface(), LinkProgram(programID));
+    GrGLint linked = GR_GL_INIT_ZERO;
+    GR_GL_CALL(gpu->glInterface(), GetProgramiv(programID, GR_GL_LINK_STATUS, &linked));
+    if (!linked) {
+        cleanup_program(gpu, programID, shadersToDelete);
+        return false;
+    }
+
+    cleanup_shaders(gpu, shadersToDelete);  // linked: delete shaders, keep the program
+
+    precompiledProgram->fProgramID = programID;  // outputs are written only on success
+    precompiledProgram->fInputs = inputs;
+    return true;
+}