Add support for pre-compiling cached SkSL shaders
The client can do a test run of their application with
a persistent cache set to SkSL mode. They store the key
and data blobs that are produced.
Ship those blobs with the application. At startup, call
GrContext::precompileShader for each key/data pair. This
compiles the shaders, and stores the GL program ID, plus
a small amount of metadata in our runtime program cache.
Caveats:
* Currently only implemented for the GL backend. Other
backends will require more metadata to do any useful
amount of work. Metal may need a more drastic workflow
change, involving offline compilation of the shaders.
* Currently only implemented for cached SkSL (not GLSL
or program binaries). Supporting other formats again
requires more metadata, and the cached shaders become
increasingly specialized to GPU and driver versions.
* Reusing the cached SkSL on different hardware is not
supported. Many driver workarounds are implemented in
the SkSL -> GLSL transformation, but some are higher
level. Limiting device variance by artificially hiding
extensions may help, but there are no guarantees.
* The 'gltestprecompile' DM config exercises this code
similarly to 'gltestpersistentcache', ensuring that
results are visually identical when precompiling, and
that no cache misses occur after precompiling.
Change-Id: Id314c5d5f5a58fe503a0505a613bd4a540cc3589
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239438
Reviewed-by: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.cpp b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
index b08eda3..98b6a88 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
@@ -33,12 +33,25 @@
#define GL_CALL(X) GR_GL_CALL(this->gpu()->glInterface(), X)
#define GL_CALL_RET(R, X) GR_GL_CALL_RET(this->gpu()->glInterface(), R, X)
+static void cleanup_shaders(GrGLGpu* gpu, const SkTDArray<GrGLuint>& shaderIDs) {
+ for (int i = 0; i < shaderIDs.count(); ++i) {  // issue DeleteShader for every ID in the array
+ GR_GL_CALL(gpu->glInterface(), DeleteShader(shaderIDs[i]));
+ }
+}
+
+static void cleanup_program(GrGLGpu* gpu, GrGLuint programID,
+ const SkTDArray<GrGLuint>& shaderIDs) {
+ GR_GL_CALL(gpu->glInterface(), DeleteProgram(programID));  // delete the program object...
+ cleanup_shaders(gpu, shaderIDs);  // ...then every shader object listed in shaderIDs
+}
+
GrGLProgram* GrGLProgramBuilder::CreateProgram(GrRenderTarget* renderTarget, GrSurfaceOrigin origin,
const GrPrimitiveProcessor& primProc,
const GrTextureProxy* const primProcProxies[],
const GrPipeline& pipeline,
GrProgramDesc* desc,
- GrGLGpu* gpu) {
+ GrGLGpu* gpu,
+ const GrGLPrecompiledProgram* precompiledProgram) {
SkASSERT(!pipeline.isBad());
ATRACE_ANDROID_FRAMEWORK("Shader Compile");
@@ -50,7 +63,7 @@
pipeline, primProc, primProcProxies, desc);
auto persistentCache = gpu->getContext()->priv().getPersistentCache();
- if (persistentCache) {
+ if (persistentCache && !precompiledProgram) {
sk_sp<SkData> key = SkData::MakeWithoutCopy(desc->asKey(), desc->keyLength());
builder.fCached = persistentCache->load(*key);
// the eventual end goal is to completely skip emitAndInstallProcs on a cache hit, but it's
@@ -60,7 +73,7 @@
if (!builder.emitAndInstallProcs()) {
return nullptr;
}
- return builder.finalize();
+ return builder.finalize(precompiledProgram);
}
/////////////////////////////////////////////////////////////////////////////
@@ -149,7 +162,8 @@
static constexpr SkFourByteTag kGLPB_Tag = SkSetFourByteTag('G', 'L', 'P', 'B');
void GrGLProgramBuilder::storeShaderInCache(const SkSL::Program::Inputs& inputs, GrGLuint programID,
- const SkSL::String shaders[], bool isSkSL) {
+ const SkSL::String shaders[], bool isSkSL,
+ const SkSL::Program::Settings& settings) {
if (!this->gpu()->getContext()->priv().getPersistentCache()) {
return;
}
@@ -176,24 +190,29 @@
} else {
// source cache
auto data = GrPersistentCacheUtils::PackCachedShaders(isSkSL ? kSKSL_Tag : kGLSL_Tag,
- shaders, &inputs, 1);
+ shaders, &inputs, 1, &settings);
this->gpu()->getContext()->priv().getPersistentCache()->store(*key, *data);
}
}
-GrGLProgram* GrGLProgramBuilder::finalize() {
+GrGLProgram* GrGLProgramBuilder::finalize(const GrGLPrecompiledProgram* precompiledProgram) {
TRACE_EVENT0("skia.gpu", TRACE_FUNC);
// verify we can get a program id
GrGLuint programID;
- GL_CALL_RET(programID, CreateProgram());
+ if (precompiledProgram) {
+ programID = precompiledProgram->fProgramID;
+ } else {
+ GL_CALL_RET(programID, CreateProgram());
+ }
if (0 == programID) {
return nullptr;
}
if (this->gpu()->glCaps().programBinarySupport() &&
this->gpu()->glCaps().programParameterSupport() &&
- this->gpu()->getContext()->priv().getPersistentCache()) {
+ this->gpu()->getContext()->priv().getPersistentCache() &&
+ !precompiledProgram) {
GL_CALL(ProgramParameteri(programID, GR_GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GR_GL_TRUE));
}
@@ -225,7 +244,13 @@
&fFS.fCompilerString,
};
SkSL::String cached_sksl[kGrShaderTypeCount];
- if (cached) {
+ if (precompiledProgram) {
+ // This is very similar to when we get program binaries. We even set that flag, as it's
+ // used to prevent other compile work later, and to force re-querying uniform locations.
+ this->addInputVars(precompiledProgram->fInputs);
+ this->computeCountsAndStrides(programID, primProc, false);
+ usedProgramBinaries = true;
+ } else if (cached) {
SkReader32 reader(fCached->data(), fCached->size());
SkFourByteTag shaderType = reader.readU32();
@@ -288,7 +313,7 @@
&glsl[kFragment_GrShaderType],
errorHandler);
if (!fs) {
- this->cleanupProgram(programID, shadersToDelete);
+ cleanup_program(fGpu, programID, shadersToDelete);
return nullptr;
}
inputs = fs->fInputs;
@@ -300,7 +325,7 @@
}
if (!this->compileAndAttachShaders(glsl[kFragment_GrShaderType], programID,
GR_GL_FRAGMENT_SHADER, &shadersToDelete, errorHandler)) {
- this->cleanupProgram(programID, shadersToDelete);
+ cleanup_program(fGpu, programID, shadersToDelete);
return nullptr;
}
@@ -313,13 +338,13 @@
&glsl[kVertex_GrShaderType],
errorHandler);
if (!vs) {
- this->cleanupProgram(programID, shadersToDelete);
+ cleanup_program(fGpu, programID, shadersToDelete);
return nullptr;
}
}
if (!this->compileAndAttachShaders(glsl[kVertex_GrShaderType], programID,
GR_GL_VERTEX_SHADER, &shadersToDelete, errorHandler)) {
- this->cleanupProgram(programID, shadersToDelete);
+ cleanup_program(fGpu, programID, shadersToDelete);
return nullptr;
}
@@ -340,14 +365,14 @@
&glsl[kGeometry_GrShaderType],
errorHandler);
if (!gs) {
- this->cleanupProgram(programID, shadersToDelete);
+ cleanup_program(fGpu, programID, shadersToDelete);
return nullptr;
}
}
if (!this->compileAndAttachShaders(glsl[kGeometry_GrShaderType], programID,
GR_GL_GEOMETRY_SHADER, &shadersToDelete,
errorHandler)) {
- this->cleanupProgram(programID, shadersToDelete);
+ cleanup_program(fGpu, programID, shadersToDelete);
return nullptr;
}
}
@@ -363,13 +388,15 @@
}
this->resolveProgramResourceLocations(programID, usedProgramBinaries);
- this->cleanupShaders(shadersToDelete);
+ cleanup_shaders(fGpu, shadersToDelete);
// With ANGLE, we can't cache path-rendering programs. We use ProgramPathFragmentInputGen,
// and ANGLE's deserialized program state doesn't restore enough state to handle that.
// The native NVIDIA drivers do, but this is such an edge case that it's easier to just
// black-list caching these programs in all cases. See: anglebug.com/3619
- if (!cached && !primProc.isPathRendering()) {
+ // We also can't cache SkSL or GLSL if we were given a precompiled program, but there's not
+ // much point in doing so.
+ if (!cached && !primProc.isPathRendering() && !precompiledProgram) {
bool isSkSL = false;
if (fGpu->getContext()->priv().options().fShaderCacheStrategy ==
GrContextOptions::ShaderCacheStrategy::kSkSL) {
@@ -378,7 +405,7 @@
}
isSkSL = true;
}
- this->storeShaderInCache(inputs, programID, glsl, isSkSL);
+ this->storeShaderInCache(inputs, programID, glsl, isSkSL, settings);
}
return this->createProgram(programID);
}
@@ -463,16 +490,6 @@
}
}
-void GrGLProgramBuilder::cleanupProgram(GrGLuint programID, const SkTDArray<GrGLuint>& shaderIDs) {
- GL_CALL(DeleteProgram(programID));
- this->cleanupShaders(shaderIDs);
-}
-void GrGLProgramBuilder::cleanupShaders(const SkTDArray<GrGLuint>& shaderIDs) {
- for (int i = 0; i < shaderIDs.count(); ++i) {
- GL_CALL(DeleteShader(shaderIDs[i]));
- }
-}
-
GrGLProgram* GrGLProgramBuilder::createProgram(GrGLuint programID) {
return new GrGLProgram(fGpu,
fUniformHandles,
@@ -490,3 +507,76 @@
fVertexStride,
fInstanceStride);
}
+// Builds a linked GL program from the cached SkSL in cachedData. Returns false on any failure.
+bool GrGLProgramBuilder::PrecompileProgram(GrGLPrecompiledProgram* precompiledProgram,
+ GrGLGpu* gpu,
+ const SkData& cachedData) {
+ SkReader32 reader(cachedData.data(), cachedData.size());
+ SkFourByteTag shaderType = reader.readU32();
+ if (shaderType != kSKSL_Tag) {
+ // TODO: Support GLSL, and maybe even program binaries, too?
+ return false;
+ }
+ // Everything below builds into a newly created GL program object.
+ const GrGLInterface* gl = gpu->glInterface();
+ auto errorHandler = gpu->getContext()->priv().getShaderErrorHandler();
+ GrGLuint programID;
+ GR_GL_CALL_RET(gl, programID, CreateProgram());
+ if (0 == programID) {
+ return false;
+ }
+
+ SkTDArray<GrGLuint> shadersToDelete;
+ // Seed the settings from this GPU's shader caps and the context options.
+ SkSL::Program::Settings settings;
+ settings.fCaps = gpu->glCaps().shaderCaps();
+ settings.fSharpenTextures = gpu->getContext()->priv().options().fSharpenMipmappedTextures;
+ // NOTE(review): &settings is passed in, so unpacking presumably updates it too — confirm.
+ SkSL::String shaders[kGrShaderTypeCount];
+ SkSL::Program::Inputs inputs;
+ GrPersistentCacheUtils::UnpackCachedShaders(&reader, shaders, &inputs, 1, &settings);
+ // Per-stage helper: SkSL -> GLSL, then compile and attach to programID; false on failure.
+ auto compileShader = [&](SkSL::Program::Kind kind, const SkSL::String& sksl, GrGLenum type) {
+ SkSL::String glsl;
+ auto program = GrSkSLtoGLSL(gpu->glContext(), kind, sksl, settings, &glsl, errorHandler);
+ if (!program) {
+ return false;
+ }
+
+ if (GrGLuint shaderID = GrGLCompileAndAttachShader(gpu->glContext(), programID, type, glsl,
+ gpu->stats(), errorHandler)) {
+ shadersToDelete.push_back(shaderID);
+ return true;
+ } else {
+ return false;
+ }
+ };
+ // Fragment and vertex are always compiled; geometry only when its SkSL string is non-empty.
+ if (!compileShader(SkSL::Program::kFragment_Kind,
+ shaders[kFragment_GrShaderType],
+ GR_GL_FRAGMENT_SHADER) ||
+ !compileShader(SkSL::Program::kVertex_Kind,
+ shaders[kVertex_GrShaderType],
+ GR_GL_VERTEX_SHADER) ||
+ (!shaders[kGeometry_GrShaderType].empty() &&
+ !compileShader(SkSL::Program::kGeometry_Kind,
+ shaders[kGeometry_GrShaderType],
+ GR_GL_GEOMETRY_SHADER))) {
+ cleanup_program(gpu, programID, shadersToDelete);
+ return false;
+ }
+ // A link failure is handled like a compile failure: delete everything and report false.
+ GR_GL_CALL(gpu->glInterface(), LinkProgram(programID));
+ GrGLint linked = GR_GL_INIT_ZERO;
+ GR_GL_CALL(gpu->glInterface(), GetProgramiv(programID, GR_GL_LINK_STATUS, &linked));
+ if (!linked) {
+ cleanup_program(gpu, programID, shadersToDelete);
+ return false;
+ }
+ // On success only the program is kept; the shader objects are deleted here.
+ cleanup_shaders(gpu, shadersToDelete);
+ // The output struct is written only on this success path.
+ precompiledProgram->fProgramID = programID;
+ precompiledProgram->fInputs = inputs;
+ return true;
+}