Reland "Redesign program key construction"
This is a reland of bbbf1a7f50a303bd76163793bd5968c72f5f4432
Original change's description:
> Redesign program key construction
>
> This does two things:
> 1) Moves responsibility for bit-packing portions of the key into the key
> itself. A new GrKeyBuilder type manages adding bits, with asserts to
> ensure a value always fits in the requested number. In theory this
> will let us generate smaller keys overall, at the expense of slightly
> more complex code during construction.
> 2) Adds a string label parameter for key methods that fold in data. For
> new methods, the label is required. To ease migration, the old add32
> does not require a label (yet). This will let us generate detailed,
> human readable keys, either based on SK_DEBUG, or a runtime option
> (if we're comfortable paying the cost).
>
> Bug: skia:11372
> Change-Id: Ib0f941551e0dbadabbd2a7de912b00e9e766b166
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/377876
> Commit-Queue: Brian Osman <brianosman@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>
Bug: skia:11372
Cq-Include-Trybots: luci.skia.skia.primary:Test-Win10-MSVC-Golo-GPU-QuadroP400-x86_64-Debug-All-Vulkan
Change-Id: I179ed581bc9ba772191e727274ac0ac6979ebdf3
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/378778
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/GrProgramDesc.cpp b/src/gpu/GrProgramDesc.cpp
index e11d06a..6c7c6b9 100644
--- a/src/gpu/GrProgramDesc.cpp
+++ b/src/gpu/GrProgramDesc.cpp
@@ -70,6 +70,15 @@
}
}
+// Currently we allow 8 bits for the class id and 24 bits for the overall processor key size
+// (as measured in bits, so the byte count of the processor key must be < 2^21).
+static constexpr uint32_t kClassIDBits = 8;
+static constexpr uint32_t kKeySizeBits = 24;
+
+static bool processor_meta_data_fits(uint32_t classID, size_t keySize) {
+ return (classID < (1u << kClassIDBits)) && (keySize < (1u << kKeySizeBits));
+}
+
/**
* A function which emits a meta key into the key builder. This is required because shader code may
* be dependent on properties of the effect that the effect itself doesn't use
@@ -83,12 +92,10 @@
const GrCaps& caps,
uint32_t transformKey,
GrProcessorKeyBuilder* b) {
- size_t processorKeySize = b->size();
+ size_t processorKeySize = b->sizeInBits();
uint32_t classID = fp.classID();
- // Currently we allow 16 bits for the class id and the overall processor key size.
- static const uint32_t kMetaKeyInvalidMask = ~((uint32_t)UINT16_MAX);
- if ((processorKeySize | classID) & kMetaKeyInvalidMask) {
+ if (!processor_meta_data_fits(classID, processorKeySize)) {
return false;
}
@@ -99,54 +106,48 @@
caps.addExtraSamplerKey(b, te.samplerState(), backendFormat);
});
- uint32_t* key = b->add32n(2);
- key[0] = (classID << 16) | SkToU32(processorKeySize);
- key[1] = transformKey;
+ b->addBits(kClassIDBits, classID, "fpClassID");
+ b->addBits(kKeySizeBits, processorKeySize, "fpKeySize");
+ b->add32(transformKey, "fpTransforms");
return true;
}
static bool gen_pp_meta_key(const GrPrimitiveProcessor& pp,
const GrCaps& caps,
- uint32_t transformKey,
GrProcessorKeyBuilder* b) {
- size_t processorKeySize = b->size();
+ size_t processorKeySize = b->sizeInBits();
uint32_t classID = pp.classID();
- // Currently we allow 16 bits for the class id and the overall processor key size.
- static const uint32_t kMetaKeyInvalidMask = ~((uint32_t)UINT16_MAX);
- if ((processorKeySize | classID) & kMetaKeyInvalidMask) {
+ if (!processor_meta_data_fits(classID, processorKeySize)) {
return false;
}
add_pp_sampler_keys(b, pp, caps);
- uint32_t* key = b->add32n(2);
- key[0] = (classID << 16) | SkToU32(processorKeySize);
- key[1] = transformKey;
+ b->addBits(kClassIDBits, classID, "ppClassID");
+ b->addBits(kKeySizeBits, processorKeySize, "ppKeySize");
return true;
}
static bool gen_xp_meta_key(const GrXferProcessor& xp, GrProcessorKeyBuilder* b) {
- size_t processorKeySize = b->size();
+ size_t processorKeySize = b->sizeInBits();
uint32_t classID = xp.classID();
- // Currently we allow 16 bits for the class id and the overall processor key size.
- static const uint32_t kMetaKeyInvalidMask = ~((uint32_t)UINT16_MAX);
- if ((processorKeySize | classID) & kMetaKeyInvalidMask) {
+ if (!processor_meta_data_fits(classID, processorKeySize)) {
return false;
}
- b->add32((classID << 16) | SkToU32(processorKeySize));
+ b->addBits(kClassIDBits, classID, "xpClassID");
+ b->addBits(kKeySizeBits, processorKeySize, "xpKeySize");
return true;
}
-static bool gen_frag_proc_and_meta_keys(const GrPrimitiveProcessor& primProc,
- const GrFragmentProcessor& fp,
+static bool gen_frag_proc_and_meta_keys(const GrFragmentProcessor& fp,
const GrCaps& caps,
GrProcessorKeyBuilder* b) {
for (int i = 0; i < fp.numChildProcessors(); ++i) {
if (auto child = fp.childProcessor(i)) {
- if (!gen_frag_proc_and_meta_keys(primProc, *child, caps, b)) {
+ if (!gen_frag_proc_and_meta_keys(*child, caps, b)) {
return false;
}
} else {
@@ -155,9 +156,10 @@
}
}
+ b->addString([&](){ return fp.name(); });
fp.getGLSLProcessorKey(*caps.shaderCaps(), b);
- return gen_fp_meta_key(fp, caps, primProc.computeCoordTransformsKey(fp), b);
+ return gen_fp_meta_key(fp, caps, GrPrimitiveProcessor::ComputeCoordTransformsKey(fp), b);
}
bool GrProgramDesc::Build(GrProgramDesc* desc,
@@ -179,9 +181,10 @@
GrProcessorKeyBuilder b(&desc->key());
const GrPrimitiveProcessor& primitiveProcessor = programInfo.primProc();
+ b.addString([&](){ return primitiveProcessor.name(); });
primitiveProcessor.getGLSLProcessorKey(*caps.shaderCaps(), &b);
primitiveProcessor.getAttributeKey(&b);
- if (!gen_pp_meta_key(primitiveProcessor, caps, 0, &b)) {
+ if (!gen_pp_meta_key(primitiveProcessor, caps, &b)) {
desc->key().reset();
return false;
}
@@ -190,7 +193,7 @@
int numColorFPs = 0, numCoverageFPs = 0;
for (int i = 0; i < pipeline.numFragmentProcessors(); ++i) {
const GrFragmentProcessor& fp = pipeline.getFragmentProcessor(i);
- if (!gen_frag_proc_and_meta_keys(primitiveProcessor, fp, caps, &b)) {
+ if (!gen_frag_proc_and_meta_keys(fp, caps, &b)) {
desc->key().reset();
return false;
}
@@ -208,6 +211,7 @@
origin = pipeline.dstProxyView().origin();
originIfDstTexture = &origin;
}
+ b.addString([&](){ return xp.name(); });
xp.getGLSLProcessorKey(*caps.shaderCaps(), &b, originIfDstTexture, pipeline.dstSampleType());
if (!gen_xp_meta_key(xp, &b)) {
desc->key().reset();
@@ -220,27 +224,21 @@
}
// Add "header" metadata
- uint32_t header = 0;
- SkDEBUGCODE(uint32_t header_bits = 0);
- auto add_bits = [&](uint32_t nbits, uint32_t val) {
- SkASSERT(val < (1u << nbits));
- SkASSERT((header_bits += nbits) <= 32);
- header = (header << nbits) | val;
- };
- add_bits(16, pipeline.writeSwizzle().asKey());
- add_bits( 1, numColorFPs);
- add_bits( 2, numCoverageFPs);
+ b.addBits(16, pipeline.writeSwizzle().asKey(), "writeSwizzle");
+ b.addBits( 1, numColorFPs, "numColorFPs");
+ b.addBits( 2, numCoverageFPs, "numCoverageFPs");
// If we knew the shader won't depend on origin, we could skip this (and use the same program
// for both origins). Instrumenting all fragment processors would be difficult and error prone.
- add_bits( 2, GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(programInfo.origin()));
- add_bits( 1, static_cast<uint32_t>(programInfo.requestedFeatures()));
- add_bits( 1, pipeline.snapVerticesToPixelCenters());
+ b.addBits( 2, GrGLSLFragmentShaderBuilder::KeyForSurfaceOrigin(programInfo.origin()), "origin");
+ b.addBits( 1, static_cast<uint32_t>(programInfo.requestedFeatures()), "requestedFeatures");
+ b.addBits( 1, pipeline.snapVerticesToPixelCenters(), "snapVertices");
// The base descriptor only stores whether or not the primitiveType is kPoints. Backend-
// specific versions (e.g., Vulkan) require more detail
- add_bits( 1, (programInfo.primitiveType() == GrPrimitiveType::kPoints));
+ b.addBits( 1, (programInfo.primitiveType() == GrPrimitiveType::kPoints), "isPoints");
- b.add32(header);
-
+ // Put a clean break between the "common" data written by this function, and any backend data
+ // appended later. The initial key length will just be this portion (rounded to 4 bytes).
+ b.flush();
desc->fInitialKeyLength = desc->keyLength();
return true;