Support specializing uniforms in runtime FPs

http://go/rteffect-uniform-enhancements

Change-Id: Icb69b87049488a3baf234d45fe1e6a3c96a16d5e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/417856
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: John Stiles <johnstiles@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
diff --git a/src/gpu/effects/GrSkSLFP.cpp b/src/gpu/effects/GrSkSLFP.cpp
index 8cbf561..e6e3a96 100644
--- a/src/gpu/effects/GrSkSLFP.cpp
+++ b/src/gpu/effects/GrSkSLFP.cpp
@@ -9,6 +9,7 @@
 
 #include "include/effects/SkRuntimeEffect.h"
 #include "include/private/GrContext_Base.h"
+#include "include/private/SkSLString.h"
 #include "src/core/SkRuntimeEffectPriv.h"
 #include "src/core/SkVM.h"
 #include "src/gpu/GrBaseContextPriv.h"
@@ -41,8 +42,15 @@
             FPCallbacks(GrGLSLSkSLFP* self,
                         EmitArgs& args,
                         const char* inputColor,
-                        const SkSL::Context& context)
-                    : fSelf(self), fArgs(args), fInputColor(inputColor), fContext(context) {}
+                        const SkSL::Context& context,
+                        const uint8_t* uniformData,
+                        const GrSkSLFP::UniformFlags* uniformFlags)
+                    : fSelf(self)
+                    , fArgs(args)
+                    , fInputColor(inputColor)
+                    , fContext(context)
+                    , fUniformData(uniformData)
+                    , fUniformFlags(uniformFlags) {}
 
             using String = SkSL::String;
 
@@ -56,6 +64,11 @@
                 }
 
                 const SkSL::Type* type = &var.type();
+                size_t sizeInBytes = type->slotCount() * sizeof(float);
+                const float* floatData = reinterpret_cast<const float*>(fUniformData);
+                const int* intData = reinterpret_cast<const int*>(fUniformData);
+                fUniformData += sizeInBytes;
+
                 bool isArray = false;
                 if (type->isArray()) {
                     type = &type->componentType();
@@ -64,6 +77,23 @@
 
                 GrSLType gpuType;
                 SkAssertResult(SkSL::type_to_grsltype(fContext, *type, &gpuType));
+
+                if (*fUniformFlags++ & GrSkSLFP::kSpecialize_Flag) {
+                    SkASSERTF(!isArray, "specializing array uniforms is not allowed");
+                    String value = GrGLSLTypeString(gpuType);
+                    value.append("(");
+
+                    bool isFloat = GrSLTypeIsFloatType(gpuType);
+                    size_t slots = type->slotCount();
+                    for (size_t i = 0; i < slots; ++i) {
+                        value.append(isFloat ? SkSL::to_string(floatData[i])
+                                             : SkSL::to_string(intData[i]));
+                        value.append(",");
+                    }
+                    value.back() = ')';
+                    return value;
+                }
+
                 const char* uniformName = nullptr;
                 auto handle =
                         fArgs.fUniformHandler->addUniformArray(&fArgs.fFp.cast<GrSkSLFP>(),
@@ -120,10 +150,13 @@
                                       .c_str());
             }
 
-            GrGLSLSkSLFP*        fSelf;
-            EmitArgs&            fArgs;
-            const char*          fInputColor;
-            const SkSL::Context& fContext;
+            GrGLSLSkSLFP*                 fSelf;
+            EmitArgs&                     fArgs;
+            const char*                   fInputColor;
+            const SkSL::Context&          fContext;
+            const uint8_t*                fUniformData;
+            const GrSkSLFP::UniformFlags* fUniformFlags;
+            int                           fUniformIndex = 0;
         };
 
         // Snap off a global copy of the input color at the start of main. We need this when
@@ -143,9 +176,14 @@
             args.fFragBuilder->codeAppendf("float2 %s = %s;\n", coords, args.fSampleCoord);
         }
 
-        FPCallbacks callbacks(this, args, inputColorCopy.c_str(), *program.fContext);
-        SkSL::PipelineStage::ConvertProgram(program, coords, args.fInputColor, "half4(1)",
-                                            &callbacks);
+        FPCallbacks callbacks(this,
+                              args,
+                              inputColorCopy.c_str(),
+                              *program.fContext,
+                              fp.uniformData(),
+                              fp.uniformFlags());
+        SkSL::PipelineStage::ConvertProgram(
+                program, coords, args.fInputColor, "half4(1)", &callbacks);
     }
 
     void onSetData(const GrGLSLProgramDataManager& pdman,
@@ -153,8 +191,12 @@
         using Type = SkRuntimeEffect::Uniform::Type;
         size_t uniIndex = 0;
         const GrSkSLFP& outer = _proc.cast<GrSkSLFP>();
-        const uint8_t* uniformData = (const uint8_t*)outer.uniformData();
+        const uint8_t* uniformData = outer.uniformData();
+        const GrSkSLFP::UniformFlags* uniformFlags = outer.uniformFlags();
         for (const auto& v : outer.fEffect->uniforms()) {
+            if (*uniformFlags++ & GrSkSLFP::kSpecialize_Flag) {
+                continue;
+            }
             const UniformHandle handle = fUniformHandles[uniIndex++];
             auto floatData = [=] { return SkTAddOffset<const float>(uniformData, v.offset); };
             auto intData = [=] { return SkTAddOffset<const int>(uniformData, v.offset); };
@@ -190,7 +232,9 @@
         return nullptr;
     }
     size_t uniformSize = uniforms->size();
-    std::unique_ptr<GrSkSLFP> fp(new (uniformSize) GrSkSLFP(std::move(effect), name));
+    size_t uniformFlagSize = effect->uniforms().count() * sizeof(UniformFlags);
+    std::unique_ptr<GrSkSLFP> fp(new (uniformSize + uniformFlagSize)
+                                         GrSkSLFP(std::move(effect), name));
     sk_careful_memcpy(fp->uniformData(), uniforms->data(), uniformSize);
     return fp;
 }
@@ -203,6 +247,7 @@
         , fEffect(std::move(effect))
         , fName(name)
         , fUniformSize(fEffect->uniformSize()) {
+    memset(this->uniformFlags(), 0, fEffect->uniforms().count() * sizeof(UniformFlags));
     if (fEffect->usesSampleCoords()) {
         this->setUsesSampleCoordsDirectly();
     }
@@ -213,6 +258,9 @@
         , fEffect(other.fEffect)
         , fName(other.fName)
         , fUniformSize(other.fUniformSize) {
+    sk_careful_memcpy(this->uniformFlags(),
+                      other.uniformFlags(),
+                      fEffect->uniforms().count() * sizeof(UniformFlags));
     sk_careful_memcpy(this->uniformData(), other.uniformData(), fUniformSize);
 
     if (fEffect->usesSampleCoords()) {
@@ -222,10 +270,6 @@
     this->cloneAndRegisterAllChildProcessors(other);
 }
 
-const char* GrSkSLFP::name() const {
-    return fName;
-}
-
 void GrSkSLFP::addChild(std::unique_ptr<GrFragmentProcessor> child) {
     int childIndex = this->numChildProcessors();
     SkASSERT((size_t)childIndex < fEffect->fSampleUsages.size());
@@ -243,17 +287,34 @@
     // amount of uniform data.
     b->add32(fEffect->hash());
     b->add32(SkToU32(fUniformSize));
+
+    const UniformFlags* flags = this->uniformFlags();
+    const uint8_t* uniformData = this->uniformData();
+    size_t uniformCount = fEffect->uniforms().count();
+    auto iter = fEffect->uniforms().begin();
+    // Step iter with i so flags[i] is keyed with its own uniform's size, offset and name.
+    for (size_t i = 0; i < uniformCount; ++i, ++iter) {
+        bool specialize = flags[i] & kSpecialize_Flag;
+        b->addBool(specialize, "specialize");
+        if (specialize) {
+            b->addBytes(iter->sizeInBytes(), uniformData + iter->offset, iter->name.c_str());
+        }
+    }
 }
 
 bool GrSkSLFP::onIsEqual(const GrFragmentProcessor& other) const {
     const GrSkSLFP& sk = other.cast<GrSkSLFP>();
+    const size_t uniformFlagSize = fEffect->uniforms().count() * sizeof(UniformFlags);
     return fEffect->hash() == sk.fEffect->hash() &&
+           fEffect->uniforms().count() == sk.fEffect->uniforms().count() &&
            fUniformSize == sk.fUniformSize &&
-           !sk_careful_memcmp(this->uniformData(), sk.uniformData(), fUniformSize);
+           !sk_careful_memcmp(
+                   this->uniformData(), sk.uniformData(), fUniformSize + uniformFlagSize);
 }
 
 std::unique_ptr<GrFragmentProcessor> GrSkSLFP::clone() const {
-    return std::unique_ptr<GrFragmentProcessor>(new (fUniformSize) GrSkSLFP(*this));
+    return std::unique_ptr<GrFragmentProcessor>(new (UniformPayloadSize(fEffect.get()))
+                                                        GrSkSLFP(*this));
 }
 
 SkPMColor4f GrSkSLFP::constantOutputForConstantInput(const SkPMColor4f& inputColor) const {
diff --git a/src/gpu/effects/GrSkSLFP.h b/src/gpu/effects/GrSkSLFP.h
index fa9f548..94498c7 100644
--- a/src/gpu/effects/GrSkSLFP.h
+++ b/src/gpu/effects/GrSkSLFP.h
@@ -43,6 +43,23 @@
 
 class GrSkSLFP : public GrFragmentProcessor {
 public:
+    template <typename T> struct GrSpecializedUniform {
+        bool specialize;
+        T value;
+    };
+    template <typename T>
+    static GrSpecializedUniform<T> Specialize(const T& value) {
+        return {true, value};
+    }
+    template <typename T>
+    static GrSpecializedUniform<T> SpecializeIf(bool condition, const T& value) {
+        return {condition, value};
+    }
+
+    enum UniformFlags : uint8_t {
+        kSpecialize_Flag = 0x1,
+    };
+
     /**
      * Creates a new fragment processor from an SkRuntimeEffect and a data blob containing values
      * for all of the 'uniform' variables in the SkSL source. The layout of the uniforms blob is
@@ -52,7 +69,7 @@
                                           const char* name,
                                           sk_sp<SkData> uniforms);
 
-    const char* name() const override;
+    const char* name() const override { return fName; }
 
     void addChild(std::unique_ptr<GrFragmentProcessor> child);
 
@@ -98,9 +115,9 @@
                   std::forward<Args>(args)...);
 #endif
 
-        size_t uniformSize = effect->uniformSize();
-        std::unique_ptr<GrSkSLFP> fp(new (uniformSize) GrSkSLFP(std::move(effect), name));
-        fp->appendArgs(fp->uniformData(), std::forward<Args>(args)...);
+        size_t uniformPayloadSize = UniformPayloadSize(effect.get());
+        std::unique_ptr<GrSkSLFP> fp(new (uniformPayloadSize) GrSkSLFP(std::move(effect), name));
+        fp->appendArgs(fp->uniformData(), fp->uniformFlags(), std::forward<Args>(args)...);
         return fp;
     }
 
@@ -116,22 +133,67 @@
 
     SkPMColor4f constantOutputForConstantInput(const SkPMColor4f&) const override;
 
-    void* uniformData() const { return (void*)(this + 1); }
+    // An instance of GrSkSLFP is always allocated with a payload immediately following the FP.
+    // First the values of all the uniforms, and then a set of flags (one per uniform).
+    static size_t UniformPayloadSize(const SkRuntimeEffect* effect) {
+        return effect->uniformSize() + effect->uniforms().count() * sizeof(UniformFlags);
+    }
+
+    const uint8_t* uniformData() const { return reinterpret_cast<const uint8_t*>(this + 1); }
+          uint8_t* uniformData()       { return reinterpret_cast<      uint8_t*>(this + 1); }
+
+    const UniformFlags* uniformFlags() const {
+        return reinterpret_cast<const UniformFlags*>(this->uniformData() + fUniformSize);
+    }
+    UniformFlags* uniformFlags() {
+        return reinterpret_cast<UniformFlags*>(this->uniformData() + fUniformSize);
+    }
 
     // Helpers to attach variadic template args to a newly constructed FP:
-    void appendArgs(void* ptr) {}
+
+    void appendArgs(uint8_t* uniformDataPtr, UniformFlags* uniformFlagsPtr) {
+        // Base case -- no more args to append, so we're done
+    }
     template <typename... Args>
-    void appendArgs(void* ptr,
+    void appendArgs(uint8_t* uniformDataPtr,
+                    UniformFlags* uniformFlagsPtr,
                     const char* name,
                     std::unique_ptr<GrFragmentProcessor>&& child,
                     Args&&... remainder) {
+        // Child FP case -- register the child, then continue processing the remaining arguments.
+        // Children aren't "uniforms" here, so the data & flags pointers don't advance.
         this->addChild(std::move(child));
-        this->appendArgs(ptr, std::forward<Args>(remainder)...);
+        this->appendArgs(uniformDataPtr, uniformFlagsPtr, std::forward<Args>(remainder)...);
     }
     template <typename T, typename... Args>
-    void appendArgs(void* ptr, const char* name, const T& val, Args&&... remainder) {
-        memcpy(ptr, &val, sizeof(val));
-        this->appendArgs(SkTAddOffset<void>(ptr, sizeof(val)), std::forward<Args>(remainder)...);
+    void appendArgs(uint8_t* uniformDataPtr,
+                    UniformFlags* uniformFlagsPtr,
+                    const char* name,
+                    const GrSpecializedUniform<T>& val,
+                    Args&&... remainder) {
+        // Specialized uniform case -- This just handles the specialization logic. If we want to
+        // specialize on this particular value, set the flag. Then, continue processing the actual
+        // value (by just peeling off the wrapper). This lets our generic `const T&` case (below)
+        // handle copying the data into our uniform block, and advancing the per-value uniform
+        // data and flags pointers.
+        if (val.specialize) {
+            *uniformFlagsPtr = static_cast<UniformFlags>(*uniformFlagsPtr | kSpecialize_Flag);
+        }
+        this->appendArgs(
+                uniformDataPtr, uniformFlagsPtr, name, val.value, std::forward<Args>(remainder)...);
+    }
+    template <typename T, typename... Args>
+    void appendArgs(uint8_t* uniformDataPtr,
+                    UniformFlags* uniformFlagsPtr,
+                    const char* name,
+                    const T& val,
+                    Args&&... remainder) {
+        // Raw uniform value case -- We copy the supplied value into our uniform data area,
+        // then advance our uniform data and flags pointers.
+        memcpy(uniformDataPtr, &val, sizeof(val));
+        uniformDataPtr += sizeof(val);
+        uniformFlagsPtr++;
+        this->appendArgs(uniformDataPtr, uniformFlagsPtr, std::forward<Args>(remainder)...);
     }
 
 #ifdef SK_DEBUG
@@ -169,6 +231,17 @@
                           child_iterator cIter,
                           child_iterator cEnd,
                           const char* name,
+                          const GrSpecializedUniform<T>& val,
+                          Args&&... remainder) {
+        static_assert(!std::is_array<T>::value);  // No specializing arrays
+        checkArgs(uIter, uEnd, cIter, cEnd, name, val.value, std::forward<Args>(remainder)...);
+    }
+    template <typename T, typename... Args>
+    static void checkArgs(uniform_iterator uIter,
+                          uniform_iterator uEnd,
+                          child_iterator cIter,
+                          child_iterator cEnd,
+                          const char* name,
                           const T& val,
                           Args&&... remainder) {
         SkASSERTF(uIter != uEnd, "Too many uniforms, wasn't expecting '%s'", name);
diff --git a/tests/SkRuntimeEffectTest.cpp b/tests/SkRuntimeEffectTest.cpp
index dea6c42..5e889ac 100644
--- a/tests/SkRuntimeEffectTest.cpp
+++ b/tests/SkRuntimeEffectTest.cpp
@@ -17,6 +17,7 @@
 #include "src/core/SkRuntimeEffectPriv.h"
 #include "src/core/SkTLazy.h"
 #include "src/gpu/GrColor.h"
+#include "src/gpu/GrDirectContextPriv.h"
 #include "src/gpu/GrFragmentProcessor.h"
 #include "src/gpu/effects/GrSkSLFP.h"
 #include "tests/Test.h"
@@ -632,3 +633,39 @@
     test("half4 helper(float2 xy) { return sample(child, xy); }"
          "half4 main(float2 xy) { return helper(xy); }", true, true);
 }
+
+DEF_GPUTEST_FOR_ALL_CONTEXTS(GrSkSLFP_Specialized, r, ctxInfo) {
+    struct FpAndKey {
+        std::unique_ptr<GrFragmentProcessor> fp;
+        SkTArray<uint32_t, true>             key;
+    };
+
+    // Constant-color FP, with an option similar to GrOverrideInputFragmentProcessor:
+    // 'specialize' decides if the color is inserted in the SkSL as a literal, or left as a uniform
+    auto make_color_fp = [&](SkPMColor4f color, bool specialize) {
+        auto effect = SkMakeRuntimeEffect(SkRuntimeEffect::MakeForShader, R"(
+            uniform half4 color;
+            half4 main(float2 xy) { return color; }
+        )");
+        FpAndKey result;
+        result.fp = GrSkSLFP::Make(
+                std::move(effect), "color_fp", "color", GrSkSLFP::SpecializeIf(specialize, color));
+        GrProcessorKeyBuilder builder(&result.key);
+        result.fp->getGLSLProcessorKey(*ctxInfo.directContext()->priv().caps()->shaderCaps(),
+                                       &builder);
+        builder.flush();
+        return result;
+    };
+
+    FpAndKey uRed   = make_color_fp({1, 0, 0, 1}, false),
+             uGreen = make_color_fp({0, 1, 0, 1}, false),
+             sRed   = make_color_fp({1, 0, 0, 1}, true),
+             sGreen = make_color_fp({0, 1, 0, 1}, true);
+
+    // uRed and uGreen should have the same key - they just have different uniforms
+    REPORTER_ASSERT(r, uRed.key == uGreen.key);
+    // sRed and sGreen should have keys that are different from the uniform case, and each other
+    REPORTER_ASSERT(r, sRed.key != uRed.key);
+    REPORTER_ASSERT(r, sGreen.key != uRed.key);
+    REPORTER_ASSERT(r, sRed.key != sGreen.key);
+}