Revert "Support large kernels on GPU in matrix convolution effect"

This reverts commit 1ed4391fe7d218caf21754de0932b5e96043621e.

Reason for revert: It looks like some bad images showed up at gold.skia.org, and ProcessorCloneTest is crashing on the Windows bots:
https://logs.chromium.org/logs/skia/4bfabe0bad476911/+/steps/dm/0/stdout

Original change's description:
> Support large kernels on GPU in matrix convolution effect
> 
> Currently matrix convolution falls back to CPU execution for large kernels, due to the argument limit for fragment shaders.
> 
> Now for large kernels, we store them in a texture and sample them in a shader to sidestep the limit.
> 
> Change-Id: Icc069a701ea8e9cd0adf75f4bfd149fd22e31afd
> Bug: skia:8449
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/263495
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
> Commit-Queue: Adlai Holler <adlai@google.com>

TBR=robertphillips@google.com,michaelludwig@google.com,adlai@google.com

Change-Id: Iaf4858131046a343481bcf0fd9cc3919d9fc2bda
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:8449
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/287736
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/effects/GrMatrixConvolutionEffect.cpp b/src/gpu/effects/GrMatrixConvolutionEffect.cpp
index c108d34..8bb5a56 100644
--- a/src/gpu/effects/GrMatrixConvolutionEffect.cpp
+++ b/src/gpu/effects/GrMatrixConvolutionEffect.cpp
@@ -6,10 +6,6 @@
  */
 #include "src/gpu/effects/GrMatrixConvolutionEffect.h"
 
-#include "src/gpu/GrBitmapTextureMaker.h"
-#include "src/gpu/GrContextPriv.h"
-#include "src/gpu/GrProxyProvider.h"
-#include "src/gpu/GrRecordingContextPriv.h"
 #include "src/gpu/GrTexture.h"
 #include "src/gpu/GrTextureProxy.h"
 #include "src/gpu/effects/GrTextureEffect.h"
@@ -34,151 +30,52 @@
     UniformHandle               fKernelOffsetUni;
     UniformHandle               fGainUni;
     UniformHandle               fBiasUni;
-    UniformHandle               fKernelBiasUni;
 
     typedef GrGLSLFragmentProcessor INHERITED;
 };
 
-GrMatrixConvolutionEffect::KernelWrapper GrMatrixConvolutionEffect::KernelWrapper::Make(
-        GrRecordingContext* context, SkISize size, const SkScalar* values) {
-    if (nullptr == context || nullptr == values || size.isEmpty()) {
-        return {};
-    }
-    const int length = size.area();
-    // Small kernel -> just fill the array.
-    KernelWrapper result(size);
-    if (length <= kMaxUniformSize) {
-        for (int i = 0; i < length; i++) {
-            result.fArray[i] = SkScalarToFloat(values[i]);
-        }
-        return result;
-    }
-
-    ScalableSampler& scalableSampler = result.fScalableSampler;
-    // Determine min and max values to figure out inner gain & bias.
-    SkScalar min = values[0];
-    SkScalar max = values[0];
-    for (int i = 1; i < length; i++) {
-        if (values[i] < min) {
-            min = values[i];
-        }
-        if (values[i] > max) {
-            max = values[i];
-        }
-    }
-    // Treat near-0 gain (i.e. box blur) as 1, and let the kernelBias
-    // move everything up to the final value.
-    const SkScalar computedGain = max - min;
-    scalableSampler.fGain = SkScalarNearlyZero(computedGain) ? 1.0f : SkScalarToFloat(computedGain);
-    // Inner bias is pre-inner-gain so we divide that out.
-    scalableSampler.fBias = SkScalarToFloat(min) / scalableSampler.fGain;
-
-    static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
-    GrUniqueKey key;
-    GrUniqueKey::Builder builder(&key, kDomain, length, "Matrix Convolution Kernel");
-    // Texture cache key is the exact content of the kernel.
-    static_assert(sizeof(float) == 4);
-    for (int i = 0; i < length; i++) {
-        builder[i] = *(const uint32_t*)&values[i];
-    }
-    builder.finish();
-
-    // Find or create a texture.
-    GrProxyProvider* proxyProvider = context->priv().proxyProvider();
-    GrSurfaceProxyView view;
-    if (sk_sp<GrTextureProxy> kernelProxy = proxyProvider->findOrCreateProxyByUniqueKey(key)) {
-        GrSwizzle swizzle =
-            context->priv().caps()->getReadSwizzle(kernelProxy->backendFormat(),
-                                                   GrColorType::kAlpha_8);
-        view = {std::move(kernelProxy), kTopLeft_GrSurfaceOrigin, swizzle};
-    } else {
-        SkBitmap bm;
-        if (!bm.tryAllocPixels(SkImageInfo::MakeA8(GrNextPow2(length), 1))) {
-            return {};
-        }
-        for (int i = 0; i < length; i++) {
-            *(bm.getAddr8(i, 0)) =
-                SkScalarRoundToInt((values[i] - min) / scalableSampler.fGain * 255);
-        }
-        bm.setImmutable();
-        GrBitmapTextureMaker maker(context, bm, GrImageTexGenPolicy::kNew_Uncached_Budgeted);
-        view = maker.view(GrMipMapped::kNo);
-        if (!view) {
-            return {};
-        }
-        proxyProvider->assignUniqueKeyToProxy(key, view.asTextureProxy());
-    }
-    scalableSampler.fSampler = { std::move(view) };
-    return result;
-}
-
-bool GrMatrixConvolutionEffect::KernelWrapper::operator==(const KernelWrapper& k) const {
-    return fSize == k.fSize &&
-           (this->isSampled() ? fScalableSampler == k.fScalableSampler : fArray == k.fArray);
-}
-
-bool GrMatrixConvolutionEffect::KernelWrapper::ScalableSampler::operator==(
-                                                                const ScalableSampler& k) const {
-    return fSampler == k.fSampler && fGain == k.fGain && fBias == k.fBias;
-}
-
 void GrGLMatrixConvolutionEffect::emitCode(EmitArgs& args) {
     const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
 
-    int kernelWidth = mce.kernelSize().width();
-    int kernelHeight = mce.kernelSize().height();
+    int kWidth = mce.kernelSize().width();
+    int kHeight = mce.kernelSize().height();
 
-    int arrayCount = (kernelWidth * kernelHeight + 3) / 4;
-    SkASSERT(4 * arrayCount >= kernelWidth * kernelHeight);
+    int arrayCount = (kWidth * kHeight + 3) / 4;
+    SkASSERT(4 * arrayCount >= kWidth * kHeight);
 
     GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
-    if (mce.kernelIsSampled()) {
-        fKernelBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag,
-                                                    kFloat_GrSLType, "KernelBias");
-    } else {
-        fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag,
-                                                     kFloat4_GrSLType, "Kernel", arrayCount);
-    }
+    fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag, kHalf4_GrSLType,
+                                                 "Kernel",
+                                                 arrayCount);
     fKernelOffsetUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf2_GrSLType,
                                                   "KernelOffset");
-    fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kFloat_GrSLType, "Gain");
-    fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kFloat_GrSLType, "Bias");
+    fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Gain");
+    fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Bias");
 
     const char* kernelOffset = uniformHandler->getUniformCStr(fKernelOffsetUni);
+    const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
     const char* gain = uniformHandler->getUniformCStr(fGainUni);
     const char* bias = uniformHandler->getUniformCStr(fBiasUni);
 
     GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
     SkString coords2D = fragBuilder->ensureCoords2D(args.fTransformedCoords[0].fVaryingPoint,
                                                     mce.sampleMatrix());
-    fragBuilder->codeAppend("float4 sum = float4(0, 0, 0, 0);");
+    fragBuilder->codeAppend("half4 sum = half4(0, 0, 0, 0);");
     fragBuilder->codeAppendf("float2 coord = %s - %s;", coords2D.c_str(), kernelOffset);
-    fragBuilder->codeAppend("float4 c;");
+    fragBuilder->codeAppend("half4 c;");
 
-    for (int y = 0; y < kernelHeight; y++) {
-        for (int x = 0; x < kernelWidth; x++) {
+    const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
+    for (int y = 0; y < kHeight; y++) {
+        for (int x = 0; x < kWidth; x++) {
             GrGLSLShaderBuilder::ShaderBlock block(fragBuilder);
-            int offset = y*kernelWidth + x;
+            int offset = y*kWidth + x;
 
-            if (mce.kernelIsSampled()) {
-                const char* kernelBias = uniformHandler->getUniformCStr(fKernelBiasUni);
-                float xCoord = offset / (float)GrNextPow2(mce.kernelSize().area());
-
-                fragBuilder->codeAppend("float k = ");
-                fragBuilder->appendTextureLookup(args.fTexSamplers[0],
-                                         SkSL::String::printf("half2(%f, 0.5)", xCoord).c_str());
-                fragBuilder->codeAppendf(".w + %s;", kernelBias);
-            } else {
-                static constexpr const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
-                const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
-                fragBuilder->codeAppendf("float k = %s[%d]%s;", kernel, offset / 4,
-                                         kVecSuffix[offset & 0x3]);
-            }
-
+            fragBuilder->codeAppendf("half k = %s[%d]%s;", kernel, offset / 4,
+                                     kVecSuffix[offset & 0x3]);
             SkSL::String coord;
             coord.appendf("coord + half2(%d, %d)", x, y);
             auto sample = this->invokeChild(0, args, coord);
-            fragBuilder->codeAppendf("float4 c = %s;", sample.c_str());
+            fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
             if (!mce.convolveAlpha()) {
                 fragBuilder->codeAppend("c.rgb /= c.a;");
                 fragBuilder->codeAppend("c.rgb = saturate(c.rgb);");
@@ -187,16 +84,15 @@
         }
     }
     if (mce.convolveAlpha()) {
-        fragBuilder->codeAppendf("%s = half4(sum * %s + %s);", args.fOutputColor, gain, bias);
+        fragBuilder->codeAppendf("%s = sum * %s + %s;", args.fOutputColor, gain, bias);
         fragBuilder->codeAppendf("%s.a = saturate(%s.a);", args.fOutputColor, args.fOutputColor);
         fragBuilder->codeAppendf("%s.rgb = clamp(%s.rgb, 0.0, %s.a);",
                                  args.fOutputColor, args.fOutputColor, args.fOutputColor);
     } else {
         auto sample = this->invokeChild(0, args, coords2D.c_str());
         fragBuilder->codeAppendf("c = %s;", sample.c_str());
-        fragBuilder->codeAppendf("%s.a = half(c.a);", args.fOutputColor);
-        fragBuilder->codeAppendf("%s.rgb = half3(saturate(sum.rgb * %s + %s));",
-                                 args.fOutputColor, gain, bias);
+        fragBuilder->codeAppendf("%s.a = c.a;", args.fOutputColor);
+        fragBuilder->codeAppendf("%s.rgb = saturate(sum.rgb * %s + %s);", args.fOutputColor, gain, bias);
         fragBuilder->codeAppendf("%s.rgb *= %s.a;", args.fOutputColor, args.fOutputColor);
     }
     fragBuilder->codeAppendf("%s *= %s;\n", args.fOutputColor, args.fInputColor);
@@ -215,22 +111,17 @@
                                             const GrFragmentProcessor& processor) {
     const GrMatrixConvolutionEffect& conv = processor.cast<GrMatrixConvolutionEffect>();
     pdman.set2fv(fKernelOffsetUni, 1, conv.kernelOffset().ptr());
-    float totalGain = conv.gain();
-    if (conv.kernelIsSampled()) {
-        totalGain *= conv.kernelSampleGain();
-        pdman.set1f(fKernelBiasUni, conv.kernelSampleBias());
-    } else {
-        int kernelCount = conv.kernelSize().area();
-        int arrayCount = (kernelCount + 3) / 4;
-        SkASSERT(4 * arrayCount >= kernelCount);
-        pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
-    }
+    int kernelCount = conv.kernelSize().width() * conv.kernelSize().height();
+    int arrayCount = (kernelCount + 3) / 4;
+    SkASSERT(4 * arrayCount >= kernelCount);
+    pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
+    pdman.set1f(fGainUni, conv.gain());
     pdman.set1f(fBiasUni, conv.bias());
-    pdman.set1f(fGainUni, totalGain);
 }
 
 GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
-                                                     KernelWrapper kernel,
+                                                     const SkISize& kernelSize,
+                                                     const SkScalar* kernel,
                                                      SkScalar gain,
                                                      SkScalar bias,
                                                      const SkIPoint& kernelOffset,
@@ -238,14 +129,14 @@
         // To advertise either the modulation or opaqueness optimizations we'd have to examine the
         // parameters.
         : INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
-        , fKernel(std::move(kernel))
+        , fKernelSize(kernelSize)
         , fGain(SkScalarToFloat(gain))
         , fBias(SkScalarToFloat(bias) / 255.0f)
         , fConvolveAlpha(convolveAlpha) {
     child->setSampledWithExplicitCoords();
     this->registerChildProcessor(std::move(child));
-    if (fKernel.isSampled()) {
-        this->setTextureSamplerCnt(1);
+    for (int i = 0; i < kernelSize.width() * kernelSize.height(); i++) {
+        fKernel[i] = SkScalarToFloat(kernel[i]);
     }
     fKernelOffset = {static_cast<float>(kernelOffset.x()),
                      static_cast<float>(kernelOffset.y())};
@@ -254,7 +145,7 @@
 
 GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(const GrMatrixConvolutionEffect& that)
         : INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
-        , fKernel(that.fKernel)
+        , fKernelSize(that.fKernelSize)
         , fGain(that.fGain)
         , fBias(that.fBias)
         , fKernelOffset(that.fKernelOffset)
@@ -262,9 +153,7 @@
     auto child = that.childProcessor(0).clone();
     child->setSampledWithExplicitCoords();
     this->registerChildProcessor(std::move(child));
-    if (fKernel.isSampled()) {
-        this->setTextureSamplerCnt(1);
-    }
+    std::copy_n(that.fKernel, fKernelSize.width() * fKernelSize.height(), fKernel);
     this->addCoordTransform(&fCoordTransform);
 }
 
@@ -283,18 +172,14 @@
 
 bool GrMatrixConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
     const GrMatrixConvolutionEffect& s = sBase.cast<GrMatrixConvolutionEffect>();
-    return fKernel == s.fKernel &&
+    return fKernelSize == s.kernelSize() &&
+           std::equal(fKernel, fKernel + fKernelSize.area(), s.fKernel) &&
            fGain == s.gain() &&
            fBias == s.bias() &&
            fKernelOffset == s.kernelOffset() &&
            fConvolveAlpha == s.convolveAlpha();
 }
 
-const GrFragmentProcessor::TextureSampler& GrMatrixConvolutionEffect::onTextureSampler(
-        int index) const {
-    return IthTextureSampler(index, fKernel.scalableSampler().fSampler);
-}
-
 static void fill_in_1D_gaussian_kernel_with_stride(float* kernel, int size, int stride,
                                                    float twoSigmaSqrd) {
     SkASSERT(!SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero));
@@ -319,6 +204,7 @@
 
 static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
                                        SkScalar sigmaX, SkScalar sigmaY) {
+    SkASSERT(width * height <= MAX_KERNEL_SIZE);
     const float twoSigmaSqrdX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
     const float twoSigmaSqrdY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
 
@@ -374,8 +260,7 @@
     }
 }
 
-std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrRecordingContext* context,
-                                                                     GrSurfaceProxyView srcView,
+std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrSurfaceProxyView srcView,
                                                                      const SkIRect& srcBounds,
                                                                      const SkISize& kernelSize,
                                                                      const SkScalar* kernel,
@@ -385,19 +270,14 @@
                                                                      GrSamplerState::WrapMode wm,
                                                                      bool convolveAlpha,
                                                                      const GrCaps& caps) {
-    auto kw = KernelWrapper::Make(context, kernelSize, kernel);
-    if (!kw.isValid()) {
-        return nullptr;
-    }
     GrSamplerState sampler(wm, GrSamplerState::Filter::kNearest);
     auto child = GrTextureEffect::MakeSubset(std::move(srcView), kPremul_SkAlphaType, SkMatrix::I(),
                                              sampler, SkRect::Make(srcBounds), caps);
     return std::unique_ptr<GrFragmentProcessor>(new GrMatrixConvolutionEffect(
-            std::move(child), std::move(kw), gain, bias, kernelOffset, convolveAlpha));
+            std::move(child), kernelSize, kernel, gain, bias, kernelOffset, convolveAlpha));
 }
 
 std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::MakeGaussian(
-        GrRecordingContext* context,
         GrSurfaceProxyView srcView,
         const SkIRect& srcBounds,
         const SkISize& kernelSize,
@@ -409,11 +289,11 @@
         SkScalar sigmaX,
         SkScalar sigmaY,
         const GrCaps& caps) {
-    SkAutoSTMalloc<32, float> kernel(kernelSize.area());
-    fill_in_2D_gaussian_kernel(kernel.get(), kernelSize.width(), kernelSize.height(),
-                               sigmaX, sigmaY);
-    return Make(context, std::move(srcView), srcBounds, kernelSize, kernel.get(),
-                gain, bias, kernelOffset, wm, convolveAlpha, caps);
+    float kernel[MAX_KERNEL_SIZE];
+
+    fill_in_2D_gaussian_kernel(kernel, kernelSize.width(), kernelSize.height(), sigmaX, sigmaY);
+    return Make(std::move(srcView), srcBounds, kernelSize, kernel, gain, bias, kernelOffset, wm,
+                convolveAlpha, caps);
 }
 
 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect);
@@ -422,9 +302,8 @@
 std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::TestCreate(GrProcessorTestData* d) {
     auto [view, ct, at] = d->randomView();
 
-    static constexpr size_t kMaxTestKernelSize = 2 * kMaxUniformSize;
-    int width = d->fRandom->nextRangeU(1, kMaxTestKernelSize);
-    int height = d->fRandom->nextRangeU(1, kMaxTestKernelSize / width);
+    int width = d->fRandom->nextRangeU(1, MAX_KERNEL_SIZE);
+    int height = d->fRandom->nextRangeU(1, MAX_KERNEL_SIZE / width);
     SkISize kernelSize = SkISize::Make(width, height);
     std::unique_ptr<SkScalar[]> kernel(new SkScalar[width * height]);
     for (int i = 0; i < width * height; i++) {
@@ -446,8 +325,8 @@
     auto wm = static_cast<GrSamplerState::WrapMode>(
             d->fRandom->nextULessThan(GrSamplerState::kWrapModeCount));
     bool convolveAlpha = d->fRandom->nextBool();
-    return GrMatrixConvolutionEffect::Make(d->context()->priv().asRecordingContext(),
-                                           std::move(view),
+
+    return GrMatrixConvolutionEffect::Make(std::move(view),
                                            bounds,
                                            kernelSize,
                                            kernel.get(),