Go back to rect blur profile textures.

It's faster than evaluating the analytic polynomial approximation in the
shader.

This version of the profile texture has better binning and simpler shader
logic than the original. I believe it also avoids some integralization
that led to artifacts in the previous texture implementation.

We oversize the profile for the blur, bin its width by powers of 2 with
a 32-texel minimum, and then rescale the texture coordinates over a
6-sigma range in the shader. We pre-inset the rect uniform so the
texture is always placed with one end at the inset rect's edge,
extending outward.

Bug: chromium:995308
Change-Id: I739db98d4df69de0f5b2f5dda079cf034ba32035
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239119
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/effects/GrRectBlurEffect.fp b/src/gpu/effects/GrRectBlurEffect.fp
index 9cb7778..03922a8 100644
--- a/src/gpu/effects/GrRectBlurEffect.fp
+++ b/src/gpu/effects/GrRectBlurEffect.fp
@@ -6,10 +6,11 @@
  */
 
 @header {
-    #include "include/core/SkScalar.h"
-    #include "src/core/SkBlurMask.h"
-    #include "src/gpu/GrProxyProvider.h"
-    #include "src/gpu/GrShaderCaps.h"
+#include "include/core/SkScalar.h"
+#include "src/core/SkBlurMask.h"
+#include "src/core/SkMathPriv.h"
+#include "src/gpu/GrProxyProvider.h"
+#include "src/gpu/GrShaderCaps.h"
 }
 
 in float4 rect;
@@ -20,7 +21,55 @@
 layout(when= highp) uniform float4 rectF;
 layout(when=!highp) uniform half4  rectH;
 
-in uniform half sigma;
+in uniform sampler2D blurProfile;
+in uniform half invProfileWidth;
+
+@constructorParams {
+    GrSamplerState samplerParams
+}
+
+@samplerParams(blurProfile) {
+    samplerParams
+}
+@class {
+static sk_sp<GrTextureProxy> CreateBlurProfileTexture(GrProxyProvider* proxyProvider,
+                                                      float sigma) {
+    // The "profile" we are calculating is the integral of a Gaussian with 'sigma' and a half
+    // plane. All such profiles are just scales of each other. So all we really care about is
+    // having enough resolution so that the linear interpolation done in texture lookup doesn't
+    // introduce noticeable artifacts. SkBlurMask::ComputeBlurProfile() produces profiles with
+    // ceil(6 * sigma) entries. We conservatively choose to have 2 texels for each dst pixel.
+    int minProfileWidth = 2 * sk_float_ceil2int(6 * sigma);
+    // Bin by powers of 2 with a minimum so we get good profile reuse (remember we can just scale
+    // the texture coords to span the larger profile over a 6 sigma distance).
+    int profileWidth = SkTMax(SkNextPow2(minProfileWidth), 32);
+
+    static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+    GrUniqueKey key;
+    GrUniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
+    builder[0] = profileWidth;
+    builder.finish();
+
+    sk_sp<GrTextureProxy> blurProfile(proxyProvider->findOrCreateProxyByUniqueKey(
+            key, GrColorType::kAlpha_8, kTopLeft_GrSurfaceOrigin));
+    if (!blurProfile) {
+        SkBitmap bitmap;
+        if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
+            return nullptr;
+        }
+        SkBlurMask::ComputeBlurProfile(bitmap.getAddr8(0, 0), profileWidth, profileWidth / 6.f);
+        bitmap.setImmutable();
+        blurProfile = proxyProvider->createProxyFromBitmap(bitmap, GrMipMapped::kNo);
+        if (!blurProfile) {
+            return nullptr;
+        }
+        SkASSERT(blurProfile->origin() == kTopLeft_GrSurfaceOrigin);
+        proxyProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+    }
+
+    return blurProfile;
+}
+}
 
 @make {
      static std::unique_ptr<GrFragmentProcessor> Make(GrProxyProvider* proxyProvider,
@@ -36,11 +85,6 @@
                     return nullptr;
              }
          }
-         // Sigma is always a half.
-         SkASSERT(sigma > 0);
-         if (sigma > 16000.f) {
-             return nullptr;
-         }
 
          if (doubleProfileSize >= (float) rect.width() ||
              doubleProfileSize >= (float) rect.height()) {
@@ -49,52 +93,45 @@
              return nullptr;
          }
 
-         return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(rect, sigma));
+         auto profile = CreateBlurProfileTexture(proxyProvider, sigma);
+         if (!profile) {
+             return nullptr;
+         }
+         // The profile is calculated such that the midpoint is at the rect's edge. To simplify
+         // calculating texture coords in the shader, we inset the rect such that the profile
+         // can be used with one end point aligned to the edges of the rect uniform. The texture
+         // coords should be scaled such that the profile is sampled over a 6 sigma range so inset
+         // by 3 sigma.
+         float halfW = 3.f * sigma;
+         auto insetR = rect.makeInset(halfW, halfW);
+         // inverse of the width over which the profile texture should be interpolated outward from
+         // the inset rect.
+         float invWidth = 1.f / (2 * halfW);
+         return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
+                 insetR, std::move(profile), invWidth, GrSamplerState::ClampBilerp()));
      }
 }
 
 void main() {
         // Get the smaller of the signed distance from the frag coord to the left and right edges
         // and similar for y.
+        // The blur profile computed by SkBlurMask::ComputeBlurProfile is actually 1 - integral.
+        // The integral is an S-looking shape that is symmetric about 0, so we just compute x and
+        // y "backwards" such that texture coord is 1 at the edge and goes to 0 as we move outward.
         half x;
         @if (highp) {
-            x = min(half(sk_FragCoord.x - rectF.x), half(rectF.z - sk_FragCoord.x));
+            x = max(half(rectF.x - sk_FragCoord.x), half(sk_FragCoord.x - rectF.z));
         } else {
-            x = min(half(sk_FragCoord.x - rectH.x), half(rectH.z - sk_FragCoord.x));
+            x = max(half(rectH.x - sk_FragCoord.x), half(sk_FragCoord.x - rectH.z));
         }
         half y;
         @if (highp) {
-            y = min(half(sk_FragCoord.y - rectF.y), half(rectF.w - sk_FragCoord.y));
+            y = max(half(rectF.y - sk_FragCoord.y), half(sk_FragCoord.y - rectF.w));
         } else {
-            y = min(half(sk_FragCoord.y - rectH.y), half(rectH.w - sk_FragCoord.y));
+            y = max(half(rectH.y - sk_FragCoord.y), half(sk_FragCoord.y - rectH.w));
         }
-        // The sw code computes an approximation of an integral of the Gaussian from -inf to x,
-        // where x is the signed distance to the edge (positive inside the rect). The approximation
-        // is based on three box filters and is a piecewise cubic. The piecewise nature introduces
-        // branches so here we use a 5th degree very close approximation of the piecewise cubic. The
-        // piecewise cubic goes from 0 to 1 as x goes from -1.5 to 1.5.
-        half r = 1 / (2.0 * sigma);
-        x *= r;
-        y *= r;
-        // The polynomial is such that we can either clamp the domain or the range. Clamping the
-        // range (xCoverage/yCoverage) seems to be faster but the polynomial quickly produces very
-        // large absolute values outside the [-1.5, 1.5] domain and some mobile GPUs don't seem to
-        // properly produce -infs or infs in that case. So instead we clamp the domain (x/y). The
-        // perf is probably because clamping to [0, 1] is faster than clamping to [-1.5, 1.5].
-        x = clamp(x, -1.5, 1.5);
-        y = clamp(y, -1.5, 1.5);
-        half x2 = x * x;
-        half x3 = x2 * x;
-        half x5 = x2 * x3;
-        half a =  0.734822;
-        half b = -0.313376;
-        half c =  0.0609169;
-        half d =  0.5;
-        half xCoverage = a * x + b * x3 + c * x5 + d;
-        half y2 = y * y;
-        half y3 = y2 * y;
-        half y5 = y2 * y3;
-        half yCoverage = a * y + b * y3 + c * y5 + d;
+        half xCoverage = sample(blurProfile, half2(x * invProfileWidth, 0.5)).a;
+        half yCoverage = sample(blurProfile, half2(y * invProfileWidth, 0.5)).a;
         sk_OutColor = sk_InColor * xCoverage * yCoverage;
 }
 
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.cpp b/src/gpu/effects/generated/GrRectBlurEffect.cpp
index e4412e2..a717040 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRectBlurEffect.cpp
@@ -25,8 +25,8 @@
         (void)_outer;
         auto rect = _outer.rect;
         (void)rect;
-        auto sigma = _outer.sigma;
-        (void)sigma;
+        auto invProfileWidth = _outer.invProfileWidth;
+        (void)invProfileWidth;
         highp = ((abs(rect.left()) > 16000.0 || abs(rect.top()) > 16000.0) ||
                  abs(rect.right()) > 16000.0) ||
                 abs(rect.bottom()) > 16000.0;
@@ -38,16 +38,17 @@
             rectHVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf4_GrSLType,
                                                         "rectH");
         }
-        sigmaVar =
-                args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf_GrSLType, "sigma");
+        invProfileWidthVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag,
+                                                              kHalf_GrSLType, "invProfileWidth");
         fragBuilder->codeAppendf(
-                "/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n    x = "
-                "min(half(sk_FragCoord.x - %s.x), half(%s.z - sk_FragCoord.x));\n} else {\n    x = "
-                "min(half(sk_FragCoord.x - float(%s.x)), half(float(%s.z) - "
-                "sk_FragCoord.x));\n}\nhalf y;\n@if (highp) {\n    y = min(half(sk_FragCoord.y - "
-                "%s.y), half(%s.w - sk_FragCoord.y));\n} else {\n    y = min(half(sk_FragCoord.y - "
-                "float(%s.y)), half(float(%s.w) - sk_FragCoord.y));\n}\nhalf r = 1.0 / (2.0 * "
-                "%s);\nx *= r;\ny *= r;\nx = clamp(x, -1.5, 1.5);\ny = clamp(y, -1.5, 1.5",
+                "/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n    x = max(half(%s.x - "
+                "sk_FragCoord.x), half(sk_FragCoord.x - %s.z));\n} else {\n    x = "
+                "max(half(float(%s.x) - sk_FragCoord.x), half(sk_FragCoord.x - "
+                "float(%s.z)));\n}\nhalf y;\n@if (highp) {\n    y = max(half(%s.y - "
+                "sk_FragCoord.y), half(sk_FragCoord.y - %s.w));\n} else {\n    y = "
+                "max(half(float(%s.y) - sk_FragCoord.y), half(sk_FragCoord.y - "
+                "float(%s.w)));\n}\nhalf xCoverage = sample(%s, float2(half2(x * %s, "
+                "0.5))).%s.w;\nhalf yCoverage = sample(%s, flo",
                 (highp ? "true" : "false"),
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
@@ -57,13 +58,14 @@
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
-                args.fUniformHandler->getUniformCStr(sigmaVar));
+                fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]),
+                args.fUniformHandler->getUniformCStr(invProfileWidthVar),
+                fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[0]).c_str(),
+                fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]));
         fragBuilder->codeAppendf(
-                ");\nhalf x2 = x * x;\nhalf x3 = x2 * x;\nhalf x5 = x2 * x3;\n\n\n\n\nhalf "
-                "xCoverage = ((0.73482197523117065 * x + -0.31337600946426392 * x3) + "
-                "0.060916900634765625 * x5) + 0.5;\nhalf y2 = y * y;\nhalf y3 = y2 * y;\nhalf y5 = "
-                "y2 * y3;\nhalf yCoverage = ((0.73482197523117065 * y + -0.31337600946426392 * y3) "
-                "+ 0.060916900634765625 * y5) + 0.5;\n%s = (%s * xCoverage) * yCoverage;\n",
+                "at2(half2(y * %s, 0.5))).%s.w;\n%s = (%s * xCoverage) * yCoverage;\n",
+                args.fUniformHandler->getUniformCStr(invProfileWidthVar),
+                fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[0]).c_str(),
                 args.fOutputColor, args.fInputColor);
     }
 
@@ -71,15 +73,18 @@
     void onSetData(const GrGLSLProgramDataManager& pdman,
                    const GrFragmentProcessor& _proc) override {
         const GrRectBlurEffect& _outer = _proc.cast<GrRectBlurEffect>();
-        { pdman.set1f(sigmaVar, (_outer.sigma)); }
+        { pdman.set1f(invProfileWidthVar, (_outer.invProfileWidth)); }
         auto rect = _outer.rect;
         (void)rect;
         UniformHandle& rectF = rectFVar;
         (void)rectF;
         UniformHandle& rectH = rectHVar;
         (void)rectH;
-        UniformHandle& sigma = sigmaVar;
-        (void)sigma;
+        GrSurfaceProxy& blurProfileProxy = *_outer.textureSampler(0).proxy();
+        GrTexture& blurProfile = *blurProfileProxy.peekTexture();
+        (void)blurProfile;
+        UniformHandle& invProfileWidth = invProfileWidthVar;
+        (void)invProfileWidth;
 
         float r[]{rect.fLeft, rect.fTop, rect.fRight, rect.fBottom};
         pdman.set4fv(highp ? rectF : rectH, 1, r);
@@ -87,7 +92,7 @@
     bool highp = false;
     UniformHandle rectFVar;
     UniformHandle rectHVar;
-    UniformHandle sigmaVar;
+    UniformHandle invProfileWidthVar;
 };
 GrGLSLFragmentProcessor* GrRectBlurEffect::onCreateGLSLInstance() const {
     return new GrGLSLRectBlurEffect();
@@ -103,16 +108,23 @@
     const GrRectBlurEffect& that = other.cast<GrRectBlurEffect>();
     (void)that;
     if (rect != that.rect) return false;
-    if (sigma != that.sigma) return false;
+    if (blurProfile != that.blurProfile) return false;
+    if (invProfileWidth != that.invProfileWidth) return false;
     return true;
 }
 GrRectBlurEffect::GrRectBlurEffect(const GrRectBlurEffect& src)
         : INHERITED(kGrRectBlurEffect_ClassID, src.optimizationFlags())
         , rect(src.rect)
-        , sigma(src.sigma) {}
+        , blurProfile(src.blurProfile)
+        , invProfileWidth(src.invProfileWidth) {
+    this->setTextureSamplerCnt(1);
+}
 std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::clone() const {
     return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(*this));
 }
+const GrFragmentProcessor::TextureSampler& GrRectBlurEffect::onTextureSampler(int index) const {
+    return IthTextureSampler(index, blurProfile);
+}
 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrRectBlurEffect);
 #if GR_TEST_UTILS
 std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::TestCreate(GrProcessorTestData* data) {
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.h b/src/gpu/effects/generated/GrRectBlurEffect.h
index 2d14df4..1b0e183 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.h
+++ b/src/gpu/effects/generated/GrRectBlurEffect.h
@@ -14,6 +14,7 @@
 
 #include "include/core/SkScalar.h"
 #include "src/core/SkBlurMask.h"
+#include "src/core/SkMathPriv.h"
 #include "src/gpu/GrProxyProvider.h"
 #include "src/gpu/GrShaderCaps.h"
 
@@ -21,6 +22,44 @@
 #include "src/gpu/GrFragmentProcessor.h"
 class GrRectBlurEffect : public GrFragmentProcessor {
 public:
+    static sk_sp<GrTextureProxy> CreateBlurProfileTexture(GrProxyProvider* proxyProvider,
+                                                          float sigma) {
+        // The "profile" we are calculating is the integral of a Gaussian with 'sigma' and a half
+        // plane. All such profiles are just scales of each other. So all we really care about is
+        // having enough resolution so that the linear interpolation done in texture lookup doesn't
+        // introduce noticeable artifacts. SkBlurMask::ComputeBlurProfile() produces profiles with
+        // ceil(6 * sigma) entries. We conservatively choose to have 2 texels for each dst pixel.
+        int minProfileWidth = 2 * sk_float_ceil2int(6 * sigma);
+        // Bin by powers of 2 with a minimum so we get good profile reuse (remember we can just
+        // scale the texture coords to span the larger profile over a 6 sigma distance).
+        int profileWidth = SkTMax(SkNextPow2(minProfileWidth), 32);
+
+        static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+        GrUniqueKey key;
+        GrUniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
+        builder[0] = profileWidth;
+        builder.finish();
+
+        sk_sp<GrTextureProxy> blurProfile(proxyProvider->findOrCreateProxyByUniqueKey(
+                key, GrColorType::kAlpha_8, kTopLeft_GrSurfaceOrigin));
+        if (!blurProfile) {
+            SkBitmap bitmap;
+            if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
+                return nullptr;
+            }
+            SkBlurMask::ComputeBlurProfile(bitmap.getAddr8(0, 0), profileWidth, profileWidth / 6.f);
+            bitmap.setImmutable();
+            blurProfile = proxyProvider->createProxyFromBitmap(bitmap, GrMipMapped::kNo);
+            if (!blurProfile) {
+                return nullptr;
+            }
+            SkASSERT(blurProfile->origin() == kTopLeft_GrSurfaceOrigin);
+            proxyProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+        }
+
+        return blurProfile;
+    }
+
     static std::unique_ptr<GrFragmentProcessor> Make(GrProxyProvider* proxyProvider,
                                                      const GrShaderCaps& caps, const SkRect& rect,
                                                      float sigma) {
@@ -34,11 +73,6 @@
                 return nullptr;
             }
         }
-        // Sigma is always a half.
-        SkASSERT(sigma > 0);
-        if (sigma > 16000.f) {
-            return nullptr;
-        }
 
         if (doubleProfileSize >= (float)rect.width() || doubleProfileSize >= (float)rect.height()) {
             // if the blur sigma is too large so the gaussian overlaps the whole
@@ -46,23 +80,44 @@
             return nullptr;
         }
 
-        return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(rect, sigma));
+        auto profile = CreateBlurProfileTexture(proxyProvider, sigma);
+        if (!profile) {
+            return nullptr;
+        }
+        // The profile is calculated such that the midpoint is at the rect's edge. To simplify
+        // calculating texture coords in the shader, we inset the rect such that the profile
+        // can be used with one end point aligned to the edges of the rect uniform. The texture
+        // coords should be scaled such that the profile is sampled over a 6 sigma range so inset
+        // by 3 sigma.
+        float halfW = 3.f * sigma;
+        auto insetR = rect.makeInset(halfW, halfW);
+        // inverse of the width over which the profile texture should be interpolated outward from
+        // the inset rect.
+        float invWidth = 1.f / (2 * halfW);
+        return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
+                insetR, std::move(profile), invWidth, GrSamplerState::ClampBilerp()));
     }
     GrRectBlurEffect(const GrRectBlurEffect& src);
     std::unique_ptr<GrFragmentProcessor> clone() const override;
     const char* name() const override { return "RectBlurEffect"; }
     SkRect rect;
-    float sigma;
+    TextureSampler blurProfile;
+    float invProfileWidth;
 
 private:
-    GrRectBlurEffect(SkRect rect, float sigma)
+    GrRectBlurEffect(SkRect rect, sk_sp<GrTextureProxy> blurProfile, float invProfileWidth,
+                     GrSamplerState samplerParams)
             : INHERITED(kGrRectBlurEffect_ClassID,
                         (OptimizationFlags)kCompatibleWithCoverageAsAlpha_OptimizationFlag)
             , rect(rect)
-            , sigma(sigma) {}
+            , blurProfile(std::move(blurProfile), samplerParams)
+            , invProfileWidth(invProfileWidth) {
+        this->setTextureSamplerCnt(1);
+    }
     GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
     void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override;
     bool onIsEqual(const GrFragmentProcessor&) const override;
+    const TextureSampler& onTextureSampler(int) const override;
     GR_DECLARE_FRAGMENT_PROCESSOR_TEST
     typedef GrFragmentProcessor INHERITED;
 };