GrRectBlurEffect: approximate piecewise cubic with quintic.

Perf: Mali 400 and Quadro P1000 both show a ~48% reduction in time.



Change-Id: Ib61a51355fa3202029dd87d483acb1027f17a7df
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/235679
Commit-Queue: Brian Salomon <bsalomon@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/effects/GrRectBlurEffect.fp b/src/gpu/effects/GrRectBlurEffect.fp
index 5498e73..9cb7778 100644
--- a/src/gpu/effects/GrRectBlurEffect.fp
+++ b/src/gpu/effects/GrRectBlurEffect.fp
@@ -54,71 +54,48 @@
 }
 
 void main() {
-    half invr = 1.0 / (2.0 * sigma);
-
-    // Get the smaller of the signed distance from the frag coord to the left and right edges.
-    half x;
-    @if (highp) {
-        float lDiff = rectF.x - sk_FragCoord.x;
-        float rDiff = sk_FragCoord.x - rectF.z;
-        x = half(max(lDiff, rDiff) * invr);
-    } else {
-        half lDiff = half(rectH.x - sk_FragCoord.x);
-        half rDiff = half(sk_FragCoord.x - rectH.z);
-        x = max(lDiff, rDiff) * invr;
-    }
-    // This is lifted from the implementation of SkBlurMask::ComputeBlurProfile. It approximates
-    // a Gaussian as three box filters, and then computes the integral of this approximation from
-    // -inf to x.
-    // TODO: Make this a function when supported in .fp files as we duplicate it for y below.
-    half xCoverage;
-    if (x > 1.5) {
-        xCoverage = 0.0;
-    } else if (x < -1.5) {
-        xCoverage = 1.0;
-    } else {
+        // Get the smaller of the signed distances from the frag coord to the left and right edges
+        // and similarly for y.
+        half x;
+        @if (highp) {
+            x = min(half(sk_FragCoord.x - rectF.x), half(rectF.z - sk_FragCoord.x));
+        } else {
+            x = min(half(sk_FragCoord.x - rectH.x), half(rectH.z - sk_FragCoord.x));
+        }
+        half y;
+        @if (highp) {
+            y = min(half(sk_FragCoord.y - rectF.y), half(rectF.w - sk_FragCoord.y));
+        } else {
+            y = min(half(sk_FragCoord.y - rectH.y), half(rectH.w - sk_FragCoord.y));
+        }
+        // The sw code computes an approximation of an integral of the Gaussian from -inf to x,
+        // where x is the signed distance to the edge (positive inside the rect). The approximation
+        // is based on three box filters and is a piecewise cubic. The piecewise nature introduces
+        // branches so here we use a 5th degree very close approximation of the piecewise cubic. The
+        // piecewise cubic goes from 0 to 1 as x goes from -1.5 to 1.5.
+        half r = 1 / (2.0 * sigma);
+        x *= r;
+        y *= r;
+        // The polynomial is such that we can either clamp the domain or the range. Clamping the
+        // range (xCoverage/yCoverage) seems to be faster but the polynomial quickly produces very
+        // large absolute values outside the [-1.5, 1.5] domain and some mobile GPUs don't seem to
+        // properly produce -infs or infs in that case. So instead we clamp the domain (x/y). The
+        // perf gap is probably because clamping to [0, 1] is faster than clamping to [-1.5, 1.5].
+        x = clamp(x, -1.5, 1.5);
+        y = clamp(y, -1.5, 1.5);
         half x2 = x * x;
         half x3 = x2 * x;
-
-        if (x > 0.5) {
-            xCoverage = 0.5625 - (x3 / 6.0 - 3.0 * x2 * 0.25 + 1.125 * x);
-        } else if (x > -0.5) {
-            xCoverage = 0.5 - (0.75 * x - x3 / 3.0);
-        } else {
-            xCoverage = 0.4375 + (-x3 / 6.0 - 3.0 * x2 * 0.25 - 1.125 * x);
-        }
-    }
-
-    // Repeat of above for y.
-    half y;
-    @if (highp) {
-        float tDiff = rectF.y - sk_FragCoord.y;
-        float bDiff = sk_FragCoord.y - rectF.w;
-        y = half(max(tDiff, bDiff) * invr);
-    } else {
-        half tDiff = half(rectH.y - sk_FragCoord.y);
-        half bDiff = half(sk_FragCoord.y - rectH.w);
-        y = max(tDiff, bDiff) * invr;
-    }
-    half yCoverage;
-    if (y > 1.5) {
-        yCoverage = 0.0;
-    } else if (y < -1.5) {
-        yCoverage = 1.0;
-    } else {
+        half x5 = x2 * x3;
+        half a =  0.734822;
+        half b = -0.313376;
+        half c =  0.0609169;
+        half d =  0.5;
+        half xCoverage = a * x + b * x3 + c * x5 + d;
         half y2 = y * y;
         half y3 = y2 * y;
-
-        if (y > 0.5) {
-            yCoverage = 0.5625 - (y3 / 6.0 - 3.0 * y2 * 0.25 + 1.125 * y);
-        } else if (y > -0.5) {
-            yCoverage = 0.5 - (0.75 * y - y3 / 3.0);
-        } else {
-            yCoverage = 0.4375 + (-y3 / 6.0 - 3.0 * y2 * 0.25 - 1.125 * y);
-        }
-    }
-
-    sk_OutColor = sk_InColor * xCoverage * yCoverage;
+        half y5 = y2 * y3;
+        half yCoverage = a * y + b * y3 + c * y5 + d;
+        sk_OutColor = sk_InColor * xCoverage * yCoverage;
 }
 
 @setData(pdman) {
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.cpp b/src/gpu/effects/generated/GrRectBlurEffect.cpp
index 1dfd304..e4412e2 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRectBlurEffect.cpp
@@ -41,40 +41,30 @@
         sigmaVar =
                 args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf_GrSLType, "sigma");
         fragBuilder->codeAppendf(
-                "/* key */ bool highp = %s;\nhalf invr = 1.0 / (2.0 * %s);\nhalf x;\n@if (highp) "
-                "{\n    float lDiff = %s.x - sk_FragCoord.x;\n    float rDiff = sk_FragCoord.x - "
-                "%s.z;\n    x = half(max(lDiff, rDiff) * float(invr));\n} else {\n    half lDiff = "
-                "half(float(%s.x) - sk_FragCoord.x);\n    half rDiff = half(sk_FragCoord.x - "
-                "float(%s.z));\n    x = max(lDiff, rDiff) * invr;\n}\nhalf xCoverage;\nif (x > "
-                "1.5) {\n    xCoverage = 0.0;\n} else if (x < -1.5) {\n    xCoverage = 1.0;\n} "
-                "else {\n    half x2 = x * x;\n    half",
-                (highp ? "true" : "false"), args.fUniformHandler->getUniformCStr(sigmaVar),
+                "/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n    x = "
+                "min(half(sk_FragCoord.x - %s.x), half(%s.z - sk_FragCoord.x));\n} else {\n    x = "
+                "min(half(sk_FragCoord.x - float(%s.x)), half(float(%s.z) - "
+                "sk_FragCoord.x));\n}\nhalf y;\n@if (highp) {\n    y = min(half(sk_FragCoord.y - "
+                "%s.y), half(%s.w - sk_FragCoord.y));\n} else {\n    y = min(half(sk_FragCoord.y - "
+                "float(%s.y)), half(float(%s.w) - sk_FragCoord.y));\n}\nhalf r = 1.0 / (2.0 * "
+                "%s);\nx *= r;\ny *= r;\nx = clamp(x, -1.5, 1.5);\ny = clamp(y, -1.5, 1.5",
+                (highp ? "true" : "false"),
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
-                rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
-        fragBuilder->codeAppendf(
-                " x3 = x2 * x;\n    if (x > 0.5) {\n        xCoverage = 0.5625 - ((x3 / 6.0 - (3.0 "
-                "* x2) * 0.25) + 1.125 * x);\n    } else if (x > -0.5) {\n        xCoverage = 0.5 "
-                "- (0.75 * x - x3 / 3.0);\n    } else {\n        xCoverage = 0.4375 + ((-x3 / 6.0 "
-                "- (3.0 * x2) * 0.25) - 1.125 * x);\n    }\n}\nhalf y;\n@if (highp) {\n    float "
-                "tDiff = %s.y - sk_FragCoord.y;\n    float bDiff = sk_FragCoord.y - %s.w;\n    y = "
-                "half(max(tDiff, bDiff) * float(invr));\n} else {\n    half tDiff = "
-                "half(float(%s.y) - sk_FragCoord.y);\n  ",
+                rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
                 rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
-                rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
+                rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
+                rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
+                args.fUniformHandler->getUniformCStr(sigmaVar));
         fragBuilder->codeAppendf(
-                "  half bDiff = half(sk_FragCoord.y - float(%s.w));\n    y = max(tDiff, bDiff) * "
-                "invr;\n}\nhalf yCoverage;\nif (y > 1.5) {\n    yCoverage = 0.0;\n} else if (y < "
-                "-1.5) {\n    yCoverage = 1.0;\n} else {\n    half y2 = y * y;\n    half y3 = y2 * "
-                "y;\n    if (y > 0.5) {\n        yCoverage = 0.5625 - ((y3 / 6.0 - (3.0 * y2) * "
-                "0.25) + 1.125 * y);\n    } else if (y > -0.5) {\n        yCoverage = 0.5 - (0.75 "
-                "* y - y3 / 3.0);\n    } else {\n        yCoverage = 0.4375 + ((-y3 / 6.0 - (3.0 * "
-                "y2) * 0.25) - 1.125 * y);\n ",
-                rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
-        fragBuilder->codeAppendf("   }\n}\n%s = (%s * xCoverage) * yCoverage;\n", args.fOutputColor,
-                                 args.fInputColor);
+                ");\nhalf x2 = x * x;\nhalf x3 = x2 * x;\nhalf x5 = x2 * x3;\n\n\n\n\nhalf "
+                "xCoverage = ((0.73482197523117065 * x + -0.31337600946426392 * x3) + "
+                "0.060916900634765625 * x5) + 0.5;\nhalf y2 = y * y;\nhalf y3 = y2 * y;\nhalf y5 = "
+                "y2 * y3;\nhalf yCoverage = ((0.73482197523117065 * y + -0.31337600946426392 * y3) "
+                "+ 0.060916900634765625 * y5) + 0.5;\n%s = (%s * xCoverage) * yCoverage;\n",
+                args.fOutputColor, args.fInputColor);
     }
 
 private: