GrRectBlurEffect: approximate piecewise cubic with quintic.
Mali 400 and Quadro P1000 both have ~48% reduction.
Change-Id: Ib61a51355fa3202029dd87d483acb1027f17a7df
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/235679
Commit-Queue: Brian Salomon <bsalomon@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/effects/GrRectBlurEffect.fp b/src/gpu/effects/GrRectBlurEffect.fp
index 5498e73..9cb7778 100644
--- a/src/gpu/effects/GrRectBlurEffect.fp
+++ b/src/gpu/effects/GrRectBlurEffect.fp
@@ -54,71 +54,48 @@
}
void main() {
- half invr = 1.0 / (2.0 * sigma);
-
- // Get the smaller of the signed distance from the frag coord to the left and right edges.
- half x;
- @if (highp) {
- float lDiff = rectF.x - sk_FragCoord.x;
- float rDiff = sk_FragCoord.x - rectF.z;
- x = half(max(lDiff, rDiff) * invr);
- } else {
- half lDiff = half(rectH.x - sk_FragCoord.x);
- half rDiff = half(sk_FragCoord.x - rectH.z);
- x = max(lDiff, rDiff) * invr;
- }
- // This is lifted from the implementation of SkBlurMask::ComputeBlurProfile. It approximates
- // a Gaussian as three box filters, and then computes the integral of this approximation from
- // -inf to x.
- // TODO: Make this a function when supported in .fp files as we duplicate it for y below.
- half xCoverage;
- if (x > 1.5) {
- xCoverage = 0.0;
- } else if (x < -1.5) {
- xCoverage = 1.0;
- } else {
+ // Get the smaller of the signed distances from the frag coord to the left and right edges,
+ // and similarly for y (top and bottom edges).
+ half x;
+ @if (highp) {
+ x = min(half(sk_FragCoord.x - rectF.x), half(rectF.z - sk_FragCoord.x));
+ } else {
+ x = min(half(sk_FragCoord.x - rectH.x), half(rectH.z - sk_FragCoord.x));
+ }
+ half y;
+ @if (highp) {
+ y = min(half(sk_FragCoord.y - rectF.y), half(rectF.w - sk_FragCoord.y));
+ } else {
+ y = min(half(sk_FragCoord.y - rectH.y), half(rectH.w - sk_FragCoord.y));
+ }
+ // The sw code computes an approximation of an integral of the Gaussian from -inf to x,
+ // where x is the signed distance to the edge (positive inside the rect). The approximation
+ // is based on three box filters and is a piecewise cubic. The piecewise nature introduces
+ // branches, so here we use a very close 5th-degree approximation of the piecewise cubic. The
+ // piecewise cubic goes from 0 to 1 as x goes from -1.5 to 1.5.
+ half r = 1 / (2.0 * sigma);
+ x *= r;
+ y *= r;
+ // The polynomial is such that we can either clamp the domain or the range. Clamping the
+ // range (xCoverage/yCoverage) seems to be faster but the polynomial quickly produces very
+ // large absolute values outside the [-1.5, 1.5] domain and some mobile GPUs don't seem to
+ // properly produce -infs or infs in that case. So instead we clamp the domain (x/y). The
+ // perf gap is probably because clamping to [0, 1] is faster than clamping to [-1.5, 1.5].
+ x = clamp(x, -1.5, 1.5);
+ y = clamp(y, -1.5, 1.5);
half x2 = x * x;
half x3 = x2 * x;
-
- if (x > 0.5) {
- xCoverage = 0.5625 - (x3 / 6.0 - 3.0 * x2 * 0.25 + 1.125 * x);
- } else if (x > -0.5) {
- xCoverage = 0.5 - (0.75 * x - x3 / 3.0);
- } else {
- xCoverage = 0.4375 + (-x3 / 6.0 - 3.0 * x2 * 0.25 - 1.125 * x);
- }
- }
-
- // Repeat of above for y.
- half y;
- @if (highp) {
- float tDiff = rectF.y - sk_FragCoord.y;
- float bDiff = sk_FragCoord.y - rectF.w;
- y = half(max(tDiff, bDiff) * invr);
- } else {
- half tDiff = half(rectH.y - sk_FragCoord.y);
- half bDiff = half(sk_FragCoord.y - rectH.w);
- y = max(tDiff, bDiff) * invr;
- }
- half yCoverage;
- if (y > 1.5) {
- yCoverage = 0.0;
- } else if (y < -1.5) {
- yCoverage = 1.0;
- } else {
+ half x5 = x2 * x3;
+ half a = 0.734822;
+ half b = -0.313376;
+ half c = 0.0609169;
+ half d = 0.5;
+ half xCoverage = a * x + b * x3 + c * x5 + d;
half y2 = y * y;
half y3 = y2 * y;
-
- if (y > 0.5) {
- yCoverage = 0.5625 - (y3 / 6.0 - 3.0 * y2 * 0.25 + 1.125 * y);
- } else if (y > -0.5) {
- yCoverage = 0.5 - (0.75 * y - y3 / 3.0);
- } else {
- yCoverage = 0.4375 + (-y3 / 6.0 - 3.0 * y2 * 0.25 - 1.125 * y);
- }
- }
-
- sk_OutColor = sk_InColor * xCoverage * yCoverage;
+ half y5 = y2 * y3;
+ half yCoverage = a * y + b * y3 + c * y5 + d;
+ sk_OutColor = sk_InColor * xCoverage * yCoverage;
}
@setData(pdman) {
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.cpp b/src/gpu/effects/generated/GrRectBlurEffect.cpp
index 1dfd304..e4412e2 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRectBlurEffect.cpp
@@ -41,40 +41,30 @@
sigmaVar =
args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf_GrSLType, "sigma");
fragBuilder->codeAppendf(
- "/* key */ bool highp = %s;\nhalf invr = 1.0 / (2.0 * %s);\nhalf x;\n@if (highp) "
- "{\n float lDiff = %s.x - sk_FragCoord.x;\n float rDiff = sk_FragCoord.x - "
- "%s.z;\n x = half(max(lDiff, rDiff) * float(invr));\n} else {\n half lDiff = "
- "half(float(%s.x) - sk_FragCoord.x);\n half rDiff = half(sk_FragCoord.x - "
- "float(%s.z));\n x = max(lDiff, rDiff) * invr;\n}\nhalf xCoverage;\nif (x > "
- "1.5) {\n xCoverage = 0.0;\n} else if (x < -1.5) {\n xCoverage = 1.0;\n} "
- "else {\n half x2 = x * x;\n half",
- (highp ? "true" : "false"), args.fUniformHandler->getUniformCStr(sigmaVar),
+ "/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n x = "
+ "min(half(sk_FragCoord.x - %s.x), half(%s.z - sk_FragCoord.x));\n} else {\n x = "
+ "min(half(sk_FragCoord.x - float(%s.x)), half(float(%s.z) - "
+ "sk_FragCoord.x));\n}\nhalf y;\n@if (highp) {\n y = min(half(sk_FragCoord.y - "
+ "%s.y), half(%s.w - sk_FragCoord.y));\n} else {\n y = min(half(sk_FragCoord.y - "
+ "float(%s.y)), half(float(%s.w) - sk_FragCoord.y));\n}\nhalf r = 1.0 / (2.0 * "
+ "%s);\nx *= r;\ny *= r;\nx = clamp(x, -1.5, 1.5);\ny = clamp(y, -1.5, 1.5",
+ (highp ? "true" : "false"),
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
- rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
- fragBuilder->codeAppendf(
- " x3 = x2 * x;\n if (x > 0.5) {\n xCoverage = 0.5625 - ((x3 / 6.0 - (3.0 "
- "* x2) * 0.25) + 1.125 * x);\n } else if (x > -0.5) {\n xCoverage = 0.5 "
- "- (0.75 * x - x3 / 3.0);\n } else {\n xCoverage = 0.4375 + ((-x3 / 6.0 "
- "- (3.0 * x2) * 0.25) - 1.125 * x);\n }\n}\nhalf y;\n@if (highp) {\n float "
- "tDiff = %s.y - sk_FragCoord.y;\n float bDiff = sk_FragCoord.y - %s.w;\n y = "
- "half(max(tDiff, bDiff) * float(invr));\n} else {\n half tDiff = "
- "half(float(%s.y) - sk_FragCoord.y);\n ",
+ rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
- rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
+ rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
+ rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
+ args.fUniformHandler->getUniformCStr(sigmaVar));
fragBuilder->codeAppendf(
- " half bDiff = half(sk_FragCoord.y - float(%s.w));\n y = max(tDiff, bDiff) * "
- "invr;\n}\nhalf yCoverage;\nif (y > 1.5) {\n yCoverage = 0.0;\n} else if (y < "
- "-1.5) {\n yCoverage = 1.0;\n} else {\n half y2 = y * y;\n half y3 = y2 * "
- "y;\n if (y > 0.5) {\n yCoverage = 0.5625 - ((y3 / 6.0 - (3.0 * y2) * "
- "0.25) + 1.125 * y);\n } else if (y > -0.5) {\n yCoverage = 0.5 - (0.75 "
- "* y - y3 / 3.0);\n } else {\n yCoverage = 0.4375 + ((-y3 / 6.0 - (3.0 * "
- "y2) * 0.25) - 1.125 * y);\n ",
- rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)");
- fragBuilder->codeAppendf(" }\n}\n%s = (%s * xCoverage) * yCoverage;\n", args.fOutputColor,
- args.fInputColor);
+ ");\nhalf x2 = x * x;\nhalf x3 = x2 * x;\nhalf x5 = x2 * x3;\n\n\n\n\nhalf "
+ "xCoverage = ((0.73482197523117065 * x + -0.31337600946426392 * x3) + "
+ "0.060916900634765625 * x5) + 0.5;\nhalf y2 = y * y;\nhalf y3 = y2 * y;\nhalf y5 = "
+ "y2 * y3;\nhalf yCoverage = ((0.73482197523117065 * y + -0.31337600946426392 * y3) "
+ "+ 0.060916900634765625 * y5) + 0.5;\n%s = (%s * xCoverage) * yCoverage;\n",
+ args.fOutputColor, args.fInputColor);
}
private: