Go back to rect blur profile textures.
It's faster than analytic.
This version of profile texture has better binning and simpler shader
logic than the original. I believe it also avoids some integralization
that led to artifacts in the previous texture implementation.
We oversize the profile texture relative to the blur, bin its width by
powers of 2 with a 32-texel minimum, and then rescale the texture
coordinates over a 6-sigma range in the shader. We pre-inset the rect
uniform so the texture is always placed with one end at the rect edge,
extending outward.
Bug: chromium:995308
Change-Id: I739db98d4df69de0f5b2f5dda079cf034ba32035
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239119
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/effects/GrRectBlurEffect.fp b/src/gpu/effects/GrRectBlurEffect.fp
index 9cb7778..03922a8 100644
--- a/src/gpu/effects/GrRectBlurEffect.fp
+++ b/src/gpu/effects/GrRectBlurEffect.fp
@@ -6,10 +6,11 @@
*/
@header {
- #include "include/core/SkScalar.h"
- #include "src/core/SkBlurMask.h"
- #include "src/gpu/GrProxyProvider.h"
- #include "src/gpu/GrShaderCaps.h"
+#include "include/core/SkScalar.h"
+#include "src/core/SkBlurMask.h"
+#include "src/core/SkMathPriv.h"
+#include "src/gpu/GrProxyProvider.h"
+#include "src/gpu/GrShaderCaps.h"
}
in float4 rect;
@@ -20,7 +21,55 @@
layout(when= highp) uniform float4 rectF;
layout(when=!highp) uniform half4 rectH;
-in uniform half sigma;
+in uniform sampler2D blurProfile;
+in uniform half invProfileWidth;
+
+@constructorParams {
+ GrSamplerState samplerParams
+}
+
+@samplerParams(blurProfile) {
+ samplerParams
+}
+@class {
+static sk_sp<GrTextureProxy> CreateBlurProfileTexture(GrProxyProvider* proxyProvider,
+ float sigma) {
+ // The "profile" we are calculating is the integral of a Gaussian with 'sigma' and a half
+ // plane. All such profiles are just scales of each other. So all we really care about is
+ // having enough resolution so that the linear interpolation done in texture lookup doesn't
+ // introduce noticeable artifacts. SkBlurMask::ComputeBlurProfile() produces profiles with
+ // ceil(6 * sigma) entries. We conservatively choose to have 2 texels for each dst pixel.
+ int minProfileWidth = 2 * sk_float_ceil2int(6 * sigma);
+ // Bin by powers of 2 with a minimum so we get good profile reuse (remember we can just scale
+ // the texture coords to span the larger profile over a 6 sigma distance).
+ int profileWidth = SkTMax(SkNextPow2(minProfileWidth), 32);
+
+ static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+ GrUniqueKey key;
+ GrUniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
+ builder[0] = profileWidth;
+ builder.finish();
+
+ sk_sp<GrTextureProxy> blurProfile(proxyProvider->findOrCreateProxyByUniqueKey(
+ key, GrColorType::kAlpha_8, kTopLeft_GrSurfaceOrigin));
+ if (!blurProfile) {
+ SkBitmap bitmap;
+ if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
+ return nullptr;
+ }
+ SkBlurMask::ComputeBlurProfile(bitmap.getAddr8(0, 0), profileWidth, profileWidth / 6.f);
+ bitmap.setImmutable();
+ blurProfile = proxyProvider->createProxyFromBitmap(bitmap, GrMipMapped::kNo);
+ if (!blurProfile) {
+ return nullptr;
+ }
+ SkASSERT(blurProfile->origin() == kTopLeft_GrSurfaceOrigin);
+ proxyProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+ }
+
+ return blurProfile;
+}
+}
@make {
static std::unique_ptr<GrFragmentProcessor> Make(GrProxyProvider* proxyProvider,
@@ -36,11 +85,6 @@
return nullptr;
}
}
- // Sigma is always a half.
- SkASSERT(sigma > 0);
- if (sigma > 16000.f) {
- return nullptr;
- }
if (doubleProfileSize >= (float) rect.width() ||
doubleProfileSize >= (float) rect.height()) {
@@ -49,52 +93,45 @@
return nullptr;
}
- return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(rect, sigma));
+ auto profile = CreateBlurProfileTexture(proxyProvider, sigma);
+ if (!profile) {
+ return nullptr;
+ }
+ // The profile is calculated such that the midpoint is at the rect's edge. To simplify
+ // calculating texture coords in the shader, we inset the rect such that the profile
+ // can be used with one end point aligned to the edges of the rect uniform. The texture
+ // coords should be scaled such that the profile is sampled over a 6 sigma range so inset
+ // by 3 sigma.
+ float halfW = 3.f * sigma;
+ auto insetR = rect.makeInset(halfW, halfW);
+ // inverse of the width over which the profile texture should be interpolated outward from
+ // the inset rect.
+ float invWidth = 1.f / (2 * halfW);
+ return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
+ insetR, std::move(profile), invWidth, GrSamplerState::ClampBilerp()));
}
}
void main() {
// Get the smaller of the signed distance from the frag coord to the left and right edges
// and similar for y.
+ // The blur profile computed by SkBlurMask::ComputeBlurProfile is actually 1 - integral.
+ // The integral is an S-looking shape that is symmetric about 0, so we just compute x and y
+ // "backwards" such that texture coord is 1 at the edge and goes to 0 as we move outward.
half x;
@if (highp) {
- x = min(half(sk_FragCoord.x - rectF.x), half(rectF.z - sk_FragCoord.x));
+ x = max(half(rectF.x - sk_FragCoord.x), half(sk_FragCoord.x - rectF.z));
} else {
- x = min(half(sk_FragCoord.x - rectH.x), half(rectH.z - sk_FragCoord.x));
+ x = max(half(rectH.x - sk_FragCoord.x), half(sk_FragCoord.x - rectH.z));
}
half y;
@if (highp) {
- y = min(half(sk_FragCoord.y - rectF.y), half(rectF.w - sk_FragCoord.y));
+ y = max(half(rectF.y - sk_FragCoord.y), half(sk_FragCoord.y - rectF.w));
} else {
- y = min(half(sk_FragCoord.y - rectH.y), half(rectH.w - sk_FragCoord.y));
+ y = max(half(rectH.y - sk_FragCoord.y), half(sk_FragCoord.y - rectH.w));
}
- // The sw code computes an approximation of an integral of the Gaussian from -inf to x,
- // where x is the signed distance to the edge (positive inside the rect). The approximation
- // is based on three box filters and is a piecewise cubic. The piecewise nature introduces
- // branches so here we use a 5th degree very close approximation of the piecewise cubic. The
- // piecewise cubic goes from 0 to 1 as x goes from -1.5 to 1.5.
- half r = 1 / (2.0 * sigma);
- x *= r;
- y *= r;
- // The polynomial is such that we can either clamp the domain or the range. Clamping the
- // range (xCoverage/yCoverage) seems to be faster but the polynomial quickly produces very
- // large absolute values outside the [-1.5, 1.5] domain and some mobile GPUs don't seem to
- // properly produce -infs or infs in that case. So instead we clamp the domain (x/y). The
- // perf is probably because clamping to [0, 1] is faster than clamping to [-1.5, 1.5].
- x = clamp(x, -1.5, 1.5);
- y = clamp(y, -1.5, 1.5);
- half x2 = x * x;
- half x3 = x2 * x;
- half x5 = x2 * x3;
- half a = 0.734822;
- half b = -0.313376;
- half c = 0.0609169;
- half d = 0.5;
- half xCoverage = a * x + b * x3 + c * x5 + d;
- half y2 = y * y;
- half y3 = y2 * y;
- half y5 = y2 * y3;
- half yCoverage = a * y + b * y3 + c * y5 + d;
+ half xCoverage = sample(blurProfile, half2(x * invProfileWidth, 0.5)).a;
+ half yCoverage = sample(blurProfile, half2(y * invProfileWidth, 0.5)).a;
sk_OutColor = sk_InColor * xCoverage * yCoverage;
}
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.cpp b/src/gpu/effects/generated/GrRectBlurEffect.cpp
index e4412e2..a717040 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.cpp
+++ b/src/gpu/effects/generated/GrRectBlurEffect.cpp
@@ -25,8 +25,8 @@
(void)_outer;
auto rect = _outer.rect;
(void)rect;
- auto sigma = _outer.sigma;
- (void)sigma;
+ auto invProfileWidth = _outer.invProfileWidth;
+ (void)invProfileWidth;
highp = ((abs(rect.left()) > 16000.0 || abs(rect.top()) > 16000.0) ||
abs(rect.right()) > 16000.0) ||
abs(rect.bottom()) > 16000.0;
@@ -38,16 +38,17 @@
rectHVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf4_GrSLType,
"rectH");
}
- sigmaVar =
- args.fUniformHandler->addUniform(kFragment_GrShaderFlag, kHalf_GrSLType, "sigma");
+ invProfileWidthVar = args.fUniformHandler->addUniform(kFragment_GrShaderFlag,
+ kHalf_GrSLType, "invProfileWidth");
fragBuilder->codeAppendf(
- "/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n x = "
- "min(half(sk_FragCoord.x - %s.x), half(%s.z - sk_FragCoord.x));\n} else {\n x = "
- "min(half(sk_FragCoord.x - float(%s.x)), half(float(%s.z) - "
- "sk_FragCoord.x));\n}\nhalf y;\n@if (highp) {\n y = min(half(sk_FragCoord.y - "
- "%s.y), half(%s.w - sk_FragCoord.y));\n} else {\n y = min(half(sk_FragCoord.y - "
- "float(%s.y)), half(float(%s.w) - sk_FragCoord.y));\n}\nhalf r = 1.0 / (2.0 * "
- "%s);\nx *= r;\ny *= r;\nx = clamp(x, -1.5, 1.5);\ny = clamp(y, -1.5, 1.5",
+ "/* key */ bool highp = %s;\nhalf x;\n@if (highp) {\n x = max(half(%s.x - "
+ "sk_FragCoord.x), half(sk_FragCoord.x - %s.z));\n} else {\n x = "
+ "max(half(float(%s.x) - sk_FragCoord.x), half(sk_FragCoord.x - "
+ "float(%s.z)));\n}\nhalf y;\n@if (highp) {\n y = max(half(%s.y - "
+ "sk_FragCoord.y), half(sk_FragCoord.y - %s.w));\n} else {\n y = "
+ "max(half(float(%s.y) - sk_FragCoord.y), half(sk_FragCoord.y - "
+ "float(%s.w)));\n}\nhalf xCoverage = sample(%s, float2(half2(x * %s, "
+ "0.5))).%s.w;\nhalf yCoverage = sample(%s, flo",
(highp ? "true" : "false"),
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
@@ -57,13 +58,14 @@
rectFVar.isValid() ? args.fUniformHandler->getUniformCStr(rectFVar) : "float4(0)",
rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
rectHVar.isValid() ? args.fUniformHandler->getUniformCStr(rectHVar) : "half4(0)",
- args.fUniformHandler->getUniformCStr(sigmaVar));
+ fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]),
+ args.fUniformHandler->getUniformCStr(invProfileWidthVar),
+ fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[0]).c_str(),
+ fragBuilder->getProgramBuilder()->samplerVariable(args.fTexSamplers[0]));
fragBuilder->codeAppendf(
- ");\nhalf x2 = x * x;\nhalf x3 = x2 * x;\nhalf x5 = x2 * x3;\n\n\n\n\nhalf "
- "xCoverage = ((0.73482197523117065 * x + -0.31337600946426392 * x3) + "
- "0.060916900634765625 * x5) + 0.5;\nhalf y2 = y * y;\nhalf y3 = y2 * y;\nhalf y5 = "
- "y2 * y3;\nhalf yCoverage = ((0.73482197523117065 * y + -0.31337600946426392 * y3) "
- "+ 0.060916900634765625 * y5) + 0.5;\n%s = (%s * xCoverage) * yCoverage;\n",
+ "at2(half2(y * %s, 0.5))).%s.w;\n%s = (%s * xCoverage) * yCoverage;\n",
+ args.fUniformHandler->getUniformCStr(invProfileWidthVar),
+ fragBuilder->getProgramBuilder()->samplerSwizzle(args.fTexSamplers[0]).c_str(),
args.fOutputColor, args.fInputColor);
}
@@ -71,15 +73,18 @@
void onSetData(const GrGLSLProgramDataManager& pdman,
const GrFragmentProcessor& _proc) override {
const GrRectBlurEffect& _outer = _proc.cast<GrRectBlurEffect>();
- { pdman.set1f(sigmaVar, (_outer.sigma)); }
+ { pdman.set1f(invProfileWidthVar, (_outer.invProfileWidth)); }
auto rect = _outer.rect;
(void)rect;
UniformHandle& rectF = rectFVar;
(void)rectF;
UniformHandle& rectH = rectHVar;
(void)rectH;
- UniformHandle& sigma = sigmaVar;
- (void)sigma;
+ GrSurfaceProxy& blurProfileProxy = *_outer.textureSampler(0).proxy();
+ GrTexture& blurProfile = *blurProfileProxy.peekTexture();
+ (void)blurProfile;
+ UniformHandle& invProfileWidth = invProfileWidthVar;
+ (void)invProfileWidth;
float r[]{rect.fLeft, rect.fTop, rect.fRight, rect.fBottom};
pdman.set4fv(highp ? rectF : rectH, 1, r);
@@ -87,7 +92,7 @@
bool highp = false;
UniformHandle rectFVar;
UniformHandle rectHVar;
- UniformHandle sigmaVar;
+ UniformHandle invProfileWidthVar;
};
GrGLSLFragmentProcessor* GrRectBlurEffect::onCreateGLSLInstance() const {
return new GrGLSLRectBlurEffect();
@@ -103,16 +108,23 @@
const GrRectBlurEffect& that = other.cast<GrRectBlurEffect>();
(void)that;
if (rect != that.rect) return false;
- if (sigma != that.sigma) return false;
+ if (blurProfile != that.blurProfile) return false;
+ if (invProfileWidth != that.invProfileWidth) return false;
return true;
}
GrRectBlurEffect::GrRectBlurEffect(const GrRectBlurEffect& src)
: INHERITED(kGrRectBlurEffect_ClassID, src.optimizationFlags())
, rect(src.rect)
- , sigma(src.sigma) {}
+ , blurProfile(src.blurProfile)
+ , invProfileWidth(src.invProfileWidth) {
+ this->setTextureSamplerCnt(1);
+}
std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::clone() const {
return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(*this));
}
+const GrFragmentProcessor::TextureSampler& GrRectBlurEffect::onTextureSampler(int index) const {
+ return IthTextureSampler(index, blurProfile);
+}
GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrRectBlurEffect);
#if GR_TEST_UTILS
std::unique_ptr<GrFragmentProcessor> GrRectBlurEffect::TestCreate(GrProcessorTestData* data) {
diff --git a/src/gpu/effects/generated/GrRectBlurEffect.h b/src/gpu/effects/generated/GrRectBlurEffect.h
index 2d14df4..1b0e183 100644
--- a/src/gpu/effects/generated/GrRectBlurEffect.h
+++ b/src/gpu/effects/generated/GrRectBlurEffect.h
@@ -14,6 +14,7 @@
#include "include/core/SkScalar.h"
#include "src/core/SkBlurMask.h"
+#include "src/core/SkMathPriv.h"
#include "src/gpu/GrProxyProvider.h"
#include "src/gpu/GrShaderCaps.h"
@@ -21,6 +22,44 @@
#include "src/gpu/GrFragmentProcessor.h"
class GrRectBlurEffect : public GrFragmentProcessor {
public:
+ static sk_sp<GrTextureProxy> CreateBlurProfileTexture(GrProxyProvider* proxyProvider,
+ float sigma) {
+ // The "profile" we are calculating is the integral of a Gaussian with 'sigma' and a half
+ // plane. All such profiles are just scales of each other. So all we really care about is
+ // having enough resolution so that the linear interpolation done in texture lookup doesn't
+ // introduce noticeable artifacts. SkBlurMask::ComputeBlurProfile() produces profiles with
+ // ceil(6 * sigma) entries. We conservatively choose to have 2 texels for each dst pixel.
+ int minProfileWidth = 2 * sk_float_ceil2int(6 * sigma);
+ // Bin by powers of 2 with a minimum so we get good profile reuse (remember we can just
+ // scale the texture coords to span the larger profile over a 6 sigma distance).
+ int profileWidth = SkTMax(SkNextPow2(minProfileWidth), 32);
+
+ static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
+ GrUniqueKey key;
+ GrUniqueKey::Builder builder(&key, kDomain, 1, "Rect Blur Mask");
+ builder[0] = profileWidth;
+ builder.finish();
+
+ sk_sp<GrTextureProxy> blurProfile(proxyProvider->findOrCreateProxyByUniqueKey(
+ key, GrColorType::kAlpha_8, kTopLeft_GrSurfaceOrigin));
+ if (!blurProfile) {
+ SkBitmap bitmap;
+ if (!bitmap.tryAllocPixels(SkImageInfo::MakeA8(profileWidth, 1))) {
+ return nullptr;
+ }
+ SkBlurMask::ComputeBlurProfile(bitmap.getAddr8(0, 0), profileWidth, profileWidth / 6.f);
+ bitmap.setImmutable();
+ blurProfile = proxyProvider->createProxyFromBitmap(bitmap, GrMipMapped::kNo);
+ if (!blurProfile) {
+ return nullptr;
+ }
+ SkASSERT(blurProfile->origin() == kTopLeft_GrSurfaceOrigin);
+ proxyProvider->assignUniqueKeyToProxy(key, blurProfile.get());
+ }
+
+ return blurProfile;
+ }
+
static std::unique_ptr<GrFragmentProcessor> Make(GrProxyProvider* proxyProvider,
const GrShaderCaps& caps, const SkRect& rect,
float sigma) {
@@ -34,11 +73,6 @@
return nullptr;
}
}
- // Sigma is always a half.
- SkASSERT(sigma > 0);
- if (sigma > 16000.f) {
- return nullptr;
- }
if (doubleProfileSize >= (float)rect.width() || doubleProfileSize >= (float)rect.height()) {
// if the blur sigma is too large so the gaussian overlaps the whole
@@ -46,23 +80,44 @@
return nullptr;
}
- return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(rect, sigma));
+ auto profile = CreateBlurProfileTexture(proxyProvider, sigma);
+ if (!profile) {
+ return nullptr;
+ }
+ // The profile is calculated such that the midpoint is at the rect's edge. To simplify
+ // calculating texture coords in the shader, we inset the rect such that the profile
+ // can be used with one end point aligned to the edges of the rect uniform. The texture
+ // coords should be scaled such that the profile is sampled over a 6 sigma range so inset
+ // by 3 sigma.
+ float halfW = 3.f * sigma;
+ auto insetR = rect.makeInset(halfW, halfW);
+ // inverse of the width over which the profile texture should be interpolated outward from
+ // the inset rect.
+ float invWidth = 1.f / (2 * halfW);
+ return std::unique_ptr<GrFragmentProcessor>(new GrRectBlurEffect(
+ insetR, std::move(profile), invWidth, GrSamplerState::ClampBilerp()));
}
GrRectBlurEffect(const GrRectBlurEffect& src);
std::unique_ptr<GrFragmentProcessor> clone() const override;
const char* name() const override { return "RectBlurEffect"; }
SkRect rect;
- float sigma;
+ TextureSampler blurProfile;
+ float invProfileWidth;
private:
- GrRectBlurEffect(SkRect rect, float sigma)
+ GrRectBlurEffect(SkRect rect, sk_sp<GrTextureProxy> blurProfile, float invProfileWidth,
+ GrSamplerState samplerParams)
: INHERITED(kGrRectBlurEffect_ClassID,
(OptimizationFlags)kCompatibleWithCoverageAsAlpha_OptimizationFlag)
, rect(rect)
- , sigma(sigma) {}
+ , blurProfile(std::move(blurProfile), samplerParams)
+ , invProfileWidth(invProfileWidth) {
+ this->setTextureSamplerCnt(1);
+ }
GrGLSLFragmentProcessor* onCreateGLSLInstance() const override;
void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override;
bool onIsEqual(const GrFragmentProcessor&) const override;
+ const TextureSampler& onTextureSampler(int) const override;
GR_DECLARE_FRAGMENT_PROCESSOR_TEST
typedef GrFragmentProcessor INHERITED;
};