Implement GPU path for matrix convolution. Note that when not convolving alpha,
the unpremultiplying is done less efficiently than in the raster path: it's
done on each texture access, rather than as a pre-processing pass. This was
so I could do the filter as a single custom stage; I will try the optimization
separately.
This implementation gives a ~30X speedup on the GPU results for the
matrixconvolution bench (~10X due to the GPU, and ~3X due to texture
uploads/readback removal).
Note: this changes the matrixconvolution results for the software path as well,
so it will likely break the bots until that test is rebaselined.
Review URL: https://codereview.appspot.com/6585069/
git-svn-id: http://skia.googlecode.com/svn/trunk@5809 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/effects/SkMatrixConvolutionImageFilter.cpp b/src/effects/SkMatrixConvolutionImageFilter.cpp
index 1c6f980..6b8a06d 100644
--- a/src/effects/SkMatrixConvolutionImageFilter.cpp
+++ b/src/effects/SkMatrixConvolutionImageFilter.cpp
@@ -12,6 +12,10 @@
#include "SkRect.h"
#include "SkUnPreMultiply.h"
+#if SK_SUPPORT_GPU
+#include "gl/GrGLProgramStage.h"
+#endif
+
SkMatrixConvolutionImageFilter::SkMatrixConvolutionImageFilter(const SkISize& kernelSize, const SkScalar* kernel, SkScalar gain, SkScalar bias, const SkIPoint& target, TileMode tileMode, bool convolveAlpha, SkImageFilter* input)
: INHERITED(input),
fKernelSize(kernelSize),
@@ -23,6 +27,7 @@
uint32_t size = fKernelSize.fWidth * fKernelSize.fHeight;
fKernel = SkNEW_ARRAY(SkScalar, size);
memcpy(fKernel, kernel, size * sizeof(SkScalar));
+ SkASSERT(kernelSize.fWidth >= 1 && kernelSize.fHeight >= 1);
SkASSERT(target.fX >= 0 && target.fX < kernelSize.fWidth);
SkASSERT(target.fY >= 0 && target.fY < kernelSize.fHeight);
}
@@ -121,11 +126,12 @@
}
int a = convolveAlpha
? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, fGain) + fBias), 255)
- : SkGetPackedA32(PixelFetcher::fetch(src, x, y));
+ : 255;
int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, fGain) + fBias), a);
int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, fGain) + fBias), a);
int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, fGain) + fBias), a);
if (!convolveAlpha) {
+ a = SkGetPackedA32(PixelFetcher::fetch(src, x, y));
*dptr++ = SkPreMultiplyARGB(a, r, g, b);
} else {
*dptr++ = SkPackARGB32(a, r, g, b);
@@ -225,3 +231,314 @@
filterBorderPixels(src, result, bottom);
return true;
}
+
+#if SK_SUPPORT_GPU
+
+///////////////////////////////////////////////////////////////////////////////
+
+class GrGLMatrixConvolutionEffect;
+
+class GrMatrixConvolutionEffect : public GrSingleTextureEffect { // Custom stage carrying the parameters of a matrix convolution over a single texture.
+public:
+ typedef SkMatrixConvolutionImageFilter::TileMode TileMode;
+ GrMatrixConvolutionEffect(GrTexture*,
+ const SkISize& kernelSize,
+ const SkScalar* kernel,
+ SkScalar gain,
+ SkScalar bias,
+ const SkIPoint& target,
+ TileMode tileMode,
+ bool convolveAlpha); // the kernel is copied (as floats); the caller keeps ownership of its array
+ virtual ~GrMatrixConvolutionEffect();
+
+ static const char* Name() { return "MatrixConvolution"; }
+ const SkISize& kernelSize() const { return fKernelSize; }
+ const float* target() const { return fTarget; } // points at 2 floats: {x, y}
+ const float* kernel() const { return fKernel; } // points at width * height floats
+ float gain() const { return fGain; }
+ float bias() const { return fBias; } // constructor bias already divided by 255
+ TileMode tileMode() const { return fTileMode; }
+ bool convolveAlpha() const { return fConvolveAlpha; }
+
+ typedef GrGLMatrixConvolutionEffect GLProgramStage; // GL-side class for this effect; presumably consumed by GrTProgramStageFactory
+
+ virtual const GrProgramStageFactory& getFactory() const SK_OVERRIDE;
+ virtual bool isEqual(const GrCustomStage&) const SK_OVERRIDE;
+
+private:
+ SkISize fKernelSize;
+ float *fKernel; // owned; allocated with new[] in the constructor, released with delete[] in the destructor
+ float fGain;
+ float fBias; // stored as bias / 255
+ float fTarget[2]; // kernel target as floats: {x, y}
+ TileMode fTileMode;
+ bool fConvolveAlpha; // false: the shader leaves alpha unconvolved and takes it from the source texel
+
+ GR_DECLARE_CUSTOM_STAGE_TEST;
+
+ typedef GrSingleTextureEffect INHERITED;
+};
+
+class GrGLMatrixConvolutionEffect : public GrGLProgramStage { // GL program stage that generates and feeds the convolution fragment shader.
+public:
+ GrGLMatrixConvolutionEffect(const GrProgramStageFactory& factory,
+ const GrCustomStage& stage);
+ virtual void setupVariables(GrGLShaderBuilder* builder) SK_OVERRIDE; // declares the fragment-shader uniforms
+ virtual void emitVS(GrGLShaderBuilder* state,
+ const char* vertexCoords) SK_OVERRIDE {} // no vertex-shader code is emitted
+ virtual void emitFS(GrGLShaderBuilder* state,
+ const char* outputColor,
+ const char* inputColor,
+ const TextureSamplerArray&) SK_OVERRIDE;
+
+ static inline StageKey GenKey(const GrCustomStage& s, const GrGLCaps& caps); // packs kernel size, tile mode and convolveAlpha
+
+ virtual void setData(const GrGLUniformManager&,
+ const GrCustomStage&,
+ const GrRenderTarget*,
+ int stageNum) SK_OVERRIDE; // uploads the uniform values for one draw
+
+private:
+ typedef GrGLUniformManager::UniformHandle UniformHandle;
+ typedef SkMatrixConvolutionImageFilter::TileMode TileMode;
+ SkISize fKernelSize; // copied from the effect at construction; sets the generated loop bounds
+ TileMode fTileMode; // copied from the effect at construction; selects the texture-lookup wrapping code
+ bool fConvolveAlpha; // copied from the effect at construction; selects the shader variant
+
+ UniformHandle fKernelUni; // handles start out invalid and are assigned in setupVariables()
+ UniformHandle fImageIncrementUni;
+ UniformHandle fTargetUni;
+ UniformHandle fGainUni;
+ UniformHandle fBiasUni;
+};
+
+// Captures the shader-shaping parameters (kernel size, tile mode, alpha mode)
+// from the given stage. All uniform handles start out invalid; they are
+// assigned later by setupVariables().
+GrGLMatrixConvolutionEffect::GrGLMatrixConvolutionEffect(const GrProgramStageFactory& factory,
+                                                         const GrCustomStage& stage)
+    : GrGLProgramStage(factory)
+    , fKernelUni(GrGLUniformManager::kInvalidUniformHandle)
+    , fImageIncrementUni(GrGLUniformManager::kInvalidUniformHandle)
+    , fTargetUni(GrGLUniformManager::kInvalidUniformHandle)
+    , fGainUni(GrGLUniformManager::kInvalidUniformHandle)
+    , fBiasUni(GrGLUniformManager::kInvalidUniformHandle) {
+    // The stage is treated as a GrMatrixConvolutionEffect without a runtime check.
+    const GrMatrixConvolutionEffect& effect =
+        static_cast<const GrMatrixConvolutionEffect&>(stage);
+    fConvolveAlpha = effect.convolveAlpha();
+    fTileMode = effect.tileMode();
+    fKernelSize = effect.kernelSize();
+}
+
+// Declares the fragment-shader uniforms used by emitFS(): the per-texel
+// increment, the kernel coefficients (one float per kernel entry), the kernel
+// target, and the gain and bias scalars.
+void GrGLMatrixConvolutionEffect::setupVariables(GrGLShaderBuilder* builder) {
+    const int kernelEntries = fKernelSize.width() * fKernelSize.height();
+
+    fImageIncrementUni = builder->addUniform(GrGLShaderBuilder::kFragment_ShaderType,
+                                             kVec2f_GrSLType,
+                                             "ImageIncrement");
+    fKernelUni = builder->addUniformArray(GrGLShaderBuilder::kFragment_ShaderType,
+                                          kFloat_GrSLType,
+                                          "Kernel",
+                                          kernelEntries);
+    fTargetUni = builder->addUniform(GrGLShaderBuilder::kFragment_ShaderType,
+                                     kVec2f_GrSLType,
+                                     "Target");
+    fGainUni = builder->addUniform(GrGLShaderBuilder::kFragment_ShaderType,
+                                   kFloat_GrSLType,
+                                   "Gain");
+    fBiasUni = builder->addUniform(GrGLShaderBuilder::kFragment_ShaderType,
+                                   kFloat_GrSLType,
+                                   "Bias");
+}
+
+// Appends a texture lookup at `coord` to the fragment shader, wrapped according
+// to `tileMode`:
+//   kClamp        - the coordinate is clamped to [0, 1];
+//   kRepeat       - the coordinate is wrapped with fract();
+//   kClampToBlack - the lookup is prefixed with a conditional that yields
+//                   vec4(0) when the coordinate lies outside [0, 1].
+static void appendTextureLookup(GrGLShaderBuilder* builder,
+                                const GrGLShaderBuilder::TextureSampler& sampler,
+                                const char* coord,
+                                SkMatrixConvolutionImageFilter::TileMode tileMode) {
+    SkString* fsCode = &builder->fFSCode;
+    // Backing storage for a rewritten coordinate; must outlive the lookup call.
+    SkString wrappedCoord;
+    if (SkMatrixConvolutionImageFilter::kClamp_TileMode == tileMode) {
+        wrappedCoord.printf("clamp(%s, 0.0, 1.0)", coord);
+        coord = wrappedCoord.c_str();
+    } else if (SkMatrixConvolutionImageFilter::kRepeat_TileMode == tileMode) {
+        wrappedCoord.printf("fract(%s)", coord);
+        coord = wrappedCoord.c_str();
+    } else if (SkMatrixConvolutionImageFilter::kClampToBlack_TileMode == tileMode) {
+        fsCode->appendf("clamp(%s, 0.0, 1.0) != %s ? vec4(0, 0, 0, 0) : ", coord, coord);
+    }
+    builder->appendTextureLookup(fsCode, sampler, coord);
+}
+
+// Emits the fragment-shader body of the convolution.
+//
+// The generated code walks the kWidth x kHeight kernel, samples the texture at
+// each tap (wrapped according to fTileMode), and accumulates the weighted
+// colors in `sum`. The result is scaled by the gain uniform, offset by the
+// bias uniform, and written to `outputColor` as a premultiplied color.
+//
+// When fConvolveAlpha is false, each tap is unpremultiplied before being
+// accumulated, the output alpha is taken from the source texel, and the
+// convolved color is premultiplied by that alpha.
+//
+// `inputColor` is not used by this stage.
+void GrGLMatrixConvolutionEffect::emitFS(GrGLShaderBuilder* builder,
+                                         const char* outputColor,
+                                         const char* inputColor,
+                                         const TextureSamplerArray& samplers) {
+    SkString* code = &builder->fFSCode;
+
+    const char* target = builder->getUniformCStr(fTargetUni);
+    const char* imgInc = builder->getUniformCStr(fImageIncrementUni);
+    const char* kernel = builder->getUniformCStr(fKernelUni);
+    const char* gain = builder->getUniformCStr(fGainUni);
+    const char* bias = builder->getUniformCStr(fBiasUni);
+    int kWidth = fKernelSize.width();
+    int kHeight = fKernelSize.height();
+
+    code->appendf("\t\tvec4 sum = vec4(0, 0, 0, 0);\n");
+    // Start at the kernel's top-left tap: shift back by `target` texels.
+    code->appendf("\t\tvec2 coord = %s - %s * %s;\n",
+                  builder->defaultTexCoordsName(), target, imgInc);
+    code->appendf("\t\tfor (int y = 0; y < %d; y++) {\n", kHeight);
+    code->appendf("\t\t\tfor (int x = 0; x < %d; x++) {\n", kWidth);
+    code->appendf("\t\t\t\tfloat k = %s[y * %d + x];\n", kernel, kWidth);
+    code->appendf("\t\t\t\tvec2 coord2 = coord + vec2(x, y) * %s;\n", imgInc);
+    code->appendf("\t\t\t\tvec4 c = ");
+    appendTextureLookup(builder, samplers[0], "coord2", fTileMode);
+    code->appendf(";\n");
+    if (!fConvolveAlpha) {
+        // Unpremultiply the tap. Fully transparent texels (including the
+        // vec4(0) that kClampToBlack yields outside the texture) have a == 0;
+        // dividing by it would put undefined values into the sum, so those
+        // taps contribute zero color instead.
+        code->appendf("\t\t\t\tc.rgb = (c.a > 0.0) ? c.rgb / c.a : vec3(0, 0, 0);\n");
+    }
+    code->appendf("\t\t\t\tsum += c * k;\n");
+    code->appendf("\t\t\t}\n");
+    code->appendf("\t\t}\n");
+    if (fConvolveAlpha) {
+        code->appendf("\t\t%s = sum * %s + %s;\n", outputColor, gain, bias);
+        // Clamp alpha first, then colors to [0, alpha], mirroring the raster
+        // path; this keeps the upper bound of the color clamp within [0, 1].
+        code->appendf("\t\t%s.a = clamp(%s.a, 0.0, 1.0);\n", outputColor, outputColor);
+        code->appendf("\t\t%s.rgb = clamp(%s.rgb, 0.0, %s.a);\n",
+                      outputColor, outputColor, outputColor);
+    } else {
+        code->appendf("\t\t%s.a = (", outputColor);
+        appendTextureLookup(builder, samplers[0], builder->defaultTexCoordsName(), fTileMode);
+        code->appendf(").a;\n");
+        // Clamp the unpremultiplied result to the full range before
+        // premultiplying, as the raster path does (it clamps to 255).
+        code->appendf("\t\t%s.rgb = clamp(sum.rgb * %s + %s, 0.0, 1.0);\n",
+                      outputColor, gain, bias);
+        code->appendf("\t\t%s.rgb *= %s.a;\n", outputColor, outputColor);
+    }
+}
+
+namespace {
+
+// Packs a kernel size (x by y) into a 7-bit value for use in the stage key.
+//
+// The pair is first ordered so that x <= y; bit 0x40 records whether a swap
+// was needed. The smaller dimension is then encoded as a single marker bit
+// (0x40 >> x) and the difference (y - x) occupies the bits below it.
+//
+// NOTE: the encoding is unique for every size with x * y <= 25, which is the
+// cap (MAX_KERNEL_SIZE) applied before the GPU effect is created. The assert
+// below is looser than that: 4x8 (area 32) would produce the same value as 4x4.
+int encodeXY(int x, int y) {
+    SkASSERT(x >= 1 && y >= 1 && x * y <= 32);
+    int swappedBit = 0;
+    if (y < x) {
+        const int tmp = x;
+        x = y;
+        y = tmp;
+        swappedBit = 0x40;
+    }
+    return swappedBit | (0x40 >> x) | (y - x);
+}
+
+}  // namespace
+
+// Builds the key that distinguishes generated shader variants:
+//   low bits - kernel size, as packed by encodeXY();
+//   bit 7 up - tile mode (presumably a value in 0..2, i.e. bits 7-8 -- confirm
+//              against the TileMode enum);
+//   bit 9    - set when alpha is convolved.
+// `caps` is not consulted.
+GrGLProgramStage::StageKey GrGLMatrixConvolutionEffect::GenKey(const GrCustomStage& s,
+                                                               const GrGLCaps& caps) {
+    const GrMatrixConvolutionEffect& effect =
+        static_cast<const GrMatrixConvolutionEffect&>(s);
+    const SkISize& size = effect.kernelSize();
+    StageKey key = encodeXY(size.width(), size.height());
+    key |= effect.tileMode() << 7;
+    if (effect.convolveAlpha()) {
+        key |= 1 << 9;
+    }
+    return key;
+}
+
+// Uploads the uniform values for one draw: the texel increment (1 / texture
+// width, 1 / texture height), the kernel target, the kernel coefficients, and
+// the gain and bias. The render target and stage number are not used.
+void GrGLMatrixConvolutionEffect::setData(const GrGLUniformManager& uman,
+                                          const GrCustomStage& data,
+                                          const GrRenderTarget*,
+                                          int stageNum) {
+    const GrMatrixConvolutionEffect& conv =
+        static_cast<const GrMatrixConvolutionEffect&>(data);
+    GrGLTexture* tex = static_cast<GrGLTexture*>(data.texture(0));
+
+    // The shader source was generated for one specific kernel size and tile
+    // mode; the incoming effect has to match it.
+    GrAssert(conv.kernelSize() == fKernelSize);
+    GrAssert(conv.tileMode() == fTileMode);
+
+    const float texelStep[2] = { 1.0f / tex->width(), 1.0f / tex->height() };
+    const int kernelEntries = fKernelSize.width() * fKernelSize.height();
+
+    uman.set2fv(fImageIncrementUni, 0, 1, texelStep);
+    uman.set2fv(fTargetUni, 0, 1, conv.target());
+    uman.set1fv(fKernelUni, 0, kernelEntries, conv.kernel());
+    uman.set1f(fGainUni, conv.gain());
+    uman.set1f(fBiasUni, conv.bias());
+}
+
+// Stores the convolution parameters in the float form the shader consumes:
+// the kernel is copied into a newly allocated float array (released by the
+// destructor), the bias is divided by 255, and the integer target point is
+// stored as two floats. The caller keeps ownership of `kernel`.
+GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(GrTexture* texture,
+                                                     const SkISize& kernelSize,
+                                                     const SkScalar* kernel,
+                                                     SkScalar gain,
+                                                     SkScalar bias,
+                                                     const SkIPoint& target,
+                                                     TileMode tileMode,
+                                                     bool convolveAlpha)
+    : INHERITED(texture)
+    , fKernelSize(kernelSize)
+    , fGain(SkScalarToFloat(gain))
+    , fBias(SkScalarToFloat(bias) / 255.0f)
+    , fTileMode(tileMode)
+    , fConvolveAlpha(convolveAlpha) {
+    const int entryCount = kernelSize.width() * kernelSize.height();
+    fKernel = new float[entryCount];
+    for (int idx = 0; idx < entryCount; ++idx) {
+        fKernel[idx] = SkScalarToFloat(kernel[idx]);
+    }
+    fTarget[0] = static_cast<float>(target.x());
+    fTarget[1] = static_cast<float>(target.y());
+}
+
+GrMatrixConvolutionEffect::~GrMatrixConvolutionEffect() {
+ delete[] fKernel; // releases the float kernel copy allocated with new[] in the constructor
+}
+
+const GrProgramStageFactory& GrMatrixConvolutionEffect::getFactory() const {
+ return GrTProgramStageFactory<GrMatrixConvolutionEffect>::getInstance(); // factory instance of GrTProgramStageFactory specialized on this effect type
+}
+
+// Returns true when `sBase` describes the same convolution as this effect:
+// the base-class comparison succeeds and the kernel size, kernel
+// coefficients, gain, bias, target, tile mode and alpha mode all match.
+//
+// `sBase` is downcast without a runtime check; presumably callers only
+// compare stages of the same type -- confirm against GrCustomStage.
+bool GrMatrixConvolutionEffect::isEqual(const GrCustomStage& sBase) const {
+    const GrMatrixConvolutionEffect& s =
+        static_cast<const GrMatrixConvolutionEffect&>(sBase);
+    return INHERITED::isEqual(sBase) &&
+           fKernelSize == s.kernelSize() &&
+           !memcmp(fKernel, s.kernel(),
+                   fKernelSize.width() * fKernelSize.height() * sizeof(float)) &&
+           fGain == s.gain() &&
+           fBias == s.bias() &&
+           // fTarget is an array, so `fTarget == s.target()` would compare
+           // addresses; compare the two coordinates themselves.
+           fTarget[0] == s.target()[0] &&
+           fTarget[1] == s.target()[1] &&
+           fTileMode == s.tileMode() &&
+           fConvolveAlpha == s.convolveAlpha();
+}
+
+GR_DEFINE_CUSTOM_STAGE_TEST(GrMatrixConvolutionEffect);
+
+// A little bit less than the minimum # uniforms required by DX9SM2 (32).
+// Allows for a 5x5 kernel (or 25x1, for that matter).
+#define MAX_KERNEL_SIZE 25
+
+// Creates a randomly parameterized effect for the custom-stage unit test.
+//
+// The kernel has a random width in [1, MAX_KERNEL_SIZE] and a height chosen
+// so that width * height stays within MAX_KERNEL_SIZE; coefficients, gain and
+// bias come from nextSScalar1(). The target is drawn from within the kernel
+// bounds, and the tile mode and alpha mode are picked at random. `context` is
+// not used.
+GrCustomStage* GrMatrixConvolutionEffect::TestCreate(SkRandom* random,
+                                                     GrContext* context,
+                                                     GrTexture* textures[]) {
+    int texIdx = random->nextBool() ? GrCustomStageUnitTest::kSkiaPMTextureIdx :
+                                      GrCustomStageUnitTest::kAlphaTextureIdx;
+    int width = random->nextRangeU(1, MAX_KERNEL_SIZE);
+    int height = random->nextRangeU(1, MAX_KERNEL_SIZE / width);
+    SkISize kernelSize = SkISize::Make(width, height);
+    SkScalar* kernel = new SkScalar[width * height];
+    for (int i = 0; i < width * height; i++) {
+        kernel[i] = random->nextSScalar1();
+    }
+    SkScalar gain = random->nextSScalar1();
+    SkScalar bias = random->nextSScalar1();
+    // nextRangeU() bounds are inclusive, so the upper bound is size - 1 to
+    // keep the target inside the kernel (0 <= target < size).
+    SkIPoint target = SkIPoint::Make(random->nextRangeU(0, kernelSize.width() - 1),
+                                     random->nextRangeU(0, kernelSize.height() - 1));
+    TileMode tileMode = static_cast<TileMode>(random->nextRangeU(0, 2));
+    bool convolveAlpha = random->nextBool();
+    GrCustomStage* stage = SkNEW_ARGS(GrMatrixConvolutionEffect, (textures[texIdx],
+                                                                  kernelSize,
+                                                                  kernel,
+                                                                  gain,
+                                                                  bias,
+                                                                  target,
+                                                                  tileMode,
+                                                                  convolveAlpha));
+    // The effect copied the coefficients in its constructor, so the scratch
+    // array is released here instead of being leaked.
+    delete[] kernel;
+    return stage;
+}
+
+// Reports whether this filter can run as a GPU custom stage, which requires
+// the kernel to have at most MAX_KERNEL_SIZE entries. When it can and `stage`
+// is non-null, a new GrMatrixConvolutionEffect built from this filter's
+// parameters is stored in `*stage`.
+bool SkMatrixConvolutionImageFilter::asNewCustomStage(GrCustomStage** stage,
+                                                      GrTexture* texture) const {
+    if (fKernelSize.width() * fKernelSize.height() > MAX_KERNEL_SIZE) {
+        return false;
+    }
+    if (stage) {
+        *stage = SkNEW_ARGS(GrMatrixConvolutionEffect, (texture,
+                                                        fKernelSize,
+                                                        fKernel,
+                                                        fGain,
+                                                        fBias,
+                                                        fTarget,
+                                                        fTileMode,
+                                                        fConvolveAlpha));
+    }
+    return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+#endif