Do premul and r/b swap conversions in a custom effect
Review URL: https://codereview.appspot.com/6473060/
git-svn-id: http://skia.googlecode.com/svn/trunk@5284 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index d1fdb18..2b5014d 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -11,6 +11,7 @@
#include "effects/GrConvolutionEffect.h"
#include "effects/GrSingleTextureEffect.h"
+#include "effects/GrConfigConversionEffect.h"
#include "GrBufferAllocPool.h"
#include "GrGpu.h"
@@ -1235,6 +1236,25 @@
return false;
}
}
+
+// It returns a configuration with where the byte position of the R & B components are swapped in
+// relation to the input config. This should only be called with the result of
+// grconfig_to_config8888 as it will fail for other configs.
+SkCanvas::Config8888 swap_config8888_red_and_blue(SkCanvas::Config8888 config8888) {
+ switch (config8888) {
+ case SkCanvas::kBGRA_Premul_Config8888:
+ return SkCanvas::kRGBA_Premul_Config8888;
+ case SkCanvas::kBGRA_Unpremul_Config8888:
+ return SkCanvas::kRGBA_Unpremul_Config8888;
+ case SkCanvas::kRGBA_Premul_Config8888:
+ return SkCanvas::kBGRA_Premul_Config8888;
+ case SkCanvas::kRGBA_Unpremul_Config8888:
+ return SkCanvas::kBGRA_Unpremul_Config8888;
+ default:
+ GrCrash("Unexpected input");
+ return SkCanvas::kBGRA_Unpremul_Config8888;;
+ }
+}
}
bool GrContext::readRenderTargetPixels(GrRenderTarget* target,
@@ -1255,67 +1275,53 @@
this->flush();
}
- if ((kUnpremul_PixelOpsFlag & flags) &&
- !fGpu->canPreserveReadWriteUnpremulPixels()) {
+ // Determine which conversions have to be applied: flipY, swapRAnd, and/or unpremul.
- SkCanvas::Config8888 srcConfig8888, dstConfig8888;
- if (!grconfig_to_config8888(target->config(), false, &srcConfig8888) ||
- !grconfig_to_config8888(config, true, &dstConfig8888)) {
- return false;
- }
- // do read back using target's own config
- this->readRenderTargetPixels(target,
- left, top,
- width, height,
- target->config(),
- buffer, rowBytes,
- kDontFlush_PixelOpsFlag); // we already flushed
- // sw convert the pixels to unpremul config
- uint32_t* pixels = reinterpret_cast<uint32_t*>(buffer);
- SkConvertConfig8888Pixels(pixels, rowBytes, dstConfig8888,
- pixels, rowBytes, srcConfig8888,
- width, height);
- return true;
- }
-
- GrTexture* src = target->asTexture();
- bool swapRAndB = NULL != src &&
- fGpu->preferredReadPixelsConfig(config) ==
- GrPixelConfigSwapRAndB(config);
-
- bool flipY = NULL != src &&
- fGpu->readPixelsWillPayForYFlip(target, left, top,
+ // If fGpu->readPixels would incur a y-flip cost then we will read the pixels upside down. We'll
+ // either do the flipY by drawing into a scratch with a matrix or on the cpu after the read.
+ bool flipY = fGpu->readPixelsWillPayForYFlip(target, left, top,
width, height, config,
rowBytes);
+ bool swapRAndB = fGpu->preferredReadPixelsConfig(config) == GrPixelConfigSwapRAndB(config);
+
bool unpremul = SkToBool(kUnpremul_PixelOpsFlag & flags);
- if (NULL == src && unpremul) {
- // we should fallback to cpu conversion here. This could happen when
- // we were given an external render target by the client that is not
- // also a texture (e.g. FBO 0 in GL)
+ // flipY will get set to false when it is handled below using a scratch. However, in that case
+ // we still want to do the read upside down.
+ bool readUpsideDown = flipY;
+
+ if (unpremul && kRGBA_8888_GrPixelConfig != config && kBGRA_8888_GrPixelConfig != config) {
+ // The unpremul flag is only allowed for these two configs.
return false;
}
- // we draw to a scratch texture if any of these conversion are applied
+
+ GrPixelConfig readConfig;
+ if (swapRAndB) {
+ readConfig = GrPixelConfigSwapRAndB(config);
+ GrAssert(kUnknown_GrPixelConfig != config);
+ } else {
+ readConfig = config;
+ }
+
+ // If the src is a texture and we would have to do conversions after read pixels, we instead
+ // do the conversions by drawing the src to a scratch texture. If we handle any of the
+ // conversions in the draw we set the corresponding bool to false so that we don't reapply it
+ // on the read back pixels.
+ GrTexture* src = target->asTexture();
GrAutoScratchTexture ast;
- if (flipY || swapRAndB || unpremul) {
- GrAssert(NULL != src);
- if (swapRAndB) {
- config = GrPixelConfigSwapRAndB(config);
- GrAssert(kUnknown_GrPixelConfig != config);
- }
- // Make the scratch a render target because we don't have a robust
- // readTexturePixels as of yet (it calls this function).
+ if (NULL != src && (swapRAndB || unpremul || flipY)) {
+ // Make the scratch a render target because we don't have a robust readTexturePixels as of
+ // yet. It calls this function.
GrTextureDesc desc;
desc.fFlags = kRenderTarget_GrTextureFlagBit;
desc.fWidth = width;
desc.fHeight = height;
- desc.fConfig = config;
+ desc.fConfig = readConfig;
- // When a full readback is faster than a partial we could always make
- // the scratch exactly match the passed rect. However, if we see many
- // different size rectangles we will trash our texture cache and pay the
- // cost of creating and destroying many textures. So, we only request
- // an exact match when the caller is reading an entire RT.
+ // When a full readback is faster than a partial we could always make the scratch exactly
+ // match the passed rect. However, if we see many different size rectangles we will trash
+ // our texture cache and pay the cost of creating and destroying many textures. So, we only
+ // request an exact match when the caller is reading an entire RT.
ScratchTexMatch match = kApprox_ScratchTexMatch;
if (0 == left &&
0 == top &&
@@ -1326,42 +1332,104 @@
}
ast.set(this, desc, match);
GrTexture* texture = ast.texture();
- if (!texture) {
- return false;
- }
- target = texture->asRenderTarget();
- GrAssert(NULL != target);
+ if (texture) {
+ SkAutoTUnref<GrCustomStage> stage;
+ if (unpremul) {
+ stage.reset(this->createPMToUPMEffect(src, swapRAndB));
+ }
+ // If we failed to create a PM->UPM effect and have no other conversions to perform then
+ // there is no longer any point to using the scratch.
+ if (NULL != stage || flipY || swapRAndB) {
+ if (NULL == stage) {
+ stage.reset(GrConfigConversionEffect::Create(src, swapRAndB));
+ GrAssert(NULL != stage);
+ } else {
+ unpremul = false; // we will handle the UPM conversion in the draw
+ }
+ swapRAndB = false; // we will handle the swap in the draw.
- GrDrawTarget::AutoStateRestore asr(fGpu,
- GrDrawTarget::kReset_ASRInit);
- GrDrawState* drawState = fGpu->drawState();
- drawState->setRenderTarget(target);
-
- if (unpremul) {
- drawState->enableState(GrDrawState::kUnpremultiply_StageBit);
+ GrDrawTarget::AutoStateRestore asr(fGpu, GrDrawTarget::kReset_ASRInit);
+ GrDrawState* drawState = fGpu->drawState();
+ drawState->setRenderTarget(texture->asRenderTarget());
+ GrMatrix matrix;
+ if (flipY) {
+ matrix.setTranslate(SK_Scalar1 * left,
+ SK_Scalar1 * (top + height));
+ matrix.set(GrMatrix::kMScaleY, -GR_Scalar1);
+ flipY = false; // the y flip will be handled in the draw
+ } else {
+ matrix.setTranslate(SK_Scalar1 *left, SK_Scalar1 *top);
+ }
+ matrix.postIDiv(src->width(), src->height());
+ drawState->sampler(0)->reset(matrix);
+ drawState->sampler(0)->setCustomStage(stage);
+ GrRect rect = GrRect::MakeWH(GrIntToScalar(width), GrIntToScalar(height));
+ fGpu->drawSimpleRect(rect, NULL);
+ // we want to read back from the scratch's origin
+ left = 0;
+ top = 0;
+ target = texture->asRenderTarget();
+ }
}
-
- GrMatrix matrix;
- if (flipY) {
- matrix.setTranslate(SK_Scalar1 * left,
- SK_Scalar1 * (top + height));
- matrix.set(GrMatrix::kMScaleY, -GR_Scalar1);
- } else {
- matrix.setTranslate(SK_Scalar1 *left, SK_Scalar1 *top);
- }
- matrix.postIDiv(src->width(), src->height());
- drawState->sampler(0)->reset(matrix);
- drawState->sampler(0)->setRAndBSwap(swapRAndB);
- drawState->createTextureEffect(0, src);
- GrRect rect;
- rect.setXYWH(0, 0, SK_Scalar1 * width, SK_Scalar1 * height);
- fGpu->drawSimpleRect(rect, NULL);
- left = 0;
- top = 0;
}
- return fGpu->readPixels(target,
- left, top, width, height,
- config, buffer, rowBytes, flipY);
+ if (!fGpu->readPixels(target,
+ left, top, width, height,
+ readConfig, buffer, rowBytes, readUpsideDown)) {
+ return false;
+ }
+ // Perform any conversions we weren't able to perfom using a scratch texture.
+ if (unpremul || swapRAndB || flipY) {
+ SkCanvas::Config8888 srcC8888;
+ SkCanvas::Config8888 dstC8888;
+ bool c8888IsValid = grconfig_to_config8888(config, false, &srcC8888);
+ grconfig_to_config8888(config, unpremul, &dstC8888);
+ if (swapRAndB) {
+ GrAssert(c8888IsValid); // we should only do r/b swap on 8888 configs
+ srcC8888 = swap_config8888_red_and_blue(srcC8888);
+ }
+ if (flipY) {
+ size_t tightRB = width * GrBytesPerPixel(config);
+ if (0 == rowBytes) {
+ rowBytes = tightRB;
+ }
+ SkAutoSTMalloc<256, uint8_t> tempRow(tightRB);
+ intptr_t top = reinterpret_cast<intptr_t>(buffer);
+ intptr_t bot = top + (height - 1) * rowBytes;
+ while (top < bot) {
+ uint32_t* t = reinterpret_cast<uint32_t*>(top);
+ uint32_t* b = reinterpret_cast<uint32_t*>(bot);
+ uint32_t* temp = reinterpret_cast<uint32_t*>(tempRow.get());
+ memcpy(temp, t, tightRB);
+ if (c8888IsValid) {
+ SkConvertConfig8888Pixels(t, tightRB, dstC8888,
+ b, tightRB, srcC8888,
+ width, 1);
+ SkConvertConfig8888Pixels(b, tightRB, dstC8888,
+ temp, tightRB, srcC8888,
+ width, 1);
+ } else {
+ memcpy(t, b, tightRB);
+ memcpy(b, temp, tightRB);
+ }
+ top += rowBytes;
+ bot -= rowBytes;
+ }
+ // The above loop does nothing on the middle row when height is odd.
+ if (top == bot && c8888IsValid && dstC8888 != srcC8888) {
+ uint32_t* mid = reinterpret_cast<uint32_t*>(top);
+ SkConvertConfig8888Pixels(mid, tightRB, dstC8888, mid, tightRB, srcC8888, width, 1);
+ }
+ } else {
+ // if we aren't flipping Y then we have no reason to be here other than doing
+ // conversions for 8888 (r/b swap or upm).
+ GrAssert(c8888IsValid);
+ uint32_t* b32 = reinterpret_cast<uint32_t*>(buffer);
+ SkConvertConfig8888Pixels(b32, rowBytes, dstC8888,
+ b32, rowBytes, srcC8888,
+ width, height);
+ }
+ }
+ return true;
}
void GrContext::resolveRenderTarget(GrRenderTarget* target) {
@@ -1415,17 +1483,21 @@
}
}
- // TODO: when underlying api has a direct way to do this we should use it
- // (e.g. glDrawPixels on desktop GL).
+ // TODO: when underlying api has a direct way to do this we should use it (e.g. glDrawPixels on
+ // desktop GL).
+
+ // We will always call some form of writeTexturePixels and we will pass our flags on to it.
+ // Thus, we don't perform a flush here since that call will do it (if the kNoFlush flag isn't
+ // set.)
// If the RT is also a texture and we don't have to premultiply then take the texture path.
// We expect to be at least as fast or faster since it doesn't use an intermediate texture as
// we do below.
#if !GR_MAC_BUILD
- // At least some drivers on the Mac get confused when glTexImage2D is called
- // on a texture attached to an FBO. The FBO still sees the old image. TODO:
- // determine what OS versions and/or HW is affected.
+ // At least some drivers on the Mac get confused when glTexImage2D is called on a texture
+ // attached to an FBO. The FBO still sees the old image. TODO: determine what OS versions and/or
+ // HW is affected.
if (NULL != target->asTexture() && !(kUnpremul_PixelOpsFlag & flags)) {
this->writeTexturePixels(target->asTexture(),
left, top, width, height,
@@ -1433,48 +1505,59 @@
return;
}
#endif
- if ((kUnpremul_PixelOpsFlag & flags) &&
- !fGpu->canPreserveReadWriteUnpremulPixels()) {
- SkCanvas::Config8888 srcConfig8888, dstConfig8888;
- if (!grconfig_to_config8888(config, true, &srcConfig8888) ||
- !grconfig_to_config8888(target->config(), false, &dstConfig8888)) {
- return;
- }
- // allocate a tmp buffer and sw convert the pixels to premul
- SkAutoSTMalloc<128 * 128, uint32_t> tmpPixels(width * height);
- const uint32_t* src = reinterpret_cast<const uint32_t*>(buffer);
- SkConvertConfig8888Pixels(tmpPixels.get(), 4 * width, dstConfig8888,
- src, rowBytes, srcConfig8888,
- width, height);
- // upload the already premul pixels
- flags &= ~kUnpremul_PixelOpsFlag;
- this->writeRenderTargetPixels(target,
- left, top,
- width, height,
- target->config(),
- tmpPixels, 4 * width,
- flags);
- return;
- }
+ SkAutoTUnref<GrCustomStage> stage;
+ bool swapRAndB = (fGpu->preferredReadPixelsConfig(config) == GrPixelConfigSwapRAndB(config));
- bool swapRAndB = fGpu->preferredReadPixelsConfig(config) ==
- GrPixelConfigSwapRAndB(config);
+ GrPixelConfig textureConfig;
if (swapRAndB) {
- config = GrPixelConfigSwapRAndB(config);
+ textureConfig = GrPixelConfigSwapRAndB(config);
+ } else {
+ textureConfig = config;
}
GrTextureDesc desc;
desc.fWidth = width;
desc.fHeight = height;
- desc.fConfig = config;
-
+ desc.fConfig = textureConfig;
GrAutoScratchTexture ast(this, desc);
GrTexture* texture = ast.texture();
if (NULL == texture) {
return;
}
- this->writeTexturePixels(texture, 0, 0, width, height,
- config, buffer, rowBytes, flags & ~kUnpremul_PixelOpsFlag);
+ // allocate a tmp buffer and sw convert the pixels to premul
+ SkAutoSTMalloc<128 * 128, uint32_t> tmpPixels(0);
+
+ if (kUnpremul_PixelOpsFlag & flags) {
+ if (kRGBA_8888_GrPixelConfig != config && kBGRA_8888_GrPixelConfig != config) {
+ return;
+ }
+ stage.reset(this->createUPMToPMEffect(texture, swapRAndB));
+ if (NULL == stage) {
+ SkCanvas::Config8888 srcConfig8888, dstConfig8888;
+ GR_DEBUGCODE(bool success = )
+ grconfig_to_config8888(config, true, &srcConfig8888);
+ GrAssert(success);
+ GR_DEBUGCODE(success = )
+ grconfig_to_config8888(config, false, &dstConfig8888);
+ GrAssert(success);
+ const uint32_t* src = reinterpret_cast<const uint32_t*>(buffer);
+ tmpPixels.reset(width * height);
+ SkConvertConfig8888Pixels(tmpPixels.get(), 4 * width, dstConfig8888,
+ src, rowBytes, srcConfig8888,
+ width, height);
+ buffer = tmpPixels.get();
+ rowBytes = 4 * width;
+ }
+ }
+ if (NULL == stage) {
+ stage.reset(GrConfigConversionEffect::Create(texture, swapRAndB));
+ GrAssert(NULL != stage);
+ }
+
+ this->writeTexturePixels(texture,
+ 0, 0, width, height,
+ textureConfig, buffer, rowBytes,
+ flags & ~kUnpremul_PixelOpsFlag);
GrDrawTarget::AutoStateRestore asr(fGpu, GrDrawTarget::kReset_ASRInit);
GrDrawState* drawState = fGpu->drawState();
@@ -1486,20 +1569,9 @@
matrix.setIDiv(texture->width(), texture->height());
drawState->sampler(0)->reset(matrix);
- drawState->createTextureEffect(0, texture);
- drawState->sampler(0)->setRAndBSwap(swapRAndB);
- drawState->sampler(0)->setPremultiply(SkToBool(kUnpremul_PixelOpsFlag & flags));
+ drawState->sampler(0)->setCustomStage(stage);
- static const GrVertexLayout layout = 0;
- static const int VCOUNT = 4;
- // TODO: Use GrGpu::drawRect here
- GrDrawTarget::AutoReleaseGeometry geo(fGpu, layout, VCOUNT, 0);
- if (!geo.succeeded()) {
- GrPrintf("Failed to get space for vertices!\n");
- return;
- }
- ((GrPoint*)geo.vertices())->setIRectFan(0, 0, width, height);
- fGpu->drawNonIndexed(kTriangleFan_GrPrimitiveType, 0, VCOUNT);
+ fGpu->drawSimpleRect(GrRect::MakeWH(SkIntToScalar(width), SkIntToScalar(height)), NULL);
}
////////////////////////////////////////////////////////////////////////////////
@@ -1673,6 +1745,8 @@
fAARectRenderer = SkNEW(GrAARectRenderer);
+ fDidTestPMConversions = false;
+
this->setupDrawBuffer();
}
@@ -1710,6 +1784,42 @@
return fGpu->getQuadIndexBuffer();
}
+namespace {
+void test_pm_conversions(GrContext* ctx, int* pmToUPMValue, int* upmToPMValue) {
+ GrConfigConversionEffect::PMConversion pmToUPM;
+ GrConfigConversionEffect::PMConversion upmToPM;
+ GrConfigConversionEffect::TestForPreservingPMConversions(ctx, &pmToUPM, &upmToPM);
+ *pmToUPMValue = pmToUPM;
+ *upmToPMValue = upmToPM;
+}
+}
+
+GrCustomStage* GrContext::createPMToUPMEffect(GrTexture* texture, bool swapRAndB) {
+ if (!fDidTestPMConversions) {
+ test_pm_conversions(this, &fPMToUPMConversion, &fUPMToPMConversion);
+ }
+ GrConfigConversionEffect::PMConversion pmToUPM =
+ static_cast<GrConfigConversionEffect::PMConversion>(fPMToUPMConversion);
+ if (GrConfigConversionEffect::kNone_PMConversion != pmToUPM) {
+ return GrConfigConversionEffect::Create(texture, swapRAndB, pmToUPM);
+ } else {
+ return NULL;
+ }
+}
+
+GrCustomStage* GrContext::createUPMToPMEffect(GrTexture* texture, bool swapRAndB) {
+ if (!fDidTestPMConversions) {
+ test_pm_conversions(this, &fPMToUPMConversion, &fUPMToPMConversion);
+ }
+ GrConfigConversionEffect::PMConversion upmToPM =
+ static_cast<GrConfigConversionEffect::PMConversion>(fUPMToPMConversion);
+ if (GrConfigConversionEffect::kNone_PMConversion != upmToPM) {
+ return GrConfigConversionEffect::Create(texture, swapRAndB, upmToPM);
+ } else {
+ return NULL;
+ }
+}
+
GrTexture* GrContext::gaussianBlur(GrTexture* srcTexture,
bool canClobberSrc,
const SkRect& rect,