[GPU] Add explicit byte order and PM vs. UPM 8888 configs

Review URL: http://codereview.appspot.com/5347042/



git-svn-id: http://skia.googlecode.com/svn/trunk@2618 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/bench/benchmain.cpp b/bench/benchmain.cpp
index b809c99..229b80e 100644
--- a/bench/benchmain.cpp
+++ b/bench/benchmain.cpp
@@ -465,7 +465,7 @@
         context = GrContext::Create(kOpenGL_Shaders_GrEngine, ctx);
         if (NULL != context) {
             GrPlatformRenderTargetDesc desc;
-            desc.fConfig = kRGBA_8888_GrPixelConfig;
+            desc.fConfig = kSkia8888_PM_GrPixelConfig;
             desc.fWidth = contextWidth;
             desc.fHeight = contextHeight;
             desc.fStencilBits = 8;
diff --git a/experimental/iOSSampleApp/SkSampleUIView.mm b/experimental/iOSSampleApp/SkSampleUIView.mm
index e06cceb..0e8a674 100644
--- a/experimental/iOSSampleApp/SkSampleUIView.mm
+++ b/experimental/iOSSampleApp/SkSampleUIView.mm
@@ -90,7 +90,7 @@
             desc.fSurfaceType = kRenderTarget_GrPlatformSurfaceType;
             desc.fWidth = SkScalarRound(win->width());
             desc.fHeight = SkScalarRound(win->height());
-            desc.fConfig = kRGBA_8888_GrPixelConfig;
+            desc.fConfig = kSkia8888_PM_GrPixelConfig;
             const GrGLInterface* gl = GrGLGetDefaultGLInterface();
             GrAssert(NULL != gl);
             GR_GL_GetIntegerv(gl, GR_GL_STENCIL_BITS, &desc.fStencilBits);
diff --git a/gm/gmmain.cpp b/gm/gmmain.cpp
index f437906..99b1584 100644
--- a/gm/gmmain.cpp
+++ b/gm/gmmain.cpp
@@ -648,7 +648,7 @@
         gGrContext = GrContext::Create(kOpenGL_Shaders_GrEngine, ctx);
         if (NULL != gGrContext) {
             GrPlatformRenderTargetDesc desc;
-            desc.fConfig = kRGBA_8888_GrPixelConfig;
+            desc.fConfig = kSkia8888_PM_GrPixelConfig;
             desc.fWidth = maxW;
             desc.fHeight = maxH;
             desc.fStencilBits = 8;
diff --git a/gm/texdata.cpp b/gm/texdata.cpp
index e3547af..a55bdeb 100644
--- a/gm/texdata.cpp
+++ b/gm/texdata.cpp
@@ -79,7 +79,7 @@
                 // use RT flag bit because in GL it makes the texture be bottom-up
                 desc.fFlags     = i ? kRenderTarget_GrTextureFlagBit :
                                       kNone_GrTextureFlags;
-                desc.fFormat    = kRGBA_8888_GrPixelConfig;
+                desc.fFormat    = kSkia8888_PM_GrPixelConfig;
                 desc.fWidth     = 2 * S;
                 desc.fHeight    = 2 * S;
                 GrTexture* texture = 
diff --git a/include/gpu/GrColor.h b/include/gpu/GrColor.h
index 5c9e408..ed666de 100644
--- a/include/gpu/GrColor.h
+++ b/include/gpu/GrColor.h
@@ -19,15 +19,12 @@
  */
 typedef uint32_t GrColor;
 
-// indices for address a GrColor as an array of bytes
 
-#define GrColor_INDEX_R     0
-#define GrColor_INDEX_G     1
-#define GrColor_INDEX_B     2
-#define GrColor_INDEX_A     3
-
-// shfit amount to assign a component to a GrColor int
-
+// shift amount to assign a component to a GrColor int
+// These shift values are chosen for compatibility with GL attrib arrays
+// ES doesn't allow BGRA vertex attrib order so if they were not in this order
+// we'd have to swizzle in shaders. Note the assumption that the cpu is little
+// endian.
 #define GrColor_SHIFT_R     0
 #define GrColor_SHIFT_G     8
 #define GrColor_SHIFT_B     16
diff --git a/include/gpu/GrGLConfig.h b/include/gpu/GrGLConfig.h
index 44a3b86..45b551c 100644
--- a/include/gpu/GrGLConfig.h
+++ b/include/gpu/GrGLConfig.h
@@ -150,16 +150,6 @@
     #error "unknown GR_TEXT_SCALAR type"
 #endif
 
-// Pick a pixel config for 32bit bitmaps. Our default is GL_RGBA (except on
-// Windows where we match GDI's order).
-#ifndef GR_GL_32BPP_COLOR_FORMAT
-    #if GR_WIN32_BUILD || GR_LINUX_BUILD
-        #define GR_GL_32BPP_COLOR_FORMAT    GR_GL_BGRA
-    #else
-        #define GR_GL_32BPP_COLOR_FORMAT    GR_GL_RGBA
-    #endif
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////
 
 struct GrGLInterface;
diff --git a/include/gpu/GrGLConfig_chrome.h b/include/gpu/GrGLConfig_chrome.h
index c00f269..d21e2f4 100644
--- a/include/gpu/GrGLConfig_chrome.h
+++ b/include/gpu/GrGLConfig_chrome.h
@@ -8,9 +8,6 @@
 #ifndef GrGLConfig_chrome_DEFINED
 #define GrGLConfig_chrome_DEFINED
 
-// chrome always assumes BGRA
-#define GR_GL_32BPP_COLOR_FORMAT        GR_GL_BGRA
-
 // glGetError() forces a sync with gpu process on chrome
 #define GR_GL_CHECK_ERROR_START         0
 
diff --git a/include/gpu/GrTypes.h b/include/gpu/GrTypes.h
index cec98a3..36ad9e0 100644
--- a/include/gpu/GrTypes.h
+++ b/include/gpu/GrTypes.h
@@ -268,17 +268,57 @@
 
 /**
  * Pixel configurations.
+ * Unpremultiplied configs are intended for conversion out from skia. They are
+ * not supported as input (e.g. drawBitmap or a bitmap shader).
  */
 enum GrPixelConfig {
     kUnknown_GrPixelConfig,
     kAlpha_8_GrPixelConfig,
     kIndex_8_GrPixelConfig,
     kRGB_565_GrPixelConfig,
-    kRGBA_4444_GrPixelConfig, //!< premultiplied
-    kRGBA_8888_GrPixelConfig, //!< premultiplied
-    kRGBX_8888_GrPixelConfig, //!< treat the alpha channel as opaque
+    /**
+     * Premultiplied
+     */
+    kRGBA_4444_GrPixelConfig,
+    /**
+     * Premultiplied. Byte order is r,g,b,a
+     */
+    kRGBA_8888_PM_GrPixelConfig,
+    /**
+     * Unpremultiplied. Byte order is r,g,b,a
+     */
+    kRGBA_8888_UPM_GrPixelConfig,
+    /**
+     * Premultiplied. Byte order is b,g,r,a
+     */
+    kBGRA_8888_PM_GrPixelConfig,
+    /**
+     * Unpremultiplied. Byte order is b,g,r,a
+     */
+    kBGRA_8888_UPM_GrPixelConfig,
 };
 
+// Aliases for pixel configs that match skia's byte order
+#ifndef SK_CPU_LENDIAN
+    #error "Skia gpu currently assumes little endian"
+#endif
+#if 24 == SK_A32_SHIFT && 16 == SK_R32_SHIFT && \
+     8 == SK_G32_SHIFT &&  0 == SK_B32_SHIFT
+    static const GrPixelConfig kSkia8888_PM_GrPixelConfig = kBGRA_8888_PM_GrPixelConfig;
+    static const GrPixelConfig kSkia8888_UPM_GrPixelConfig = kBGRA_8888_UPM_GrPixelConfig;
+#elif 24 == SK_A32_SHIFT && 16 == SK_B32_SHIFT && \
+       8 == SK_G32_SHIFT &&  0 == SK_R32_SHIFT
+    static const GrPixelConfig kSkia8888_PM_GrPixelConfig = kRGBA_8888_PM_GrPixelConfig;
+    static const GrPixelConfig kSkia8888_UPM_GrPixelConfig = kRGBA_8888_UPM_GrPixelConfig;
+#else
+    #error "SK_*32_SHIFT values must correspond to GL_BGRA or GL_RGBA format."
+#endif
+
+// WebKit is relying on this old name for the native skia PM config. This will
+// be deleted ASAP because it is so similar to kRGBA_PM_8888_GrPixelConfig but
+// has a different interpretation when skia is compiled BGRA.
+static const GrPixelConfig kRGBA_8888_GrPixelConfig = kSkia8888_PM_GrPixelConfig;
+
 static inline size_t GrBytesPerPixel(GrPixelConfig config) {
     switch (config) {
         case kAlpha_8_GrPixelConfig:
@@ -287,8 +327,10 @@
         case kRGB_565_GrPixelConfig:
         case kRGBA_4444_GrPixelConfig:
             return 2;
-        case kRGBA_8888_GrPixelConfig:
-        case kRGBX_8888_GrPixelConfig:
+        case kRGBA_8888_PM_GrPixelConfig:
+        case kRGBA_8888_UPM_GrPixelConfig:
+        case kBGRA_8888_PM_GrPixelConfig:
+        case kBGRA_8888_UPM_GrPixelConfig:
             return 4;
         default:
             return 0;
@@ -298,7 +340,20 @@
 static inline bool GrPixelConfigIsOpaque(GrPixelConfig config) {
     switch (config) {
         case kRGB_565_GrPixelConfig:
-        case kRGBX_8888_GrPixelConfig:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/**
+ * Premultiplied alpha is the usual for skia. Therefore, configs that are
+ * ambiguous (alpha-only or color-only) are considered premultiplied.
+ */
+static inline bool GrPixelConfigIsUnpremultiplied(GrPixelConfig config) {
+    switch (config) {
+        case kRGBA_8888_UPM_GrPixelConfig:
+        case kBGRA_8888_UPM_GrPixelConfig:
             return true;
         default:
             return false;
@@ -710,7 +765,7 @@
  * renderTargetTextureDesc.fRenderTargetFlags = kGrCanResolve_GrPlatformRenderTargetFlagBit;
  * renderTargetTextureDesc.fWidth = W;
  * renderTargetTextureDesc.fHeight = H;
- * renderTargetTextureDesc.fConfig = kRGBA_8888_GrPixelConfig
+ * renderTargetTextureDesc.fConfig = kSkia8888_PM_GrPixelConfig
  * renderTargetTextureDesc.fStencilBits = 8;
  * renderTargetTextureDesc.fSampleCnt = S;
  * renderTargetTextureDesc.fPlatformTexture = textureID;
diff --git a/samplecode/SampleApp.cpp b/samplecode/SampleApp.cpp
index e95c10a..2578696 100644
--- a/samplecode/SampleApp.cpp
+++ b/samplecode/SampleApp.cpp
@@ -171,7 +171,8 @@
                 fGrContext->setRenderTarget(fGrRenderTarget);
                 const SkBitmap& bm = win->getBitmap();
                 fGrContext->writePixels(0, 0, bm.width(), bm.height(),
-                                        kRGBA_8888_GrPixelConfig, bm.getPixels(),
+                                        kSkia8888_PM_GrPixelConfig,
+                                        bm.getPixels(),
                                         bm.rowBytes());
             }
         }
@@ -185,7 +186,7 @@
             GrPlatformRenderTargetDesc desc;
             desc.fWidth = SkScalarRound(win->width());
             desc.fHeight = SkScalarRound(win->height());
-            desc.fConfig = kRGBA_8888_GrPixelConfig;
+            desc.fConfig = kSkia8888_PM_GrPixelConfig;
             GR_GL_GetIntegerv(fGL, GR_GL_SAMPLES, &desc.fSampleCnt);
             GR_GL_GetIntegerv(fGL, GR_GL_STENCIL_BITS, &desc.fStencilBits);
             GrGLint buffer;
@@ -199,7 +200,7 @@
             GrPlatformRenderTargetDesc desc;
             desc.fWidth = SkScalarRound(win->width());
             desc.fHeight = SkScalarRound(win->height());
-            desc.fConfig = kRGBA_8888_GrPixelConfig;
+            desc.fConfig = kSkia8888_PM_GrPixelConfig;
             desc.fStencilBits = 8;
             desc.fSampleCnt = 0;
             desc.fRenderTargetHandle = 0;
diff --git a/src/gpu/GrAtlas.cpp b/src/gpu/GrAtlas.cpp
index 0838895..244f694 100644
--- a/src/gpu/GrAtlas.cpp
+++ b/src/gpu/GrAtlas.cpp
@@ -141,7 +141,7 @@
         case kA565_GrMaskFormat:
             return kRGB_565_GrPixelConfig;
         case kA888_GrMaskFormat:
-            return kRGBA_8888_GrPixelConfig;
+            return kSkia8888_PM_GrPixelConfig;
         default:
             GrAssert(!"unknown maskformat");
     }
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index f4e5e9b..8c06c21 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -718,7 +718,7 @@
                       kNoStencil_GrTextureFlagBit;
     }
 
-    desc.fConfig = kRGBA_8888_GrPixelConfig;
+    desc.fConfig = kRGBA_8888_PM_GrPixelConfig;
 
     if (PREFER_MSAA_OFFSCREEN_AA && fGpu->getCaps().fFSAASupport) {
         record->fDownsample = OffscreenRecord::kFSAA_Downsample;
@@ -1610,9 +1610,8 @@
     if (kDiscard_FlushBit & flagsBitfield) {
         fDrawBuffer->reset();
     } else {
-        flushDrawBuffer();
+        this->flushDrawBuffer();
     }
-
     if (kForceCurrentRenderTarget_FlushBit & flagsBitfield) {
         fGpu->forceRenderTargetFlush();
     }
@@ -1643,9 +1642,9 @@
     this->flush();
     GrRenderTarget* target = texture->asRenderTarget();
     if (NULL != target) {
-        return fGpu->readPixels(target,
-                                left, top, width, height, 
-                                config, buffer, 0);
+        return this->readRenderTargetPixels(target,
+                                            left, top, width, height, 
+                                            config, buffer, 0);
     } else {
         return false;
     }
@@ -1656,15 +1655,87 @@
                                        GrPixelConfig config, void* buffer,
                                        size_t rowBytes) {
     SK_TRACE_EVENT0("GrContext::readRenderTargetPixels");
-    uint32_t flushFlags = 0;
     if (NULL == target) { 
-        flushFlags |= GrContext::kForceCurrentRenderTarget_FlushBit;
+        target = fGpu->getRenderTarget();
+        if (NULL == target) {
+            return false;
+        }
+    }
+    
+    // PM <-> UPM conversion requires a draw. Currently we only support drawing
+    // into a UPM target, not reading from a UPM texture. Thus, UPM->PM is not
+    // not supported at this time.
+    if (GrPixelConfigIsUnpremultiplied(target->config()) && 
+        !GrPixelConfigIsUnpremultiplied(config)) {
+        return false;
     }
 
-    this->flush(flushFlags);
+    this->flush();
+
+    GrTexture* src = target->asTexture();
+
+    bool flipY = NULL != src &&
+                 fGpu->readPixelsWillPayForYFlip(target, left, top,
+                                                 width, height, config,
+                                                 rowBytes);
+
+    if (flipY || (!GrPixelConfigIsUnpremultiplied(target->config()) &&
+                  GrPixelConfigIsUnpremultiplied(config))) {
+        if (!src) {
+            // we should fallback to cpu conversion here. This could happen when
+            // we were given an external render target by the client that is not
+            // also a texture (e.g. FBO 0 in GL)
+            return false;
+        }
+        // Make the scratch a render target because we don't have a robust
+        // readTexturePixels as of yet (it calls this function).
+        const GrTextureDesc desc = {
+            kRenderTarget_GrTextureFlagBit,
+            kNone_GrAALevel,
+            width, height,
+            config
+        };
+        GrAutoScratchTexture ast(this, desc);
+        GrTexture* texture = ast.texture();
+        if (!texture) {
+            return false;
+        }
+        target = texture->asRenderTarget();
+        fGpu->setRenderTarget(target);
+        GrAssert(NULL != target);
+
+        GrDrawTarget::AutoStateRestore asr(fGpu);
+
+        fGpu->setViewMatrix(GrMatrix::I());
+        fGpu->setColorFilter(0, SkXfermode::kDst_Mode);
+        fGpu->disableState(GrDrawTarget::kClip_StateBit);
+        fGpu->setAlpha(0xFF);
+        fGpu->setBlendFunc(kOne_BlendCoeff,
+                           kZero_BlendCoeff);
+
+        GrSamplerState sampler;
+        sampler.setClampNoFilter();
+        GrMatrix matrix;
+        if (flipY) {
+            matrix.setTranslate(SK_Scalar1 * left,
+                                SK_Scalar1 * (top + height));
+            matrix.set(GrMatrix::kMScaleY, -GR_Scalar1);
+        } else {
+            matrix.setTranslate(SK_Scalar1 *left, SK_Scalar1 *top);
+        }
+        matrix.postIDiv(src->width(), src->height());
+        sampler.setMatrix(matrix);
+        fGpu->setSamplerState(0, sampler);
+        fGpu->setTexture(0, src);
+        GrRect rect;
+        rect.setXYWH(0, 0, SK_Scalar1 * width, SK_Scalar1 * height);
+        fGpu->drawSimpleRect(rect, NULL, 0x1);
+        left = 0;
+        top = 0;
+    }
     return fGpu->readPixels(target,
-                            left, top, width, height, 
-                            config, buffer, rowBytes);
+                            left, top, width, height,
+                            config, buffer, rowBytes, flipY);
 }
 
 void GrContext::writePixels(int left, int top, int width, int height,
@@ -1675,7 +1746,7 @@
     // TODO: when underlying api has a direct way to do this we should use it
     // (e.g. glDrawPixels on desktop GL).
 
-    this->flush(true);
+    this->flush(kForceCurrentRenderTarget_FlushBit);
 
     const GrTextureDesc desc = {
         kNone_GrTextureFlags, kNone_GrAALevel, width, height, config
@@ -1949,3 +2020,5 @@
     fGpu->setBlendFunc(kOne_BlendCoeff, kZero_BlendCoeff);
     fGpu->drawSimpleRect(rect, NULL, 1 << 0);
 }
+
+///////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GrDrawTarget.cpp b/src/gpu/GrDrawTarget.cpp
index 8f4f3eb..6b51ca2 100644
--- a/src/gpu/GrDrawTarget.cpp
+++ b/src/gpu/GrDrawTarget.cpp
@@ -800,6 +800,12 @@
         }
     }
 #endif
+    for (int s = 0; s < GrDrawState::kNumStages; ++s) {
+        if (this->isStageEnabled(s) &&
+            GrPixelConfigIsUnpremultiplied(fCurrDrawState.fTextures[s]->config())) {
+            return false;
+        }
+    }
     return true;
 }
 
@@ -814,7 +820,6 @@
     }
 }
 
-
 void GrDrawTarget::drawNonIndexed(GrPrimitiveType type,
                                   int startVertex,
                                   int vertexCount) {
diff --git a/src/gpu/GrGLProgram.cpp b/src/gpu/GrGLProgram.cpp
index 554a274..89ced68 100644
--- a/src/gpu/GrGLProgram.cpp
+++ b/src/gpu/GrGLProgram.cpp
@@ -892,6 +892,13 @@
                         inColor.c_str(),
                         inCoverage.c_str(),
                         &segments.fFSCode);
+        if (ProgramDesc::kNo_OutputPM == fProgramDesc.fOutputPM) {
+            segments.fFSCode.appendf("\t%s = vec4(%s.rgb / %s.a, %s.a);\n",
+                                     fsColorOutput,
+                                     fsColorOutput,
+                                     fsColorOutput,
+                                     fsColorOutput);
+        }
     }
 
     segments.fVSCode.append("}\n");
diff --git a/src/gpu/GrGLProgram.h b/src/gpu/GrGLProgram.h
index 53bc0d8..9abf1b1 100644
--- a/src/gpu/GrGLProgram.h
+++ b/src/gpu/GrGLProgram.h
@@ -102,6 +102,15 @@
             memset(this, 0, sizeof(ProgramDesc));
         }
 
+        enum OutputPM {
+            // PM-color OR color with no alpha channel
+            kYes_OutputPM,
+            // nonPM-color with alpha channel
+            kNo_OutputPM,
+
+            kOutputPMCnt
+        };
+
         struct StageDesc {
             enum OptFlagBits {
                 kNoPerspective_OptFlagBit       = 1 << 0,
@@ -195,6 +204,7 @@
 #endif
 
         uint8_t fColorInput;        // casts to enum ColorInput
+        uint8_t fOutputPM;          // cases to enum OutputPM
         uint8_t fDualSrcOutput;     // casts to enum DualSrcOutput
         int8_t fFirstCoverageStage;
         SkBool8 fEmitsPointSize;
@@ -203,8 +213,6 @@
         int8_t fEdgeAANumEdges;
         uint8_t fColorFilterXfermode;  // casts to enum SkXfermode::Mode
 
-        uint8_t fPadTo32bLengthMultiple [1];
-
     } fProgramDesc;
     GR_STATIC_ASSERT(!(sizeof(ProgramDesc) % 4));
 
diff --git a/src/gpu/GrGpu.cpp b/src/gpu/GrGpu.cpp
index b7d1395..7da560e 100644
--- a/src/gpu/GrGpu.cpp
+++ b/src/gpu/GrGpu.cpp
@@ -242,11 +242,12 @@
 bool GrGpu::readPixels(GrRenderTarget* target,
                        int left, int top, int width, int height,
                        GrPixelConfig config, void* buffer,
-                       size_t rowBytes) {
-
+                       size_t rowBytes, bool invertY) {
+    GrAssert(GrPixelConfigIsUnpremultiplied(config) ==
+             GrPixelConfigIsUnpremultiplied(target->config()));
     this->handleDirtyContext();
     return this->onReadPixels(target, left, top, width, height,
-                              config, buffer, rowBytes);
+                              config, buffer, rowBytes, invertY);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index b0aaa18..11edfe3 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -184,7 +184,34 @@
     void forceRenderTargetFlush();
 
     /**
-     * Reads a rectangle of pixels from a render target.
+     * OpenGL's readPixels returns the result bottom-to-top while the skia
+     * API is top-to-bottom. Thus we have to do a y-axis flip. The obvious
+     * solution is to have the subclass do the flip using either the CPU or GPU.
+     * However, the caller (GrContext) may have transformations to apply and can
+     * simply fold in the y-flip for free. On the other hand, the subclass may
+     * be able to do it for free itself. For example, the subclass may have to
+     * do memcpys to handle rowBytes that aren't tight. It could do the y-flip 
+     * concurrently.
+     *
+     * This function returns true if a y-flip is required to put the pixels in
+     * top-to-bottom order and the subclass cannot do it for free.
+     *
+     * See read pixels for the params
+     * @return true if calling readPixels with the same set of params will
+     *              produce bottom-to-top data
+     */
+     virtual bool readPixelsWillPayForYFlip(GrRenderTarget* renderTarget,
+                                            int left, int top,
+                                            int width, int height,
+                                            GrPixelConfig config,
+                                            size_t rowBytes) = 0;
+
+    /**
+     * Reads a rectangle of pixels from a render target. Fails if read requires
+     * conversion between premultiplied and unpremultiplied configs. The caller
+     * should do the conversion by rendering to a target with the desire config
+     * first.
+     *
      * @param renderTarget  the render target to read from. NULL means the
      *                      current render target.
      * @param left          left edge of the rectangle to read (inclusive)
@@ -195,6 +222,8 @@
      * @param buffer        memory to read the rectangle into.
      * @param rowBytes      the number of bytes between consecutive rows. Zero
      *                      means rows are tightly packed.
+     * @param invertY       buffer should be populated bottom-to-top as opposed
+     *                      to top-to-bottom (skia's usual order)
      *
      * @return true if the read succeeded, false if not. The read can fail
      *              because of a unsupported pixel config or because no render
@@ -202,7 +231,8 @@
      */
     bool readPixels(GrRenderTarget* renderTarget,
                     int left, int top, int width, int height,
-                    GrPixelConfig config, void* buffer, size_t rowBytes);
+                    GrPixelConfig config, void* buffer, size_t rowBytes,
+                    bool invertY);
 
     const GrGpuStats& getStats() const;
     void resetStats();
@@ -356,7 +386,10 @@
     // overridden by API-specific derived class to perform the read pixels.
     virtual bool onReadPixels(GrRenderTarget* target,
                               int left, int top, int width, int height,
-                              GrPixelConfig, void* buffer, size_t rowBytes) = 0;
+                              GrPixelConfig,
+                              void* buffer,
+                              size_t rowBytes,
+                              bool invertY) = 0;
 
     // called to program the vertex data, indexCount will be 0 if drawing non-
     // indexed geometry. The subclass may adjust the startVertex and/or
diff --git a/src/gpu/GrGpuGL.cpp b/src/gpu/GrGpuGL.cpp
index a3562b2..016b110 100644
--- a/src/gpu/GrGpuGL.cpp
+++ b/src/gpu/GrGpuGL.cpp
@@ -384,14 +384,24 @@
     if (kDesktop_GrGLBinding == this->glBinding()) {
         fGLCaps.fRGBA8Renderbuffer = true;
     } else {
-        fGLCaps.fRGBA8Renderbuffer = this->hasExtension("GL_OES_rgb8_rgba8");
+        fGLCaps.fRGBA8Renderbuffer = this->hasExtension("GL_OES_rgb8_rgba8") ||
+                                     this->hasExtension("GL_ARM_rgba8");
     }
 
 
-    if (kDesktop_GrGLBinding != this->glBinding()) {
-        if (GR_GL_32BPP_COLOR_FORMAT == GR_GL_BGRA) {
-            GrAssert(this->hasExtension("GL_EXT_texture_format_BGRA8888"));
+    if (kDesktop_GrGLBinding == this->glBinding()) {
+        fGLCaps.fBGRAFormat = this->glVersion() >= GR_GL_VER(1,2) ||
+                              this->hasExtension("GL_EXT_bgra");
+    } else {
+        bool hasBGRAExt = false;
+        if (this->hasExtension("GL_APPLE_texture_format_BGRA8888")) {
+            fGLCaps.fBGRAFormat = true;
+        } else if (this->hasExtension("GL_EXT_texture_format_BGRA8888")) {
+            fGLCaps.fBGRAFormat = true;
+            fGLCaps.fBGRAInternalFormat = true;
         }
+        GrAssert(fGLCaps.fBGRAFormat ||
+                 kSkia8888_PM_GrPixelConfig != kBGRA_8888_PM_GrPixelConfig);
     }
 
     if (kDesktop_GrGLBinding == this->glBinding()) {
@@ -1497,18 +1507,35 @@
     this->flushRenderTarget(&GrIRect::EmptyIRect());
 }
 
+bool GrGpuGL::readPixelsWillPayForYFlip(GrRenderTarget* renderTarget,
+                                        int left, int top,
+                                        int width, int height,
+                                        GrPixelConfig config,
+                                        size_t rowBytes) {
+    if (kDesktop_GrGLBinding == this->glBinding()) {
+        return false;
+    } else {
+        // On ES we'll have to do memcpys to handle rowByte padding. So, we
+        // might as well flipY while we're at it.
+        return 0 != rowBytes &&
+               GrBytesPerPixel(config) * width != rowBytes; 
+    }
+}
+
 bool GrGpuGL::onReadPixels(GrRenderTarget* target,
                            int left, int top,
                            int width, int height,
-                           GrPixelConfig config, 
-                           void* buffer, size_t rowBytes) {
+                           GrPixelConfig config,
+                           void* buffer,
+                           size_t rowBytes,
+                           bool invertY) {
     GrGLenum internalFormat;  // we don't use this for glReadPixels
     GrGLenum format;
     GrGLenum type;
     if (!this->canBeTexture(config, &internalFormat, &format, &type)) {
         return false;
     }
-    
+
     // resolve the render target if necessary
     GrGLRenderTarget* tgt = static_cast<GrGLRenderTarget*>(target);
     GrAutoTPtrValueRestore<GrRenderTarget*> autoTargetRestore;
@@ -1568,29 +1595,38 @@
     // that the above readPixels did not overwrite the padding.
     if (readDst == buffer) {
         GrAssert(rowBytes == readDstRowBytes);
-        scratch.reset(tightRowBytes);
-        void* tmpRow = scratch.get();
-        // flip y in-place by rows
-        const int halfY = height >> 1;
-        char* top = reinterpret_cast<char*>(buffer);
-        char* bottom = top + (height - 1) * rowBytes;
-        for (int y = 0; y < halfY; y++) {
-            memcpy(tmpRow, top, tightRowBytes);
-            memcpy(top, bottom, tightRowBytes);
-            memcpy(bottom, tmpRow, tightRowBytes);
-            top += rowBytes;
-            bottom -= rowBytes;
+        if (!invertY) {
+            scratch.reset(tightRowBytes);
+            void* tmpRow = scratch.get();
+            // flip y in-place by rows
+            const int halfY = height >> 1;
+            char* top = reinterpret_cast<char*>(buffer);
+            char* bottom = top + (height - 1) * rowBytes;
+            for (int y = 0; y < halfY; y++) {
+                memcpy(tmpRow, top, tightRowBytes);
+                memcpy(top, bottom, tightRowBytes);
+                memcpy(bottom, tmpRow, tightRowBytes);
+                top += rowBytes;
+                bottom -= rowBytes;
+            }
         }
     } else {
-        GrAssert(readDst != buffer);
+        GrAssert(readDst != buffer);        GrAssert(rowBytes != tightRowBytes);
         // copy from readDst to buffer while flipping y
         const int halfY = height >> 1;
         const char* src = reinterpret_cast<const char*>(readDst);
-        char* dst = reinterpret_cast<char*>(buffer) + (height-1) * rowBytes;
+        char* dst = reinterpret_cast<char*>(buffer);
+        if (!invertY) {
+            dst += (height-1) * rowBytes;
+        }
         for (int y = 0; y < height; y++) {
             memcpy(dst, src, tightRowBytes);
             src += readDstRowBytes;
-            dst -= rowBytes;
+            if (invertY) {
+                dst += rowBytes;
+            } else {
+                dst -= rowBytes;
+            }
         }
     }
     return true;
@@ -2249,13 +2285,20 @@
                            GrGLenum* format,
                            GrGLenum* type) {
     switch (config) {
-        case kRGBA_8888_GrPixelConfig:
-        case kRGBX_8888_GrPixelConfig: // todo: can we tell it our X?
-            *format = GR_GL_32BPP_COLOR_FORMAT;
-            if (kDesktop_GrGLBinding != this->glBinding()) {
-                // according to GL_EXT_texture_format_BGRA8888 the *internal*
-                // format for a BGRA is BGRA not RGBA (as on desktop)
-                *internalFormat = GR_GL_32BPP_COLOR_FORMAT;
+        case kRGBA_8888_PM_GrPixelConfig:
+        case kRGBA_8888_UPM_GrPixelConfig:
+            *format = GR_GL_RGBA;
+            *internalFormat = GR_GL_RGBA;
+            *type = GR_GL_UNSIGNED_BYTE;
+            break;
+        case kBGRA_8888_PM_GrPixelConfig:
+        case kBGRA_8888_UPM_GrPixelConfig:
+            if (!fGLCaps.fBGRAFormat) {
+                return false;
+            }
+            *format = GR_GL_BGRA;
+            if (fGLCaps.fBGRAInternalFormat) {
+                *internalFormat = GR_GL_BGRA;
             } else {
                 *internalFormat = GR_GL_RGBA;
             }
@@ -2314,9 +2357,22 @@
  */
 bool GrGpuGL::fboInternalFormat(GrPixelConfig config, GrGLenum* format) {
     switch (config) {
-        case kRGBA_8888_GrPixelConfig:
-        case kRGBX_8888_GrPixelConfig:
+        // The ES story for BGRA and RenderbufferStorage appears murky. It
+        // takes an internal format as a parameter. The OES FBO extension and
+        // 2.0 spec don't refer to BGRA as it's not part of the core. One ES
+        // BGRA extensions adds BGRA as both an internal and external format
+        // and the other only as an external format (like desktop GL). OES
+        // restricts RenderbufferStorage's format to a *sized* internal format.
+        // There is no sized BGRA internal format.
+        // So if the texture has internal format BGRA we just hope that the
+        // resolve blit can do RGBA->BGRA internal format conversion.
+        case kRGBA_8888_PM_GrPixelConfig:
+        case kRGBA_8888_UPM_GrPixelConfig:
+        case kBGRA_8888_PM_GrPixelConfig:
+        case kBGRA_8888_UPM_GrPixelConfig:
             if (fGLCaps.fRGBA8Renderbuffer) {
+                // The GL_OES_rgba8_rgb8 extension defines GL_RGBA8 as a sized
+                // internal format.
                 *format = GR_GL_RGBA8;
                 return true;
             } else {
@@ -2326,9 +2382,12 @@
             // ES2 supports 565. ES1 supports it
             // with FBO extension desktop GL has
             // no such internal format
-            GrAssert(kDesktop_GrGLBinding != this->glBinding());  
-            *format = GR_GL_RGB565;
-            return true;
+            if (kDesktop_GrGLBinding != this->glBinding()) {
+                *format = GR_GL_RGB565;
+                return true;
+            } else {
+                return false;
+            }
         case kRGBA_4444_GrPixelConfig:
             *format = GR_GL_RGBA4;
             return true;
@@ -2435,6 +2494,8 @@
     GrPrintf("Max FS Uniform Vectors: %d\n", fMaxFragmentUniformVectors);
     GrPrintf("Support RGBA8 Render Buffer: %s\n",
              (fRGBA8Renderbuffer ? "YES": "NO"));
+    GrPrintf("BGRA is an internal format: %s\n",
+             (fBGRAInternalFormat ? "YES": "NO"));
     GrPrintf("Support texture swizzle: %s\n",
              (fTextureSwizzle ? "YES": "NO"));
 }
diff --git a/src/gpu/GrGpuGL.h b/src/gpu/GrGpuGL.h
index 3f10967..1dd9c80 100644
--- a/src/gpu/GrGpuGL.h
+++ b/src/gpu/GrGpuGL.h
@@ -22,7 +22,6 @@
 #include "SkString.h"
 
 class GrGpuGL : public GrGpu {
-
 public:
     virtual ~GrGpuGL();
 
@@ -30,22 +29,30 @@
     GrGLBinding glBinding() const { return fGLBinding; }
     GrGLVersion glVersion() const { return fGLVersion; }
 
+    virtual bool readPixelsWillPayForYFlip(
+                                    GrRenderTarget* renderTarget,
+                                    int left, int top,
+                                    int width, int height,
+                                    GrPixelConfig config,
+                                    size_t rowBytes) SK_OVERRIDE;
+
 protected:
     GrGpuGL(const GrGLInterface* glInterface, GrGLBinding glBinding);
 
     struct GLCaps {
         GLCaps()
             // make defaults be the most restrictive
-            : fMSFBOType(kNone_MSFBO)
+            : fStencilFormats(8) // prealloc space for    stencil formats
+            , fMSFBOType(kNone_MSFBO)
             , fMaxFragmentUniformVectors(0)
             , fRGBA8Renderbuffer(false)
             , fBGRAFormat(false)
-            , fStencilFormats(8) // prealloc space for    stencil formats
+            , fBGRAInternalFormat(false)
             , fTextureSwizzle(false) {
             memset(fAASamples, 0, sizeof(fAASamples));
         }
         SkTArray<GrGLStencilBuffer::Format, true> fStencilFormats;
-    
+
         enum {
             /**
              * no support for MSAA FBOs
@@ -64,19 +71,24 @@
              */
             kAppleES_MSFBO,
         } fMSFBOType;
-    
+
         // TODO: get rid of GrAALevel and use sample cnt directly
         GrGLuint fAASamples[4];
-    
+
         // The maximum number of fragment uniform vectors (GLES has min. 16).
         int fMaxFragmentUniformVectors;
-    
+
         // ES requires an extension to support RGBA8 in RenderBufferStorage
         bool fRGBA8Renderbuffer;
-    
+
         // Is GL_BGRA supported
         bool fBGRAFormat;
-    
+
+        // Depending on the ES extensions present the BGRA external format may
+        // correspond either a BGRA or RGBA internalFormat. On desktop GL it is
+        // RGBA
+        bool fBGRAInternalFormat;
+
         // GL_ARB_texture_swizzle support
         bool fTextureSwizzle;
     
@@ -148,7 +160,9 @@
                               int left, int top, 
                               int width, int height,
                               GrPixelConfig, 
-                              void* buffer, size_t rowBytes) SK_OVERRIDE;
+                              void* buffer,
+                              size_t rowBytes,
+                              bool invertY) SK_OVERRIDE;
 
     virtual void onGpuDrawIndexed(GrPrimitiveType type,
                                   uint32_t startVertex,
diff --git a/src/gpu/GrGpuGLShaders.cpp b/src/gpu/GrGpuGLShaders.cpp
index e3d2c2e..8e7c25f 100644
--- a/src/gpu/GrGpuGLShaders.cpp
+++ b/src/gpu/GrGpuGLShaders.cpp
@@ -221,6 +221,8 @@
         pdesc.fExperimentalGS = this->getCaps().fGeometryShaderSupport &&
                                 random.nextF() > .5f;
 #endif
+        pdesc.fOutputPM =  static_cast<int>(random.nextF() *
+                                            ProgramDesc::kOutputPMCnt);
 
         bool edgeAA = random.nextF() > .5f;
         if (edgeAA) {
@@ -1008,6 +1010,7 @@
             } else {
                 stage.fInputConfig = StageDesc::kColor_InputConfig;
             }
+
             if (sampler.getFilter() == GrSamplerState::kConvolution_Filter) {
                 stage.fKernelWidth = sampler.getKernelWidth();
             } else {
@@ -1022,6 +1025,12 @@
         }
     }
 
+    if (GrPixelConfigIsUnpremultiplied(fCurrDrawState.fRenderTarget->config())) {
+        desc.fOutputPM = ProgramDesc::kNo_OutputPM;
+    } else {
+        desc.fOutputPM = ProgramDesc::kYes_OutputPM;
+    }
+
     desc.fDualSrcOutput = ProgramDesc::kNone_DualSrcOutput;
 
     // currently the experimental GS will only work with triangle prims
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 730f631..031333b 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -116,9 +116,10 @@
         case kRGBA_4444_GrPixelConfig:
             *isOpaque = false;
             return SkBitmap::kARGB_4444_Config;
-        case kRGBA_8888_GrPixelConfig:
-        case kRGBX_8888_GrPixelConfig:
-            *isOpaque = (kRGBX_8888_GrPixelConfig == config);
+        case kSkia8888_PM_GrPixelConfig:
+            // we don't currently have a way of knowing whether
+            // a 8888 is opaque based on the config.
+            *isOpaque = false;
             return SkBitmap::kARGB_8888_Config;
         default:
             *isOpaque = false;
@@ -256,23 +257,43 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+namespace {
+GrPixelConfig config8888_to_gr_config(SkCanvas::Config8888 config8888) {
+    switch (config8888) {
+        case SkCanvas::kNative_Premul_Config8888:
+            return kSkia8888_PM_GrPixelConfig;
+        case SkCanvas::kNative_Unpremul_Config8888:
+            return kSkia8888_UPM_GrPixelConfig;
+        case SkCanvas::kBGRA_Premul_Config8888:
+            return kBGRA_8888_PM_GrPixelConfig;
+        case SkCanvas::kBGRA_Unpremul_Config8888:
+            return kBGRA_8888_UPM_GrPixelConfig;
+        case SkCanvas::kRGBA_Premul_Config8888:
+            return kRGBA_8888_PM_GrPixelConfig;
+        case SkCanvas::kRGBA_Unpremul_Config8888:
+            return kRGBA_8888_UPM_GrPixelConfig;
+        default:
+            GrCrash("Unexpected Config8888.");
+            return kSkia8888_PM_GrPixelConfig;
+    }
+}
+}
+
 bool SkGpuDevice::onReadPixels(const SkBitmap& bitmap,
                                int x, int y,
                                SkCanvas::Config8888 config8888) {
-    // support for non-native configs coming soon
-    if (config8888 != SkCanvas::kNative_Premul_Config8888) {
-        return false;
-    }
     SkASSERT(SkBitmap::kARGB_8888_Config == bitmap.config());
     SkASSERT(!bitmap.isNull());
     SkASSERT(SkIRect::MakeWH(this->width(), this->height()).contains(SkIRect::MakeXYWH(x, y, bitmap.width(), bitmap.height())));
 
     SkAutoLockPixels alp(bitmap);
+    GrPixelConfig config;
+    config = config8888_to_gr_config(config8888);
     return fContext->readRenderTargetPixels(fRenderTarget,
                                             x, y,
                                             bitmap.width(),
                                             bitmap.height(),
-                                            kRGBA_8888_GrPixelConfig,
+                                            config,
                                             bitmap.getPixels(),
                                             bitmap.rowBytes());
 }
@@ -739,7 +760,7 @@
         srcRect.height(),
         // We actually only need A8, but it often isn't supported as a
         // render target
-        kRGBA_8888_GrPixelConfig
+        kRGBA_8888_PM_GrPixelConfig
     };
 
     GrAutoScratchTexture srcEntry(context, desc);
diff --git a/src/gpu/SkGr.cpp b/src/gpu/SkGr.cpp
index bbb55c8..6fe8292 100644
--- a/src/gpu/SkGr.cpp
+++ b/src/gpu/SkGr.cpp
@@ -207,11 +207,7 @@
         case SkBitmap::kARGB_4444_Config:
             return kRGBA_4444_GrPixelConfig;
         case SkBitmap::kARGB_8888_Config:
-            if (isOpaque) {
-                return kRGBX_8888_GrPixelConfig;
-            } else {
-                return kRGBA_8888_GrPixelConfig;
-            }
+            return kSkia8888_PM_GrPixelConfig;
         default:
             return kUnknown_GrPixelConfig;
     }
diff --git a/src/gpu/SkGrTexturePixelRef.cpp b/src/gpu/SkGrTexturePixelRef.cpp
index 40db1c5..140993a 100644
--- a/src/gpu/SkGrTexturePixelRef.cpp
+++ b/src/gpu/SkGrTexturePixelRef.cpp
@@ -78,7 +78,7 @@
         SkAutoLockPixels al(*dst);
         void* buffer = dst->getPixels();
         return fTexture->readPixels(left, top, width, height,
-                                    kRGBA_8888_GrPixelConfig,
+                                    kSkia8888_PM_GrPixelConfig,
                                     buffer);
     } else {
         return false;
@@ -122,7 +122,7 @@
         SkAutoLockPixels al(*dst);
         void* buffer = dst->getPixels();
         return fRenderTarget->readPixels(left, top, width, height,
-                                         kRGBA_8888_GrPixelConfig,
+                                         kSkia8888_PM_GrPixelConfig,
                                          buffer);
     } else {
         return false;
diff --git a/tests/ReadPixelsTest.cpp b/tests/ReadPixelsTest.cpp
index e2b21c4..5a6604a 100644
--- a/tests/ReadPixelsTest.cpp
+++ b/tests/ReadPixelsTest.cpp
@@ -27,7 +27,7 @@
     U8CPU b = 0xc;
 
     U8CPU a = 0xff;
-    switch (x % 5) {
+    switch ((x+y) % 5) {
         case 0:
             a = 0xff;
             break;
@@ -57,22 +57,23 @@
 }
 
 SkPMColor convertConfig8888ToPMColor(SkCanvas::Config8888 config8888,
-                                     uint32_t color) {
+                                     uint32_t color,
+                                     bool* premul) {
     const uint8_t* c = reinterpret_cast<uint8_t*>(&color);
     U8CPU a,r,g,b;
-    bool mul = false;
+    *premul = false;
     switch (config8888) {
         case SkCanvas::kNative_Premul_Config8888:
             return color;
         case SkCanvas::kNative_Unpremul_Config8888:
-            mul = true;
+            *premul = true;
             a = SkGetPackedA32(color);
             r = SkGetPackedR32(color);
             g = SkGetPackedG32(color);
             b = SkGetPackedB32(color);
             break;
         case SkCanvas::kBGRA_Unpremul_Config8888:
-            mul = true; // fallthru
+            *premul = true; // fallthru
         case SkCanvas::kBGRA_Premul_Config8888:
             a = static_cast<U8CPU>(c[3]);
             r = static_cast<U8CPU>(c[2]);
@@ -80,7 +81,7 @@
             b = static_cast<U8CPU>(c[0]);
             break;
         case SkCanvas::kRGBA_Unpremul_Config8888:
-            mul = true; // fallthru
+            *premul = true; // fallthru
         case SkCanvas::kRGBA_Premul_Config8888:
             a = static_cast<U8CPU>(c[3]);
             r = static_cast<U8CPU>(c[0]);
@@ -88,7 +89,7 @@
             b = static_cast<U8CPU>(c[2]);
             break;
     }
-    if (mul) {
+    if (*premul) {
         r = SkMulDiv255Ceiling(r, a);
         g = SkMulDiv255Ceiling(g, a);
         b = SkMulDiv255Ceiling(b, a);
@@ -134,6 +135,26 @@
     }
 }
 
+bool checkPixel(SkPMColor a, SkPMColor b, bool didPremulConversion) {
+    if (!didPremulConversion) {
+        return a == b;
+    }
+    int32_t aA = static_cast<int32_t>(SkGetPackedA32(a));
+    int32_t aR = static_cast<int32_t>(SkGetPackedR32(a));
+    int32_t aG = static_cast<int32_t>(SkGetPackedG32(a));
+    int32_t aB = SkGetPackedB32(a);
+
+    int32_t bA = static_cast<int32_t>(SkGetPackedA32(b));
+    int32_t bR = static_cast<int32_t>(SkGetPackedR32(b));
+    int32_t bG = static_cast<int32_t>(SkGetPackedG32(b));
+    int32_t bB = static_cast<int32_t>(SkGetPackedB32(b));
+
+    return aA == bA &&
+           SkAbs32(aR - bR) <= 1 &&
+           SkAbs32(aG - bG) <= 1 &&
+           SkAbs32(aB - bB) <= 1;
+}
+
 // checks the bitmap contains correct pixels after the readPixels
 // if the bitmap was prefilled with pixels it checks that these weren't
 // overwritten in the area outside the readPixels.
@@ -155,7 +176,7 @@
     if (!clippedSrcRect.intersect(srcRect)) {
         clippedSrcRect.setEmpty();
     }
-
+    bool failed = false;
     SkAutoLockPixels alp(bitmap);
     intptr_t pixels = reinterpret_cast<intptr_t>(bitmap.getPixels());
     for (int by = 0; by < bh; ++by) {
@@ -163,26 +184,28 @@
             int devx = bx + srcRect.fLeft;
             int devy = by + srcRect.fTop;
             
-            SkPMColor pixel = *reinterpret_cast<SkPMColor*>(pixels + by * bitmap.rowBytes() + bx * bitmap.bytesPerPixel());
+            uint32_t pixel = *reinterpret_cast<SkPMColor*>(pixels + by * bitmap.rowBytes() + bx * bitmap.bytesPerPixel());
 
             if (clippedSrcRect.contains(devx, devy)) {
                 if (checkCanvasPixels) {
                     SkPMColor canvasPixel = getCanvasColor(devx, devy);
-                    pixel = convertConfig8888ToPMColor(config8888, pixel);
-                    REPORTER_ASSERT(reporter, canvasPixel == pixel);
-                    if (getCanvasColor(devx, devy) != pixel) {
-                        return false;
+                    bool didPremul;
+                    SkPMColor pmPixel = convertConfig8888ToPMColor(config8888, pixel, &didPremul);
+                    bool check;
+                    REPORTER_ASSERT(reporter, check = checkPixel(pmPixel, canvasPixel, didPremul));
+                    if (!check) {
+                        failed = true;
                     }
                 }
             } else if (checkBitmapPixels) {
                 REPORTER_ASSERT(reporter, getBitmapColor(bx, by, bw, bh) == pixel);
                 if (getBitmapColor(bx, by, bw, bh) != pixel) {
-                    return false;
+                    failed = true;
                 }
             }
         }
     }
-    return true;
+    return failed;
 }
 
 enum BitmapInit {
@@ -274,7 +297,7 @@
         SkIRect::MakeLTRB(3 * DEV_W / 4, -10, DEV_W + 10, DEV_H + 10),
     };
 
-    for (int dtype = 0; dtype < 2; ++dtype) {
+    for (int dtype = 1; dtype < 2; ++dtype) {
 
         if (0 == dtype) {
             canvas.setDevice(new SkDevice(SkBitmap::kARGB_8888_Config,
@@ -322,12 +345,9 @@
                         canvas.readPixels(&bmp, srcRect.fLeft,
                                           srcRect.fTop, config8888);
 
-                    // non-native not implemented on GPU yet
-                    bool expectSuccess =
-                        SkIRect::Intersects(srcRect, DEV_RECT) &&
-                        !(1 == dtype &&
-                          config8888 != SkCanvas::kNative_Premul_Config8888);
-
+                    // we expect to succeed when the read isn't fully clipped
+                    // out.
+                    bool expectSuccess = SkIRect::Intersects(srcRect, DEV_RECT);
                     // determine whether we expected the read to succeed.
                     REPORTER_ASSERT(reporter, success == expectSuccess);