When GL_RGBA readPixels is slow, swizzle using a draw and then readPixels with GL_BGRA

Review URL: http://codereview.appspot.com/5339051/


git-svn-id: http://skia.googlecode.com/svn/trunk@2631 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/gpu/GrConfig.h b/include/gpu/GrConfig.h
index 461dee7..bc71792 100644
--- a/include/gpu/GrConfig.h
+++ b/include/gpu/GrConfig.h
@@ -380,6 +380,7 @@
     #define GR_MAX_OFFSCREEN_AA_SIZE    256
 #endif
 
+
 ///////////////////////////////////////////////////////////////////////////////
 // tail section:
 //
diff --git a/include/gpu/GrGLConfig.h b/include/gpu/GrGLConfig.h
index 45b551c..f9a801d 100644
--- a/include/gpu/GrGLConfig.h
+++ b/include/gpu/GrGLConfig.h
@@ -77,6 +77,10 @@
  * It is not extern "C".
  * The GrGLInterface field fCallback specifies the function ptr and there is an
  * additional field fCallbackData of type intptr_t for client data.
+ *
+ * GR_GL_RGBA_8888_READBACK_SLOW: Set this to 1 if it is known that performing
+ * glReadPixels with format=GL_RGBA, type=GL_UNSIGNED_BYTE is significantly
+ * slower than format=GL_BGRA, type=GL_UNSIGNED_BYTE.
  */
 
 #if !defined(GR_GL_LOG_CALLS)
@@ -111,6 +115,10 @@
     #define GR_GL_PER_GL_FUNC_CALLBACK          0
 #endif
 
+#if !defined(GR_GL_RGBA_8888_READBACK_SLOW)
+    #define GR_GL_RGBA_8888_READBACK_SLOW       0
+#endif
+
 #if(GR_GL_NO_CONSTANT_ATTRIBUTES) && (GR_GL_ATTRIBUTE_MATRICES)
     #error "Cannot combine GR_GL_NO_CONSTANT_ATTRIBUTES and GR_GL_ATTRIBUTE_MATRICES"
 #endif
diff --git a/include/gpu/GrGLConfig_chrome.h b/include/gpu/GrGLConfig_chrome.h
index d21e2f4..79324ab 100644
--- a/include/gpu/GrGLConfig_chrome.h
+++ b/include/gpu/GrGLConfig_chrome.h
@@ -14,6 +14,9 @@
 // ANGLE creates a temp VB for vertex attributes not specified per-vertex.
 #define GR_GL_NO_CONSTANT_ATTRIBUTES    GR_WIN32_BUILD
 
+// RGBA Readbacks are a slow path in ANGLE
+#define GR_GL_RGBA_8888_READBACK_SLOW   GR_WIN32_BUILD
+
 // cmd buffer allocates memory and memsets it to zero when it sees glBufferData
 // with NULL.
 #define GR_GL_USE_BUFFER_DATA_NULL_HINT 0
diff --git a/include/gpu/GrSamplerState.h b/include/gpu/GrSamplerState.h
index 324472c..9c217db 100644
--- a/include/gpu/GrSamplerState.h
+++ b/include/gpu/GrSamplerState.h
@@ -112,6 +112,7 @@
         fSampleMode = kNormal_SampleMode;
         fFilter = filter;
         fMatrix.setIdentity();
+        fSwapRAndB = false;
         fTextureDomain.setEmpty();
     }
 
@@ -125,6 +126,7 @@
         fSampleMode = kNormal_SampleMode;
         fFilter = filter;
         fMatrix = matrix;
+        fSwapRAndB = false;
         fTextureDomain.setEmpty();
     }
 
@@ -138,6 +140,7 @@
         fSampleMode = sample;
         fMatrix = matrix;
         fFilter = filter;
+        fSwapRAndB = false;
         fTextureDomain.setEmpty();
     }
 
@@ -151,6 +154,7 @@
     int getKernelWidth() const { return fKernelWidth; }
     const float* getKernel() const { return fKernel; }
     const float* getImageIncrement() const { return fImageIncrement; }
+    bool swapsRAndB() const { return fSwapRAndB; }
 
     bool isGradient() const {
         return  kRadial_SampleMode == fSampleMode ||
@@ -177,6 +181,12 @@
     void setTextureDomain(const GrRect& textureDomain) { fTextureDomain = textureDomain; }
 
     /**
+     * Swaps the R and B components when reading from the texture. Has no effect
+     * if the texture is alpha only.
+     */
+    void setRAndBSwap(bool swap) { fSwapRAndB = swap; }
+
+    /**
      *  Multiplies the current sampler matrix  a matrix
      *
      *  After this call M' = M*m where M is the old matrix, m is the parameter
@@ -201,6 +211,7 @@
         fFilter = kNearest_Filter;
         fMatrix.setIdentity();
         fTextureDomain.setEmpty();
+        fSwapRAndB = false;
     }
 
     GrScalar getRadial2CenterX1() const { return fRadial2CenterX1; }
@@ -246,6 +257,7 @@
     SampleMode  fSampleMode;
     Filter      fFilter;
     GrMatrix    fMatrix;
+    bool        fSwapRAndB;
     GrRect      fTextureDomain;
 
     // these are undefined unless fSampleMode == kRadial2_SampleMode
diff --git a/include/gpu/GrTypes.h b/include/gpu/GrTypes.h
index 36ad9e0..1986a4e 100644
--- a/include/gpu/GrTypes.h
+++ b/include/gpu/GrTypes.h
@@ -319,6 +319,60 @@
 // has a different interpretation when skia is compiled BGRA.
 static const GrPixelConfig kRGBA_8888_GrPixelConfig = kSkia8888_PM_GrPixelConfig;
 
+// Reports whether config stores 8-bit r,g,b,a components in that byte order.
+// Both the _PM and _UPM flavors of the config qualify.
+static inline bool GrPixelConfigIsRGBA8888(GrPixelConfig config) {
+    if (kRGBA_8888_PM_GrPixelConfig == config) {
+        return true;
+    }
+    if (kRGBA_8888_UPM_GrPixelConfig == config) {
+        return true;
+    }
+    return false;
+}
+
+// Reports whether config stores 8-bit b,g,r,a components in that byte order.
+// Both the _PM and _UPM flavors of the config qualify.
+static inline bool GrPixelConfigIsBGRA8888(GrPixelConfig config) {
+    if (kBGRA_8888_PM_GrPixelConfig == config) {
+        return true;
+    }
+    if (kBGRA_8888_UPM_GrPixelConfig == config) {
+        return true;
+    }
+    return false;
+}
+
+// True if config is 32 bits per pixel (one of the four 8888 configs)
+static inline bool GrPixelConfigIs32Bit(GrPixelConfig config) {
+    if (kRGBA_8888_PM_GrPixelConfig == config ||
+        kRGBA_8888_UPM_GrPixelConfig == config) {
+        return true;
+    }
+    if (kBGRA_8888_PM_GrPixelConfig == config ||
+        kBGRA_8888_UPM_GrPixelConfig == config) {
+        return true;
+    }
+    return false;
+}
+
+// Returns config with R and B exchanged, or kUnknown_GrPixelConfig if none.
+static inline GrPixelConfig GrPixelConfigSwapRAndB(GrPixelConfig config) {
+    if (kBGRA_8888_PM_GrPixelConfig == config) {
+        return kRGBA_8888_PM_GrPixelConfig;
+    }
+    if (kBGRA_8888_UPM_GrPixelConfig == config) {
+        return kRGBA_8888_UPM_GrPixelConfig;
+    }
+    if (kRGBA_8888_PM_GrPixelConfig == config) {
+        return kBGRA_8888_PM_GrPixelConfig;
+    }
+    if (kRGBA_8888_UPM_GrPixelConfig == config) {
+        return kBGRA_8888_UPM_GrPixelConfig;
+    }
+    return kUnknown_GrPixelConfig;
+}
+
 static inline size_t GrBytesPerPixel(GrPixelConfig config) {
     switch (config) {
         case kAlpha_8_GrPixelConfig:
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 8c06c21..8dbd411 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1673,19 +1673,30 @@
     this->flush();
 
     GrTexture* src = target->asTexture();
+    bool swapRAndB = NULL != src &&
+                     fGpu->preferredReadPixelsConfig(config) ==
+                     GrPixelConfigSwapRAndB(config);
 
     bool flipY = NULL != src &&
                  fGpu->readPixelsWillPayForYFlip(target, left, top,
                                                  width, height, config,
                                                  rowBytes);
+    bool alphaConversion = (!GrPixelConfigIsUnpremultiplied(target->config()) &&
+                             GrPixelConfigIsUnpremultiplied(config));
 
-    if (flipY || (!GrPixelConfigIsUnpremultiplied(target->config()) &&
-                  GrPixelConfigIsUnpremultiplied(config))) {
-        if (!src) {
-            // we should fallback to cpu conversion here. This could happen when
-            // we were given an external render target by the client that is not
-            // also a texture (e.g. FBO 0 in GL)
-            return false;
+    if (NULL == src && alphaConversion) {
+        // we should fallback to cpu conversion here. This could happen when
+        // we were given an external render target by the client that is not
+        // also a texture (e.g. FBO 0 in GL)
+        return false;
+    }
+
+    // we draw to a scratch texture if any of these conversions are applied
+    if (flipY || swapRAndB || alphaConversion) {
+        GrAssert(NULL != src);
+        if (swapRAndB) {
+            config = GrPixelConfigSwapRAndB(config);
+            GrAssert(kUnknown_GrPixelConfig != config);
         }
         // Make the scratch a render target because we don't have a robust
         // readTexturePixels as of yet (it calls this function).
@@ -1715,6 +1726,7 @@
 
         GrSamplerState sampler;
         sampler.setClampNoFilter();
+        sampler.setRAndBSwap(swapRAndB);
         GrMatrix matrix;
         if (flipY) {
             matrix.setTranslate(SK_Scalar1 * left,
diff --git a/src/gpu/GrGLProgram.cpp b/src/gpu/GrGLProgram.cpp
index 89ced68..93303ca 100644
--- a/src/gpu/GrGLProgram.cpp
+++ b/src/gpu/GrGLProgram.cpp
@@ -1460,7 +1460,7 @@
               GrStringBuilder* sampleCoords,
               const char* samplerName,
               const char* texelSizeName,
-              const char* smear,
+              const char* swizzle,
               const char* fsOutColor,
               GrStringBuilder& texFunc,
               GrStringBuilder& modulate,
@@ -1479,10 +1479,10 @@
     GrAssert(2 == coordDims);
     GrStringBuilder accumVar("accum");
     accumVar.appendS32(stageNum);
-    segments->fFSCode.appendf("\tvec4 %s  = %s(%s, %s + vec2(-%s.x,-%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, smear);
-    segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(+%s.x,-%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, smear);
-    segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(-%s.x,+%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, smear);
-    segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(+%s.x,+%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, smear);
+    segments->fFSCode.appendf("\tvec4 %s  = %s(%s, %s + vec2(-%s.x,-%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, swizzle);
+    segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(+%s.x,-%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, swizzle);
+    segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(-%s.x,+%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, swizzle);
+    segments->fFSCode.appendf("\t%s += %s(%s, %s + vec2(+%s.x,+%s.y))%s;\n", accumVar.c_str(), texFunc.c_str(), samplerName, sampleCoords->c_str(), texelSizeName, texelSizeName, swizzle);
     segments->fFSCode.appendf("\t%s = .25 * %s%s;\n", fsOutColor, accumVar.c_str(), modulate.c_str());
 
 }
@@ -1522,7 +1522,7 @@
                       ShaderCodeSegments* segments,
                       const char* samplerName,
                       const char* kernelName,
-                      const char* smear,
+                      const char* swizzle,
                       const char* imageIncrementName,
                       const char* fsOutColor,
                       GrStringBuilder& sampleCoords,
@@ -1542,7 +1542,7 @@
                               desc.fKernelWidth);
     segments->fFSCode.appendf("\t\t%s += %s(%s, %s)%s * %s[i];\n",
                               sumVar.c_str(), texFunc.c_str(),
-                              samplerName, coordVar.c_str(), smear,
+                              samplerName, coordVar.c_str(), swizzle,
                               kernelName);
     segments->fFSCode.appendf("\t\t%s += %s;\n",
                               coordVar.c_str(),
@@ -1708,12 +1708,19 @@
 
     };
 
-    const char* smear;
-    if (desc.fInputConfig == StageDesc::kAlphaOnly_InputConfig) {
-        smear = ".aaaa";
-    } else {
-        smear = "";
+    const char* swizzle = ""; // safe fallback: fSwizzle is a raw uint8_t
+    switch (desc.fSwizzle) {
+        case StageDesc::kAlphaSmear_Swizzle:
+            swizzle = ".aaaa";
+            break;
+        case StageDesc::kSwapRAndB_Swizzle:
+            swizzle = ".bgra";
+            break;
+        default:
+            GrAssert(StageDesc::kNone_Swizzle == desc.fSwizzle);
+            break;
     }
+
     GrStringBuilder modulate;
     if (NULL != fsInColor) {
         modulate.printf(" * %s", fsInColor);
@@ -1738,19 +1745,19 @@
     switch (desc.fFetchMode) {
     case StageDesc::k2x2_FetchMode:
         gen2x2FS(stageNum, segments, locations, &sampleCoords,
-            samplerName, texelSizeName, smear, fsOutColor,
+            samplerName, texelSizeName, swizzle, fsOutColor,
             texFunc, modulate, complexCoord, coordDims);
         break;
     case StageDesc::kConvolution_FetchMode:
         genConvolutionFS(stageNum, desc, segments,
-            samplerName, kernelName, smear, imageIncrementName, fsOutColor,
+            samplerName, kernelName, swizzle, imageIncrementName, fsOutColor,
             sampleCoords, texFunc, modulate);
         break;
     default:
         segments->fFSCode.appendf("\t%s = %s(%s, %s)%s%s;\n",
                                   fsOutColor, texFunc.c_str(), 
                                   samplerName, sampleCoords.c_str(),
-                                  smear, modulate.c_str());
+                                  swizzle, modulate.c_str());
     }
 }
 
diff --git a/src/gpu/GrGLProgram.h b/src/gpu/GrGLProgram.h
index 9abf1b1..e0f8fbc 100644
--- a/src/gpu/GrGLProgram.h
+++ b/src/gpu/GrGLProgram.h
@@ -125,19 +125,26 @@
 
                 kFetchModeCnt,
             };
-            enum InputConfig {
+            /**
+              Describes how to swizzle the texture's components. If swizzling
+              can be applied outside of the shader (GL_ARB_texture_swizzle) that
+              is preferable to using this enum. Changing the enum value used
+              causes another program to be generated.
+             */
+            enum Swizzle {
                 /**
-                  The texture has r,g,b, and optionally a.
+                  No swizzling applied to the inputs
                  */
-                kColor_InputConfig,
+                kNone_Swizzle,
                 /**
-                  The texture is alpha only. This should only be used if the
-                  caller is unable to map the r,g,b sample values to the
-                  texture's alpha channel (GL_ARB_texture_swizzle).
+                  Swap the R and B channels
+                 */
+                kSwapRAndB_Swizzle,
+                /**
+                 Smear alpha across all four channels.
                 */
-                kAlphaOnly_InputConfig,
-
-                kInputConfigCnt
+                kAlphaSmear_Swizzle,
+                kSwizzleCnt
             };
             enum CoordMapping {
                 kIdentity_CoordMapping,
@@ -151,7 +158,7 @@
             };
 
             uint8_t fOptFlags;
-            uint8_t fInputConfig;   // casts to enum InputConfig
+            uint8_t fSwizzle;       // casts to enum Swizzle
             uint8_t fFetchMode;     // casts to enum FetchMode
             uint8_t fCoordMapping;  // casts to enum CoordMapping
             uint8_t fKernelWidth;
diff --git a/src/gpu/GrGLTexture.h b/src/gpu/GrGLTexture.h
index 1f9636a..a6888ee 100644
--- a/src/gpu/GrGLTexture.h
+++ b/src/gpu/GrGLTexture.h
@@ -55,6 +55,7 @@
         GrGLenum fFilter;
         GrGLenum fWrapS;
         GrGLenum fWrapT;
+        GrGLenum fSwizzleRGBA[4];
         void invalidate() { memset(this, 0xff, sizeof(TexParams)); }
     };
 
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 11edfe3..1482d13 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -184,6 +184,17 @@
     void forceRenderTargetFlush();
 
     /**
+     * readPixels with some configs may be slow. Given a desired config this
+     * function returns a fast-path config. The returned config must have the
+     * same components, component sizes, and not require conversion between
+     * pre- and unpremultiplied alpha. The caller is free to ignore the result
+     * and call readPixels with the original config.
+     */
+    virtual GrPixelConfig preferredReadPixelsConfig(GrPixelConfig config) {
+        return config;
+    }
+
+    /**
      * OpenGL's readPixels returns the result bottom-to-top while the skia
      * API is top-to-bottom. Thus we have to do a y-axis flip. The obvious
      * solution is to have the subclass do the flip using either the CPU or GPU.
diff --git a/src/gpu/GrGpuGL.cpp b/src/gpu/GrGpuGL.cpp
index 9697a4e..d768a24 100644
--- a/src/gpu/GrGpuGL.cpp
+++ b/src/gpu/GrGpuGL.cpp
@@ -576,6 +576,14 @@
     }
 }
 
+GrPixelConfig GrGpuGL::preferredReadPixelsConfig(GrPixelConfig config) {
+    // When RGBA readback is flagged slow, steer RGBA 8888 requests to the
+    // R/B-swapped (BGRA) config; every other request is returned unchanged.
+    const bool preferSwapped = GR_GL_RGBA_8888_READBACK_SLOW &&
+                               GrPixelConfigIsRGBA8888(config);
+    return preferSwapped ? GrPixelConfigSwapRAndB(config) : config;
+}
+
 void GrGpuGL::onResetContext() {
     if (gPrintStartupSpew && !fPrintedCaps) {
         fPrintedCaps = true;
@@ -1036,40 +1044,6 @@
 }
 #endif
 
-namespace {
-void set_tex_swizzle(GrPixelConfig config, const GrGLInterface* gl) {
-    // Today we always use GL_ALPHA for kAlpha_8_GrPixelConfig. However,
-    // this format is deprecated sometimes isn't a renderable format. If we
-    // were to spoof it in the future with GL_RED we'd want to notice that
-    // here.
-    // This isn't recorded in our tex params struct becauase we infer it
-    // from the pixel config.
-    const GrGLint* swiz;
-    if (GrPixelConfigIsAlphaOnly(config)) {
-        static const GrGLint gAlphaSwiz[] = {GR_GL_ALPHA, GR_GL_ALPHA,
-                                             GR_GL_ALPHA, GR_GL_ALPHA};
-        swiz = gAlphaSwiz;
-    } else {
-        static const GrGLint gColorSwiz[] = {GR_GL_RED,  GR_GL_GREEN,
-                                             GR_GL_BLUE, GR_GL_ALPHA};
-        swiz = gColorSwiz;
-    }
-    // should add texparameteri to interface to make 1 instead of 4 calls here
-    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
-                                 GR_GL_TEXTURE_SWIZZLE_R,
-                                 swiz[0]));
-    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
-                                 GR_GL_TEXTURE_SWIZZLE_G,
-                                 swiz[1]));
-    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
-                                 GR_GL_TEXTURE_SWIZZLE_B,
-                                 swiz[2]));
-    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
-                                 GR_GL_TEXTURE_SWIZZLE_A,
-                                 swiz[3]));
-}
-}
-
 GrTexture* GrGpuGL::onCreateTexture(const GrTextureDesc& desc,
                                     const void* srcData,
                                     size_t rowBytes) {
@@ -1149,29 +1123,27 @@
     this->setSpareTextureUnit();
     GL_CALL(BindTexture(GR_GL_TEXTURE_2D, glTexDesc.fTextureID));
 
-    // Some drivers like to know these before seeing glTexImage2D. Some drivers
-    // have a bug where an FBO won't be complete if it includes a texture that
-    // is not complete (i.e. has mip levels or non-mip min filter).
-    static const GrGLTexture::TexParams DEFAULT_TEX_PARAMS = {
-        GR_GL_NEAREST,
-        GR_GL_CLAMP_TO_EDGE,
-        GR_GL_CLAMP_TO_EDGE
-    };
+    // Some drivers like to know filter/wrap before seeing glTexImage2D. Some
+    // drivers have a bug where an FBO won't be complete if it includes a
+    // texture that is not mipmap complete (considering the filter in use).
+    GrGLTexture::TexParams initialTexParams;
+    // we only set a subset here so invalidate first
+    initialTexParams.invalidate();
+    initialTexParams.fFilter = GR_GL_NEAREST;
+    initialTexParams.fWrapS = GR_GL_CLAMP_TO_EDGE;
+    initialTexParams.fWrapT = GR_GL_CLAMP_TO_EDGE;
     GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
                           GR_GL_TEXTURE_MAG_FILTER,
-                          DEFAULT_TEX_PARAMS.fFilter));
+                          initialTexParams.fFilter));
     GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
                           GR_GL_TEXTURE_MIN_FILTER,
-                          DEFAULT_TEX_PARAMS.fFilter));
+                          initialTexParams.fFilter));
     GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
                           GR_GL_TEXTURE_WRAP_S,
-                          DEFAULT_TEX_PARAMS.fWrapS));
+                          initialTexParams.fWrapS));
     GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
                           GR_GL_TEXTURE_WRAP_T,
-                          DEFAULT_TEX_PARAMS.fWrapT));
-    if (fGLCaps.fTextureSwizzle) {
-        set_tex_swizzle(desc.fConfig, this->glInterface());
-    }
+                          initialTexParams.fWrapT));
     this->allocateAndUploadTexData(glTexDesc, internalFormat,srcData, rowBytes);
 
     GrGLTexture* tex;
@@ -1190,7 +1162,7 @@
     } else {
         tex = new GrGLTexture(this, glTexDesc);
     }
-    tex->setCachedTexParams(DEFAULT_TEX_PARAMS, this->getResetTimestamp());
+    tex->setCachedTexParams(initialTexParams, this->getResetTimestamp());
 #ifdef TRACE_TEXTURE_CREATION
     GrPrintf("--- new texture [%d] size=(%d %d) config=%d\n",
              glTexDesc.fTextureID, desc.fWidth, desc.fHeight, desc.fConfig);
@@ -2057,7 +2029,9 @@
     }
 }
 
-static unsigned grToGLFilter(GrSamplerState::Filter filter) {
+namespace {
+
+unsigned gr_to_gl_filter(GrSamplerState::Filter filter) {
     switch (filter) {
         case GrSamplerState::kBilinear_Filter:
         case GrSamplerState::k4x4Downsample_Filter:
@@ -2071,6 +2045,40 @@
     }
 }
 
+// Picks the {R,G,B,A} texture swizzle for sampling a texture of the given
+// config with the given sampler. Returns a pointer to a static 4-entry table.
+const GrGLenum* get_swizzle(GrPixelConfig config,
+                            const GrSamplerState& sampler) {
+    static const GrGLenum gAlphaSmear[] = { GR_GL_ALPHA, GR_GL_ALPHA,
+                                            GR_GL_ALPHA, GR_GL_ALPHA };
+    static const GrGLenum gRedBlueSwap[] = { GR_GL_BLUE, GR_GL_GREEN,
+                                             GR_GL_RED,  GR_GL_ALPHA };
+    static const GrGLenum gStraight[] = { GR_GL_RED, GR_GL_GREEN,
+                                          GR_GL_BLUE,  GR_GL_ALPHA };
+    // alpha-only configs always smear; the sampler's swap is ignored for them
+    if (GrPixelConfigIsAlphaOnly(config)) {
+        return gAlphaSmear;
+    }
+    return sampler.swapsRAndB() ? gRedBlueSwap : gStraight;
+}
+
+void set_tex_swizzle(GrGLenum swizzle[4], const GrGLInterface* gl) {
+    // should add TexParameteriv to interface to make 1 instead of 4 calls here
+    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
+                                 GR_GL_TEXTURE_SWIZZLE_R,
+                                 swizzle[0]));
+    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
+                                 GR_GL_TEXTURE_SWIZZLE_G,
+                                 swizzle[1]));
+    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
+                                 GR_GL_TEXTURE_SWIZZLE_B,
+                                 swizzle[2]));
+    GR_GL_CALL(gl, TexParameteri(GR_GL_TEXTURE_2D,
+                                 GR_GL_TEXTURE_SWIZZLE_A,
+                                 swizzle[3]));
+}
+}
+
 bool GrGpuGL::flushGLStateCommon(GrPrimitiveType type) {
 
     // GrGpu::setupClipAndFlushState should have already checked this
@@ -2114,51 +2122,44 @@
             bool setAll = timestamp < this->getResetTimestamp();
             GrGLTexture::TexParams newTexParams;
 
-            newTexParams.fFilter = grToGLFilter(sampler.getFilter());
+            newTexParams.fFilter = gr_to_gl_filter(sampler.getFilter());
 
             const GrGLenum* wraps = 
                                 GrGLTexture::WrapMode2GLWrap(this->glBinding());
             newTexParams.fWrapS = wraps[sampler.getWrapX()];
             newTexParams.fWrapT = wraps[sampler.getWrapY()];
-            if (setAll) {
+            memcpy(newTexParams.fSwizzleRGBA,
+                   get_swizzle(nextTexture->config(), sampler),
+                   sizeof(newTexParams.fSwizzleRGBA));
+            if (setAll || newTexParams.fFilter != oldTexParams.fFilter) {
                 setTextureUnit(s);
                 GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                      GR_GL_TEXTURE_MAG_FILTER,
-                                      newTexParams.fFilter));
+                                        GR_GL_TEXTURE_MAG_FILTER,
+                                        newTexParams.fFilter));
                 GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                      GR_GL_TEXTURE_MIN_FILTER,
-                                      newTexParams.fFilter));
+                                        GR_GL_TEXTURE_MIN_FILTER,
+                                        newTexParams.fFilter));
+            }
+            if (setAll || newTexParams.fWrapS != oldTexParams.fWrapS) {
+                setTextureUnit(s);
                 GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                      GR_GL_TEXTURE_WRAP_S,
-                                      newTexParams.fWrapS));
+                                        GR_GL_TEXTURE_WRAP_S,
+                                        newTexParams.fWrapS));
+            }
+            if (setAll || newTexParams.fWrapT != oldTexParams.fWrapT) {
+                setTextureUnit(s);
                 GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                      GR_GL_TEXTURE_WRAP_T,
-                                      newTexParams.fWrapT));
-                if (this->glCaps().fTextureSwizzle) {
-                    set_tex_swizzle(nextTexture->config(), this->glInterface());
-                }
-            } else {
-                if (newTexParams.fFilter != oldTexParams.fFilter) {
-                    setTextureUnit(s);
-                    GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                          GR_GL_TEXTURE_MAG_FILTER,
-                                          newTexParams.fFilter));
-                    GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                          GR_GL_TEXTURE_MIN_FILTER,
-                                          newTexParams.fFilter));
-                }
-                if (newTexParams.fWrapS != oldTexParams.fWrapS) {
-                    setTextureUnit(s);
-                    GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                          GR_GL_TEXTURE_WRAP_S,
-                                          newTexParams.fWrapS));
-                }
-                if (newTexParams.fWrapT != oldTexParams.fWrapT) {
-                    setTextureUnit(s);
-                    GL_CALL(TexParameteri(GR_GL_TEXTURE_2D,
-                                          GR_GL_TEXTURE_WRAP_T,
-                                          newTexParams.fWrapT));
-                }
+                                        GR_GL_TEXTURE_WRAP_T,
+                                        newTexParams.fWrapT));
+            }
+            if (this->glCaps().fTextureSwizzle &&
+                (setAll ||
+                 memcmp(newTexParams.fSwizzleRGBA,
+                        oldTexParams.fSwizzleRGBA,
+                        sizeof(newTexParams.fSwizzleRGBA)))) {
+                setTextureUnit(s);
+                set_tex_swizzle(newTexParams.fSwizzleRGBA,
+                                this->glInterface());
             }
             nextTexture->setCachedTexParams(newTexParams,
                                             this->getResetTimestamp());
diff --git a/src/gpu/GrGpuGL.h b/src/gpu/GrGpuGL.h
index 1dd9c80..84c62f8 100644
--- a/src/gpu/GrGpuGL.h
+++ b/src/gpu/GrGpuGL.h
@@ -29,6 +29,9 @@
     GrGLBinding glBinding() const { return fGLBinding; }
     GrGLVersion glVersion() const { return fGLVersion; }
 
+    virtual GrPixelConfig preferredReadPixelsConfig(GrPixelConfig config)
+                                                                    SK_OVERRIDE;
+
     virtual bool readPixelsWillPayForYFlip(
                                     GrRenderTarget* renderTarget,
                                     int left, int top,
diff --git a/src/gpu/GrGpuGLShaders.cpp b/src/gpu/GrGpuGLShaders.cpp
index 8e7c25f..0961548 100644
--- a/src/gpu/GrGpuGLShaders.cpp
+++ b/src/gpu/GrGpuGLShaders.cpp
@@ -272,7 +272,7 @@
             idx = (int)(random.nextF() * GR_ARRAY_COUNT(STAGE_OPTS));
             StageDesc& stage = pdesc.fStages[s];
             stage.fOptFlags = STAGE_OPTS[idx];
-            stage.fInputConfig = random_val(&random, StageDesc::kInputConfigCnt);
+            stage.fSwizzle = random_val(&random, StageDesc::kSwizzleCnt);
             stage.fCoordMapping =  random_val(&random, StageDesc::kCoordMappingCnt);
             stage.fFetchMode = random_val(&random, StageDesc::kFetchModeCnt);
             // convolution shaders don't work with persp tex matrix
@@ -1001,14 +1001,19 @@
                 stage.fOptFlags |= StageDesc::kCustomTextureDomain_OptFlagBit;
             }
 
-            if (!this->glCaps().fTextureSwizzle &&
-                GrPixelConfigIsAlphaOnly(texture->config())) {
-                // if we don't have texture swizzle support then
-                // the shader must do an alpha smear after reading
-                // the texture
-                stage.fInputConfig = StageDesc::kAlphaOnly_InputConfig;
+            if (!this->glCaps().fTextureSwizzle) {
+                if (GrPixelConfigIsAlphaOnly(texture->config())) {
+                    // if we don't have texture swizzle support then
+                    // the shader must do an alpha smear after reading
+                    // the texture
+                    stage.fSwizzle = StageDesc::kAlphaSmear_Swizzle;
+                } else if (sampler.swapsRAndB()) {
+                    stage.fSwizzle = StageDesc::kSwapRAndB_Swizzle;
+                } else {
+                    stage.fSwizzle = StageDesc::kNone_Swizzle;
+                }
             } else {
-                stage.fInputConfig = StageDesc::kColor_InputConfig;
+                stage.fSwizzle = StageDesc::kNone_Swizzle;
             }
 
             if (sampler.getFilter() == GrSamplerState::kConvolution_Filter) {
@@ -1018,8 +1023,8 @@
             }
         } else {
             stage.fOptFlags     = 0;
-            stage.fCoordMapping = (StageDesc::CoordMapping)0;
-            stage.fInputConfig  = (StageDesc::InputConfig)0;
+            stage.fCoordMapping = (StageDesc::CoordMapping) 0;
+            stage.fSwizzle      = (StageDesc::Swizzle) 0;
             stage.fFetchMode    = (StageDesc::FetchMode) 0;
             stage.fKernelWidth  = 0;
         }