Use GrCustomStage to implement color matrix.

R=robertphillips@google.com
Review URL: https://codereview.appspot.com/6716044

git-svn-id: http://skia.googlecode.com/svn/trunk@5975 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/core/SkColorFilter.h b/include/core/SkColorFilter.h
index 935b691..44d3f85 100644
--- a/include/core/SkColorFilter.h
+++ b/include/core/SkColorFilter.h
@@ -15,6 +15,8 @@
 #include "SkXfermode.h"
 
 class SkBitmap;
+class GrCustomStage;
+class GrContext;
 
 class SK_API SkColorFilter : public SkFlattenable {
 public:
@@ -113,6 +115,11 @@
     */
     static SkColorFilter* CreateLightingFilter(SkColor mul, SkColor add);
 
+    /** A subclass may implement this factory function to work with the GPU backend. If the return
+        is non-NULL then the caller owns a ref on the returned object.
+     */
+    virtual GrCustomStage* asNewCustomStage(GrContext*) const;
+
     SK_DECLARE_FLATTENABLE_REGISTRAR_GROUP()
 protected:
     SkColorFilter() {}
diff --git a/include/effects/SkColorMatrix.h b/include/effects/SkColorMatrix.h
index ff02d9d..84a3b7c 100644
--- a/include/effects/SkColorMatrix.h
+++ b/include/effects/SkColorMatrix.h
@@ -39,6 +39,12 @@
     void setSaturation(SkScalar sat);
     void setRGB2YUV();
     void setYUV2RGB();
+
+    bool operator==(const SkColorMatrix& other) const {
+        return 0 == memcmp(fMat, other.fMat, sizeof(fMat));
+    }
+
+    bool operator!=(const SkColorMatrix& other) const { return !((*this) == other); }
 };
 
 #endif
diff --git a/include/effects/SkColorMatrixFilter.h b/include/effects/SkColorMatrixFilter.h
index 005781f..08eef5d 100644
--- a/include/effects/SkColorMatrixFilter.h
+++ b/include/effects/SkColorMatrixFilter.h
@@ -21,6 +21,9 @@
     virtual void filterSpan16(const uint16_t src[], int count, uint16_t[]) SK_OVERRIDE;
     virtual uint32_t getFlags() SK_OVERRIDE;
     virtual bool asColorMatrix(SkScalar matrix[20]) SK_OVERRIDE;
+#if SK_SUPPORT_GPU
+    virtual GrCustomStage* asNewCustomStage(GrContext*) const SK_OVERRIDE;
+#endif
 
     struct State {
         int32_t fArray[20];
diff --git a/include/gpu/GrCustomStageUnitTest.h b/include/gpu/GrCustomStageUnitTest.h
index e0f179d..0e12f2d 100644
--- a/include/gpu/GrCustomStageUnitTest.h
+++ b/include/gpu/GrCustomStageUnitTest.h
@@ -58,7 +58,7 @@
 
 /** GrCustomStage subclasses should insert this macro in their implemenation file. They must then
  *  also implement this static function:
- *      GrCustomStage* CreateStage(SkRandom*, GrContext*, GrTexture* dummyTextures[2]);
+ *      GrCustomStage* TestCreate(SkRandom*, GrContext*, GrTexture* dummyTextures[2]);
  *  dummyTextures[] are valied textures that they can optionally use for their texture accesses. The
   * first texture has config kSkia8888_PM_GrPixelConfig and the second has kAlpha_8_GrPixelConfig.
   * TestCreate functions are also free to create additional textures using the GrContext.
diff --git a/include/gpu/GrPaint.h b/include/gpu/GrPaint.h
index 843a934..06c16af 100644
--- a/include/gpu/GrPaint.h
+++ b/include/gpu/GrPaint.h
@@ -25,8 +25,7 @@
  * The primitive color computation starts with the color specified by setColor(). This color is the
  * input to the first color stage. Each color stage feeds its output to the next color stage. The
  * final color stage's output color is input to the color filter specified by
- * setXfermodeColorFilter which it turn feeds into the color matrix. The output of the color matrix
- * is the final source color, S.
+ * setXfermodeColorFilter which produces the final source color, S.
  *
  * Fractional pixel coverage follows a similar flow. The coverage is initially the value specified
  * by setCoverage(). This is input to the first coverage stage. Coverage stages are chained
@@ -40,7 +39,7 @@
  * Note that the coverage is applied after the blend. This is why they are computed as distinct
  * values.
  *
- * TODO: Encapsulate setXfermodeColorFilter and color matrix in stages and remove from GrPaint.
+ * TODO: Encapsulate setXfermodeColorFilter in a GrCustomStage and remove from GrPaint.
  */
 class GrPaint {
 public:
@@ -104,28 +103,11 @@
     GrColor getColorFilterColor() const { return fColorFilterColor; }
 
     /**
-     * Turns off application of a color matrix. By default the color matrix is disabled.
-     */
-    void disableColorMatrix() { fColorMatrixEnabled = false; }
-
-    /**
-     * Specifies and enables a 4 x 5 color matrix.
-     */
-    void setColorMatrix(const float matrix[20]) {
-        fColorMatrixEnabled = true;
-        memcpy(fColorMatrix, matrix, sizeof(fColorMatrix));
-    }
-
-    bool isColorMatrixEnabled() const { return fColorMatrixEnabled; }
-    const float* getColorMatrix() const { return fColorMatrix; }
-
-    /**
-     * Disables both the matrix and SkXfermode::Mode color filters.
+     * Disables the SkXfermode::Mode color filter.
      */
     void resetColorFilter() {
         fColorFilterXfermode = SkXfermode::kDst_Mode;
         fColorFilterColor = GrColorPackRGBA(0xff, 0xff, 0xff, 0xff);
-        fColorMatrixEnabled = false;
     }
 
     /**
@@ -246,10 +228,6 @@
 
         fColorFilterColor = paint.fColorFilterColor;
         fColorFilterXfermode = paint.fColorFilterXfermode;
-        fColorMatrixEnabled = paint.fColorMatrixEnabled;
-        if (fColorMatrixEnabled) {
-            memcpy(fColorMatrix, paint.fColorMatrix, sizeof(fColorMatrix));
-        }
 
         for (int i = 0; i < kMaxColorStages; ++i) {
             if (paint.isColorStageEnabled(i)) {
@@ -295,14 +273,12 @@
     GrBlendCoeff                fDstBlendCoeff;
     bool                        fAntiAlias;
     bool                        fDither;
-    bool                        fColorMatrixEnabled;
 
     GrColor                     fColor;
     uint8_t                     fCoverage;
 
     GrColor                     fColorFilterColor;
     SkXfermode::Mode            fColorFilterXfermode;
-    float                       fColorMatrix[20];
 
     void resetBlend() {
         fSrcBlendCoeff = kOne_GrBlendCoeff;
diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp
index fee34ed..3651d7e 100644
--- a/src/core/SkColorFilter.cpp
+++ b/src/core/SkColorFilter.cpp
@@ -40,6 +40,10 @@
     return SkUnPreMultiply::PMColorToColor(dst);
 }
 
+GrCustomStage* SkColorFilter::asNewCustomStage(GrContext*) const {
+    return NULL;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 SkFilterShader::SkFilterShader(SkShader* shader, SkColorFilter* filter) {
diff --git a/src/effects/SkColorMatrixFilter.cpp b/src/effects/SkColorMatrixFilter.cpp
index 2102e9b..f3bc2a7 100644
--- a/src/effects/SkColorMatrixFilter.cpp
+++ b/src/effects/SkColorMatrixFilter.cpp
@@ -317,3 +317,115 @@
     }
     return true;
 }
+
+#if SK_SUPPORT_GPU
+#include "GrCustomStage.h"
+#include "gl/GrGLProgramStage.h"
+
+// GrCustomStage that applies an SkColorMatrix to the incoming color in the fragment shader.
+class ColorMatrixEffect : public GrCustomStage {
+public:
+    static const char* Name() { return "Color Matrix"; }
+
+    explicit ColorMatrixEffect(const SkColorMatrix& matrix) : GrCustomStage(0), fMatrix(matrix) {}
+
+    virtual const GrProgramStageFactory& getFactory() const SK_OVERRIDE {
+        return GrTProgramStageFactory<ColorMatrixEffect>::getInstance();
+    }
+
+    // The static_cast assumes 's' is also a ColorMatrixEffect; callers must guarantee that.
+    virtual bool isEqual(const GrCustomStage& s) const SK_OVERRIDE {
+        const ColorMatrixEffect& cme = static_cast<const ColorMatrixEffect&>(s);
+        return cme.fMatrix == fMatrix;
+    }
+
+    GR_DECLARE_CUSTOM_STAGE_TEST;
+
+    class GLProgramStage : public GrGLProgramStage {
+    public:
+        // this class always generates the same code.
+        static StageKey GenKey(const GrCustomStage& s, const GrGLCaps&) { return 0; }
+
+        GLProgramStage(const GrProgramStageFactory& factory, const GrCustomStage& stage)
+        : GrGLProgramStage(factory)
+        , fMatrixHandle(GrGLUniformManager::kInvalidUniformHandle)
+        , fVectorHandle(GrGLUniformManager::kInvalidUniformHandle) {}
+
+        // Declares the two fragment shader uniforms: the 4x4 matrix and the offset vector.
+        virtual void setupVariables(GrGLShaderBuilder* builder) SK_OVERRIDE {
+            fMatrixHandle = builder->addUniform(GrGLShaderBuilder::kFragment_ShaderType,
+                                                kMat44f_GrSLType, "ColorMatrix");
+            fVectorHandle = builder->addUniform(GrGLShaderBuilder::kFragment_ShaderType,
+                                                kVec4f_GrSLType, "ColorMatrixVector");
+        }
+
+        virtual void emitVS(GrGLShaderBuilder* builder, const char* vertexCoords) SK_OVERRIDE {}
+
+        // Emits: unpremultiply the input, apply matrix * color + vector, then premultiply.
+        virtual void emitFS(GrGLShaderBuilder* builder,
+                            const char* outputColor,
+                            const char* inputColor,
+                            const TextureSamplerArray&) SK_OVERRIDE {
+            if (NULL == inputColor) {
+                // could optimize this case, but we aren't for now.
+                inputColor = GrGLSLOnesVecf(4);
+            }
+            // The max() is to guard against 0 / 0 during unpremul when the incoming color is
+            // transparent black.
+            builder->fFSCode.appendf("\tfloat nonZeroAlpha = max(%s.a, 0.00001);\n", inputColor);
+            builder->fFSCode.appendf("\t%s = %s * vec4(%s.rgb / nonZeroAlpha, nonZeroAlpha) + %s;\n",
+                                     outputColor,
+                                     builder->getUniformCStr(fMatrixHandle),
+                                     inputColor,
+                                     builder->getUniformCStr(fVectorHandle));
+            builder->fFSCode.appendf("\t%s.rgb *= %s.a;\n", outputColor, outputColor);
+        }
+
+        // Uploads the transposed 4x4 portion of the matrix and the offset column scaled by 1/255.
+        virtual void setData(const GrGLUniformManager& uniManager,
+                                const GrCustomStage& stage,
+                                const GrRenderTarget*,
+                                int /* stageNum */) SK_OVERRIDE {
+            const ColorMatrixEffect& cme = static_cast<const ColorMatrixEffect&>(stage);
+            const float* m = cme.fMatrix.fMat;
+            // The GL matrix is transposed from SkColorMatrix.
+            GrGLfloat mt[]  = {
+                m[0], m[5], m[10], m[15],
+                m[1], m[6], m[11], m[16],
+                m[2], m[7], m[12], m[17],
+                m[3], m[8], m[13], m[18],
+            };
+            static const float kScale = 1.0f / 255.0f;
+            GrGLfloat vec[] = {
+                m[4] * kScale, m[9] * kScale, m[14] * kScale, m[19] * kScale,
+            };
+            uniManager.setMatrix4fv(fMatrixHandle, 0, 1, mt);
+            uniManager.set4fv(fVectorHandle, 0, 1, vec);
+        }
+
+    private:
+        GrGLUniformManager::UniformHandle fMatrixHandle;
+        GrGLUniformManager::UniformHandle fVectorHandle;
+    };
+
+private:
+    SkColorMatrix fMatrix;
+};
+
+GR_DEFINE_CUSTOM_STAGE_TEST(ColorMatrixEffect);
+
+// Unit-test factory: returns a new ColorMatrixEffect with every matrix entry randomized.
+GrCustomStage* ColorMatrixEffect::TestCreate(SkRandom* random, GrContext*,
+                                             GrTexture* dummyTextures[2]) {
+    SkColorMatrix colorMatrix;
+    for (size_t i = 0; i < SK_ARRAY_COUNT(colorMatrix.fMat); ++i) {
+        colorMatrix.fMat[i] = random->nextSScalar1();
+    }
+    return SkNEW_ARGS(ColorMatrixEffect, (colorMatrix));
+}
+
+GrCustomStage* SkColorMatrixFilter::asNewCustomStage(GrContext*) const {
+    return SkNEW_ARGS(ColorMatrixEffect, (fMatrix));
+}
+
+#endif
diff --git a/src/gpu/GrDrawState.cpp b/src/gpu/GrDrawState.cpp
index c95049a..0f0a27a 100644
--- a/src/gpu/GrDrawState.cpp
+++ b/src/gpu/GrDrawState.cpp
@@ -36,12 +36,6 @@
     this->setState(GrDrawState::kDither_StateBit, paint.isDither());
     this->setState(GrDrawState::kHWAntialias_StateBit, paint.isAntiAlias());
 
-    if (paint.isColorMatrixEnabled()) {
-        this->enableState(GrDrawState::kColorMatrix_StateBit);
-        this->setColorMatrix(paint.getColorMatrix());
-    } else {
-        this->disableState(GrDrawState::kColorMatrix_StateBit);
-    }
     this->setBlendFunc(paint.getSrcBlendCoeff(), paint.getDstBlendCoeff());
     this->setColorFilter(paint.getColorFilterColor(), paint.getColorFilterMode());
     this->setCoverage(paint.getCoverage());
diff --git a/src/gpu/GrDrawState.h b/src/gpu/GrDrawState.h
index 7bfb5e5..e3121ea 100644
--- a/src/gpu/GrDrawState.h
+++ b/src/gpu/GrDrawState.h
@@ -651,22 +651,6 @@
     /// @}
 
     ///////////////////////////////////////////////////////////////////////////
-    /// @name Color Matrix
-    ////
-
-    /**
-     * Sets the color matrix to use for the next draw.
-     * @param matrix  the 5x4 matrix to apply to the incoming color
-     */
-    void setColorMatrix(const float matrix[20]) {
-        memcpy(fColorMatrix, matrix, sizeof(fColorMatrix));
-    }
-
-    const float* getColorMatrix() const { return fColorMatrix; }
-
-    /// @}
-
-    ///////////////////////////////////////////////////////////////////////////
     // @name Edge AA
     // Edge equations can be specified to perform anti-aliasing. Because the
     // edges are specified as per-vertex data, vertices that are shared by
@@ -743,11 +727,6 @@
          * operations.
          */
         kNoColorWrites_StateBit = 0x08,
-        /**
-         * Draws will apply the color matrix, otherwise the color matrix is
-         * ignored.
-         */
-        kColorMatrix_StateBit   = 0x10,
 
         // Users of the class may add additional bits to the vector
         kDummyStateBit,
@@ -878,14 +857,6 @@
                 return false;
             }
         }
-        if (kColorMatrix_StateBit & s.fFlagBits) {
-            if (memcmp(fColorMatrix,
-                        s.fColorMatrix,
-                        sizeof(fColorMatrix))) {
-                return false;
-            }
-        }
-
         return true;
     }
     bool operator !=(const GrDrawState& s) const { return !(*this == s); }
@@ -914,10 +885,6 @@
             }
         }
 
-        if (kColorMatrix_StateBit & s.fFlagBits) {
-            memcpy(this->fColorMatrix, s.fColorMatrix, sizeof(fColorMatrix));
-        }
-
         return *this;
     }
 
@@ -942,8 +909,6 @@
     // This field must be last; it will not be copied or compared
     // if the corresponding fTexture[] is NULL.
     GrSamplerState      fSamplerStates[kNumStages];
-    // only compared if the color matrix enable flag is set
-    float               fColorMatrix[20];       // 5 x 4 matrix
 
     typedef GrRefCnt INHERITED;
 };
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 02bfdbd..7ea1e75 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -500,28 +500,37 @@
         grPaint->setColor(SkColor2GrColor(skPaint.getColor()));
         GrAssert(!grPaint->isColorStageEnabled(kShaderTextureIdx));
     }
+
     SkColorFilter* colorFilter = skPaint.getColorFilter();
-    SkColor color;
-    SkXfermode::Mode filterMode;
-    SkScalar matrix[20];
-    SkBitmap colorTransformTable;
-    // TODO: SkColorFilter::asCustomStage()
-    if (colorFilter != NULL && colorFilter->asColorMode(&color, &filterMode)) {
-        if (!constantColor) {
-            grPaint->setXfermodeColorFilter(filterMode, SkColor2GrColor(color));
-        } else {
+    if (NULL != colorFilter) {
+        // if the source color is a constant then apply the filter here once rather than per pixel
+        // in a shader.
+        if (constantColor) {
             SkColor filtered = colorFilter->filterColor(skPaint.getColor());
             grPaint->setColor(SkColor2GrColor(filtered));
+        } else {
+            SkAutoTUnref<GrCustomStage> stage(colorFilter->asNewCustomStage(dev->context()));
+            if (NULL != stage.get()) {
+                grPaint->colorSampler(kColorFilterTextureIdx)->setCustomStage(stage);
+            } else {
+                // TODO: rewrite these using asNewCustomStage()
+                SkColor color;
+                SkXfermode::Mode filterMode;
+                SkBitmap colorTransformTable;
+                if (colorFilter->asColorMode(&color, &filterMode)) {
+                    grPaint->setXfermodeColorFilter(filterMode, SkColor2GrColor(color));
+                } else if (colorFilter != NULL &&
+                           colorFilter->asComponentTable(&colorTransformTable)) {
+                    // pass NULL because the color table effect doesn't use tiling or filtering.
+                    GrTexture* texture = act->set(dev, colorTransformTable, NULL);
+                    GrSamplerState* colorSampler = grPaint->colorSampler(kColorFilterTextureIdx);
+                    colorSampler->reset();
+                    colorSampler->setCustomStage(SkNEW_ARGS(GrColorTableEffect, (texture)))->unref();
+                }
+            }
         }
-    } else if (colorFilter != NULL && colorFilter->asColorMatrix(matrix)) {
-        grPaint->setColorMatrix(matrix);
-    } else if (colorFilter != NULL && colorFilter->asComponentTable(&colorTransformTable)) {
-        // pass NULL because the color table effect doesn't use tiling or filtering.
-        GrTexture* texture = act->set(dev, colorTransformTable, NULL);
-        GrSamplerState* colorSampler = grPaint->colorSampler(kColorFilterTextureIdx);
-        colorSampler->reset();
-        colorSampler->setCustomStage(SkNEW_ARGS(GrColorTableEffect, (texture)))->unref();
     }
+
     return true;
 }
 
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 2703110..3c3d852 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -511,7 +511,6 @@
 #endif
 
     SkXfermode::Coeff colorCoeff, uniformCoeff;
-    bool applyColorMatrix = SkToBool(fDesc.fColorMatrixEnabled);
     // The rest of transfer mode color filters have not been implemented
     if (fDesc.fColorFilterXfermode < SkXfermode::kCoeffModesCnt) {
         GR_DEBUGCODE(bool success =)
@@ -524,17 +523,15 @@
         uniformCoeff = SkXfermode::kZero_Coeff;
     }
 
-    // no need to do the color filter / matrix at all if coverage is 0. The
-    // output color is scaled by the coverage. All the dual source outputs are
-    // scaled by the coverage as well.
+    // no need to do the color filter if coverage is 0. The output color is scaled by the coverage.
+    // All the dual source outputs are scaled by the coverage as well.
     if (Desc::kTransBlack_ColorInput == fDesc.fCoverageInput) {
         colorCoeff = SkXfermode::kZero_Coeff;
         uniformCoeff = SkXfermode::kZero_Coeff;
-        applyColorMatrix = false;
     }
 
     // If we know the final color is going to be all zeros then we can
-    // simplify the color filter coeffecients. needComputedColor will then
+    // simplify the color filter coefficients. needComputedColor will then
     // come out false below.
     if (Desc::kTransBlack_ColorInput == fDesc.fColorInput) {
         colorCoeff = SkXfermode::kZero_Coeff;
@@ -664,8 +661,7 @@
     }
     bool wroteFragColorZero = false;
     if (SkXfermode::kZero_Coeff == uniformCoeff &&
-        SkXfermode::kZero_Coeff == colorCoeff &&
-        !applyColorMatrix) {
+        SkXfermode::kZero_Coeff == colorCoeff) {
         builder.fFSCode.appendf("\t%s = %s;\n",
                                 colorOutput.getName().c_str(),
                                 GrGLSLZerosVecf(4));
@@ -677,22 +673,6 @@
                        colorCoeff, colorFilterColorUniName, color);
         inColor = "filteredColor";
     }
-    if (applyColorMatrix) {
-        const char* colMatrixName;
-        const char* colMatrixVecName;
-        fUniforms.fColorMatrixUni = builder.addUniform(GrGLShaderBuilder::kFragment_ShaderType,
-                                                       kMat44f_GrSLType, "ColorMatrix",
-                                                       &colMatrixName);
-        fUniforms.fColorMatrixVecUni = builder.addUniform(GrGLShaderBuilder::kFragment_ShaderType,
-                                                          kVec4f_GrSLType, "ColorMatrixVec",
-                                                          &colMatrixVecName);
-        const char* color = adjustInColor(inColor);
-        builder.fFSCode.appendf("\tvec4 matrixedColor = %s * vec4(%s.rgb / %s.a, %s.a) + %s;\n",
-                                colMatrixName, color, color, color, colMatrixVecName);
-        builder.fFSCode.append("\tmatrixedColor.rgb *= matrixedColor.a;\n");
-
-        inColor = "matrixedColor";
-    }
 
     ///////////////////////////////////////////////////////////////////////////
     // compute the partial coverage (coverage stages and edge aa)
diff --git a/src/gpu/gl/GrGLProgram.h b/src/gpu/gl/GrGLProgram.h
index e51f663..cdb2c4b 100644
--- a/src/gpu/gl/GrGLProgram.h
+++ b/src/gpu/gl/GrGLProgram.h
@@ -155,7 +155,6 @@
         uint8_t fDualSrcOutput;     // casts to enum DualSrcOutput
         int8_t fFirstCoverageStage;
         SkBool8 fEmitsPointSize;
-        SkBool8 fColorMatrixEnabled;
 
         uint8_t fColorFilterXfermode;  // casts to enum SkXfermode::Mode
     };
@@ -224,16 +223,12 @@
         UniformHandle fColorUni;
         UniformHandle fCoverageUni;
         UniformHandle fColorFilterUni;
-        UniformHandle fColorMatrixUni;
-        UniformHandle fColorMatrixVecUni;
         StageUniforms fStages[GrDrawState::kNumStages];
         Uniforms() {
             fViewMatrixUni = GrGLUniformManager::kInvalidUniformHandle;
             fColorUni = GrGLUniformManager::kInvalidUniformHandle;
             fCoverageUni = GrGLUniformManager::kInvalidUniformHandle;
             fColorFilterUni = GrGLUniformManager::kInvalidUniformHandle;
-            fColorMatrixUni = GrGLUniformManager::kInvalidUniformHandle;
-            fColorMatrixVecUni = GrGLUniformManager::kInvalidUniformHandle;
         }
     };
 
diff --git a/src/gpu/gl/GrGpuGL.cpp b/src/gpu/gl/GrGpuGL.cpp
index 157d9bc..4df7a98 100644
--- a/src/gpu/gl/GrGpuGL.cpp
+++ b/src/gpu/gl/GrGpuGL.cpp
@@ -2020,10 +2020,12 @@
     GrDrawState* drawState = this->drawState();
     // FIXME: Assuming at most one texture per custom stage
     const GrCustomStage* customStage = drawState->sampler(stage)->getCustomStage();
-    GrGLTexture* nextTexture =  static_cast<GrGLTexture*>(customStage->texture(0));
-    if (NULL != nextTexture) {
-        const GrTextureParams& texParams = customStage->textureAccess(0).getParams();
-        this->flushBoundTextureAndParams(stage, texParams, nextTexture);
+    if (customStage->numTextures() > 0) {
+        GrGLTexture* nextTexture =  static_cast<GrGLTexture*>(customStage->texture(0));
+        if (NULL != nextTexture) {
+            const GrTextureParams& texParams = customStage->textureAccess(0).getParams();
+            this->flushBoundTextureAndParams(stage, texParams, nextTexture);
+        }
     }
 }
 
diff --git a/src/gpu/gl/GrGpuGL.h b/src/gpu/gl/GrGpuGL.h
index 351ff53..0a0c52b 100644
--- a/src/gpu/gl/GrGpuGL.h
+++ b/src/gpu/gl/GrGpuGL.h
@@ -229,14 +229,6 @@
     // sets the MVP matrix uniform for currently bound program
     void flushViewMatrix(DrawType type);
 
-    // flushes the parameters to two point radial gradient
-    void flushRadial2(int stage);
-
-    // flushes the parameters for convolution
-    void flushConvolution(int stage);
-
-    // flushes the color matrix
-    void flushColorMatrix();
 
     // flushes dithering, color-mask, and face culling stat
     void flushMiscFixedFunctionState();
@@ -251,7 +243,7 @@
                       const GrCustomStage** customStages,
                       ProgramDesc* desc);
 
-    // Inits GrDrawTarget::Caps, sublcass may enable additional caps.
+    // Inits GrDrawTarget::Caps, subclass may enable additional caps.
     void initCaps();
 
     void initFSAASupport();
diff --git a/src/gpu/gl/GrGpuGL_program.cpp b/src/gpu/gl/GrGpuGL_program.cpp
index e579331..5f5a3a6 100644
--- a/src/gpu/gl/GrGpuGL_program.cpp
+++ b/src/gpu/gl/GrGpuGL_program.cpp
@@ -200,6 +200,9 @@
 
     // FIXME: Still assuming only a single texture per custom stage
     const GrCustomStage* stage = drawState.getSampler(s).getCustomStage();
+    if (0 == stage->numTextures()) {
+        return;
+    }
     const GrGLTexture* texture = static_cast<const GrGLTexture*>(stage->texture(0));
     if (NULL != texture) {
 
@@ -240,26 +243,6 @@
     }
 }
 
-void GrGpuGL::flushColorMatrix() {
-    UniformHandle matrixUni = fCurrentProgram->fUniforms.fColorMatrixUni;
-    UniformHandle vecUni = fCurrentProgram->fUniforms.fColorMatrixVecUni;
-    if (kInvalidUniformHandle != matrixUni && kInvalidUniformHandle != vecUni) {
-        const float* m = this->getDrawState().getColorMatrix();
-        GrGLfloat mt[]  = {
-            m[0], m[5], m[10], m[15],
-            m[1], m[6], m[11], m[16],
-            m[2], m[7], m[12], m[17],
-            m[3], m[8], m[13], m[18],
-        };
-        static float scale = 1.0f / 255.0f;
-        GrGLfloat vec[] = {
-            m[4] * scale, m[9] * scale, m[14] * scale, m[19] * scale,
-        };
-        fCurrentProgram->fUniformManager.setMatrix4f(matrixUni, mt);
-        fCurrentProgram->fUniformManager.set4fv(vecUni, 0, 1, vec);
-    }
-}
-
 void GrGpuGL::flushColor(GrColor color) {
     const ProgramDesc& desc = fCurrentProgram->getDesc();
     const GrDrawState& drawState = this->getDrawState();
@@ -414,7 +397,6 @@
                 }
             }
         }
-        this->flushColorMatrix();
     }
     this->flushStencil(type);
     this->flushViewMatrix(type);
@@ -645,8 +627,6 @@
                                 SkXfermode::kDst_Mode :
                                 drawState.getColorFilterMode();
 
-    desc->fColorMatrixEnabled = drawState.isStateFlagEnabled(GrDrawState::kColorMatrix_StateBit);
-
     // no reason to do edge aa or look at per-vertex coverage if coverage is
     // ignored
     if (skipCoverage) {
@@ -701,16 +681,23 @@
             const GrSamplerState& sampler = drawState.getSampler(s);
             // FIXME: Still assuming one texture per custom stage
             const GrCustomStage* customStage = drawState.getSampler(s).getCustomStage();
-            const GrGLTexture* texture = static_cast<const GrGLTexture*>(customStage->texture(0));
-            GrMatrix samplerMatrix;
-            sampler.getTotalMatrix(&samplerMatrix);
-            if (NULL != texture) {
-                // We call this helper function rather then simply checking the client-specified
-                // texture matrix. This is because we may have to concat a y-inversion to account
-                // for texture orientation.
-                stage.fOptFlags |= TextureMatrixOptFlags(texture, sampler);
-            }
 
+            if (customStage->numTextures() > 0) {
+                const GrGLTexture* texture =
+                    static_cast<const GrGLTexture*>(customStage->texture(0));
+                GrMatrix samplerMatrix;
+                sampler.getTotalMatrix(&samplerMatrix);
+                if (NULL != texture) {
+                    // We call this helper function rather than simply checking the client-specified
+                    // texture matrix. This is because we may have to concat a y-inversion to account
+                    // for texture orientation.
+                    stage.fOptFlags |= TextureMatrixOptFlags(texture, sampler);
+                }
+            } else {
+                // Set identity to do the minimal amount of extra work for the no texture case.
+                // This will go away when custom stages manage their own texture matrix.
+                stage.fOptFlags |= StageDesc::kIdentityMatrix_OptFlagBit;
+            }
             setup_custom_stage(&stage, sampler, this->glCaps(), customStages,
                                fCurrentProgram.get(), s);
 
@@ -724,7 +711,7 @@
     desc->fDualSrcOutput = ProgramDesc::kNone_DualSrcOutput;
 
     // Currently the experimental GS will only work with triangle prims (and it doesn't do anything
-    // other than pass through values fromthe VS to the FS anyway).
+    // other than pass through values from the VS to the FS anyway).
 #if 0 && GR_GL_EXPERIMENTAL_GS
     desc->fExperimentalGS = this->getCaps().fGeometryShaderSupport;
 #endif
diff --git a/tests/GLProgramsTest.cpp b/tests/GLProgramsTest.cpp
index deaf034..16fd565 100644
--- a/tests/GLProgramsTest.cpp
+++ b/tests/GLProgramsTest.cpp
@@ -114,8 +114,6 @@
         } else {
         }
 
-        pdesc.fColorMatrixEnabled = random_bool(&random);
-
         if (this->getCaps().dualSourceBlendingSupport()) {
             pdesc.fDualSrcOutput = random_int(&random, ProgramDesc::kDualSrcOutputCnt);
         } else {
@@ -174,17 +172,17 @@
     REPORTER_ASSERT(reporter, shadersGpu->programUnitTest());
 }
 
-
 #include "TestClassDef.h"
 DEFINE_GPUTESTCLASS("GLPrograms", GLProgramsTestClass, GLProgramsTest)
 
 // This is evil evil evil. The linker may throw away whole translation units as dead code if it
-// thinks none of the functions are called. It will do this even if there are static initilializers
+// thinks none of the functions are called. It will do this even if there are static initializers
 // in the unit that could pass pointers to functions from the unit out to other translation units!
 // We force some of the effects that would otherwise be discarded to link here.
 
 #include "SkLightingImageFilter.h"
 #include "SkMagnifierImageFilter.h"
+#include "SkColorMatrixFilter.h"
 
 void forceLinking();
 
@@ -192,6 +190,8 @@
     SkLightingImageFilter::CreateDistantLitDiffuse(SkPoint3(0,0,0), 0, 0, 0);
     SkMagnifierImageFilter mag(SkRect::MakeWH(SK_Scalar1, SK_Scalar1), SK_Scalar1);
     GrConfigConversionEffect::Create(NULL, false);
+    SkScalar matrix[20] = { 0 };  // zero-init so the filter constructor never sees garbage
+    SkColorMatrixFilter cmf(matrix);
 }
 
 #endif