Make GrGLProgramDesc's key variable length by compacting the effect key array

R=robertphillips@google.com

Review URL: https://codereview.chromium.org/15252004

git-svn-id: http://skia.googlecode.com/svn/trunk@9239 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 1b9c3a2..0445073 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -96,7 +96,7 @@
 
 void GrGLProgram::overrideBlend(GrBlendCoeff* srcCoeff,
                                 GrBlendCoeff* dstCoeff) const {
-    switch (fDesc.fCoverageOutput) {
+    switch (fDesc.getHeader().fCoverageOutput) {
         case GrGLProgramDesc::kModulate_CoverageOutput:
             break;
         // The prog will write a coverage value to the secondary
@@ -221,7 +221,7 @@
 }
 
 GrSLConstantVec GrGLProgram::genInputColor(GrGLShaderBuilder* builder, SkString* inColor) {
-    switch (fDesc.fColorInput) {
+    switch (fDesc.getHeader().fColorInput) {
         case GrGLProgramDesc::kAttribute_ColorInput: {
             builder->addAttribute(kVec4f_GrSLType, COL_ATTR_NAME);
             const char *vsName, *fsName;
@@ -250,7 +250,7 @@
 }
 
 GrSLConstantVec GrGLProgram::genInputCoverage(GrGLShaderBuilder* builder, SkString* inCoverage) {
-    switch (fDesc.fCoverageInput) {
+    switch (fDesc.getHeader().fCoverageInput) {
         case GrGLProgramDesc::kAttribute_ColorInput: {
             builder->addAttribute(kVec4f_GrSLType, COV_ATTR_NAME);
             const char *vsName, *fsName;
@@ -282,13 +282,13 @@
 void GrGLProgram::genGeometryShader(GrGLShaderBuilder* builder) const {
 #if GR_GL_EXPERIMENTAL_GS
     // TODO: The builder should add all this glue code.
-    if (fDesc.fExperimentalGS) {
+    if (fDesc.getHeader().fExperimentalGS) {
         GrAssert(fContext.info().glslGeneration() >= k150_GrGLSLGeneration);
         builder->fGSHeader.append("layout(triangles) in;\n"
                                    "layout(triangle_strip, max_vertices = 6) out;\n");
         builder->gsCodeAppend("\tfor (int i = 0; i < 3; ++i) {\n"
                               "\t\tgl_Position = gl_in[i].gl_Position;\n");
-        if (fDesc.fEmitsPointSize) {
+        if (fDesc.getHeader().fEmitsPointSize) {
             builder->gsCodeAppend("\t\tgl_PointSize = 1.0;\n");
         }
         GrAssert(builder->fGSInputs.count() == builder->fGSOutputs.count());
@@ -309,7 +309,7 @@
     if (inColor.size()) {
           return inColor.c_str();
     } else {
-        if (GrGLProgramDesc::kSolidWhite_ColorInput == fDesc.fColorInput) {
+        if (GrGLProgramDesc::kSolidWhite_ColorInput == fDesc.getHeader().fColorInput) {
             return GrGLSLOnesVecf(4);
         } else {
             return GrGLSLZerosVecf(4);
@@ -412,7 +412,7 @@
 
     fGShaderID = 0;
 #if GR_GL_EXPERIMENTAL_GS
-    if (fDesc.fExperimentalGS) {
+    if (fDesc.getHeader().fExperimentalGS) {
         builder.getShader(GrGLShaderBuilder::kGeometry_ShaderType, &shader);
         if (c_PrintShaders) {
             GrPrintf(shader.c_str());
@@ -439,6 +439,8 @@
 bool GrGLProgram::genProgram(const GrEffectStage* stages[]) {
     GrAssert(0 == fProgramID);
 
+    const GrGLProgramDesc::KeyHeader& header = fDesc.getHeader();
+
     GrGLShaderBuilder builder(fContext.info(), fUniformManager, fDesc);
 
     // the dual source output has no canonical var name, have to
@@ -467,9 +469,9 @@
     GrSLConstantVec knownColorValue = this->genInputColor(&builder, &inColor);
 
     // we output point size in the GS if present
-    if (fDesc.fEmitsPointSize
+    if (header.fEmitsPointSize
 #if GR_GL_EXPERIMENTAL_GS
-        && !fDesc.fExperimentalGS
+        && !header.fExperimentalGS
 #endif
         ) {
         builder.vsCodeAppend("\tgl_PointSize = 1.0;\n");
@@ -479,7 +481,7 @@
     SkXfermode::Coeff colorCoeff;
     SkXfermode::Coeff filterColorCoeff;
     SkAssertResult(
-        SkXfermode::ModeAsCoeff(static_cast<SkXfermode::Mode>(fDesc.fColorFilterXfermode),
+        SkXfermode::ModeAsCoeff(static_cast<SkXfermode::Mode>(header.fColorFilterXfermode),
                                 &filterColorCoeff,
                                 &colorCoeff));
     bool needColor, needFilterColor;
@@ -489,13 +491,13 @@
     SkTArray<GrGLUniformManager::UniformHandle, true>* stageUniformArrays[GrDrawState::kNumStages];
 
     if (needColor) {
-        for (int s = 0; s < fDesc.fFirstCoverageStage; ++s) {
+        for (int s = 0; s < fDesc.numColorEffects(); ++s) {
             stageUniformArrays[s] = &fUniformHandles.fEffectSamplerUnis[s];
         }
 
         builder.emitEffects(stages,
-                            fDesc.fEffectKeys,
-                            fDesc.fFirstCoverageStage,
+                            fDesc.effectKeys(),
+                            fDesc.numColorEffects(),
                             &inColor,
                             &knownColorValue,
                             stageUniformArrays,
@@ -503,7 +505,7 @@
     }
 
     // Insert the color filter. This will soon be replaced by a color effect.
-    if (SkXfermode::kDst_Mode != fDesc.fColorFilterXfermode) {
+    if (SkXfermode::kDst_Mode != header.fColorFilterXfermode) {
         const char* colorFilterColorUniName = NULL;
         fUniformHandles.fColorFilterUni = builder.addUniform(GrGLShaderBuilder::kFragment_ShaderType,
                                                              kVec4f_GrSLType, "FilterColor",
@@ -529,20 +531,20 @@
     SkString inCoverage;
     GrSLConstantVec knownCoverageValue = this->genInputCoverage(&builder, &inCoverage);
 
-    for (int s = fDesc.fFirstCoverageStage, i = 0; s < GrDrawState::kNumStages; ++s, ++i) {
-        stageUniformArrays[i] = &fUniformHandles.fEffectSamplerUnis[s];
+    for (int s = 0; s < fDesc.numCoverageEffects(); ++s) {
+        stageUniformArrays[s] = &fUniformHandles.fEffectSamplerUnis[s + fDesc.numColorEffects()];
     }
 
-    builder.emitEffects(stages + fDesc.fFirstCoverageStage,
-                        fDesc.fEffectKeys + fDesc.fFirstCoverageStage,
-                        GrDrawState::kNumStages - fDesc.fFirstCoverageStage,
+    builder.emitEffects(stages + fDesc.numColorEffects(),
+                        fDesc.getEffectKeys() + fDesc.numColorEffects(),
+                        fDesc.numCoverageEffects(),
                         &inCoverage,
                         &knownCoverageValue,
                         stageUniformArrays,
-                        fEffects + fDesc.fFirstCoverageStage);
+                        fEffects + fDesc.numColorEffects());
 
     // discard if coverage is zero
-    if (fDesc.fDiscardIfZeroCoverage && kOnes_GrSLConstantVec != knownCoverageValue) {
+    if (header.fDiscardIfZeroCoverage && kOnes_GrSLConstantVec != knownCoverageValue) {
         if (kZeros_GrSLConstantVec == knownCoverageValue) {
             // This is unfortunate.
             builder.fsCodeAppend("\tdiscard;\n");
@@ -553,7 +555,7 @@
     }
 
     GrGLProgramDesc::CoverageOutput coverageOutput =
-        static_cast<GrGLProgramDesc::CoverageOutput>(fDesc.fCoverageOutput);
+        static_cast<GrGLProgramDesc::CoverageOutput>(header.fCoverageOutput);
     if (GrGLProgramDesc::CoverageOutputUsesSecondaryOutput(coverageOutput)) {
         builder.fFSOutputs.push_back().set(kVec4f_GrSLType,
                                            GrGLShaderVar::kOut_TypeModifier,
@@ -561,7 +563,7 @@
         // default coeff to ones for kCoverage_DualSrcOutput
         SkString coeff;
         GrSLConstantVec knownCoeffValue = kOnes_GrSLConstantVec;
-        if (GrGLProgramDesc::kSecondaryCoverageISA_CoverageOutput == fDesc.fCoverageOutput) {
+        if (GrGLProgramDesc::kSecondaryCoverageISA_CoverageOutput == header.fCoverageOutput) {
             // Get (1-A) into coeff
             SkString inColorAlpha;
             GrGLSLGetComponent4f(&inColorAlpha,
@@ -687,20 +689,22 @@
         GL_CALL(BindFragDataLocationIndexed(fProgramID, 0, 1, dual_source_output_name()));
     }
 
+    const GrGLProgramDesc::KeyHeader& header = fDesc.getHeader();
+
     // Bind the attrib locations to same values for all shaders
     GL_CALL(BindAttribLocation(fProgramID,
-                               fDesc.fPositionAttributeIndex,
+                               header.fPositionAttributeIndex,
                                builder.positionAttribute().c_str()));
-    if (-1 != fDesc.fLocalCoordAttributeIndex) {
+    if (-1 != header.fLocalCoordAttributeIndex) {
         GL_CALL(BindAttribLocation(fProgramID,
-                                   fDesc.fLocalCoordAttributeIndex,
+                                   header.fLocalCoordAttributeIndex,
                                    builder.localCoordsAttribute().c_str()));
     }
-    if (-1 != fDesc.fColorAttributeIndex) {
-        GL_CALL(BindAttribLocation(fProgramID, fDesc.fColorAttributeIndex, COL_ATTR_NAME));
+    if (-1 != header.fColorAttributeIndex) {
+        GL_CALL(BindAttribLocation(fProgramID, header.fColorAttributeIndex, COL_ATTR_NAME));
     }
-    if (-1 != fDesc.fCoverageAttributeIndex) {
-        GL_CALL(BindAttribLocation(fProgramID, fDesc.fCoverageAttributeIndex, COV_ATTR_NAME));
+    if (-1 != header.fCoverageAttributeIndex) {
+        GL_CALL(BindAttribLocation(fProgramID, header.fCoverageAttributeIndex, COV_ATTR_NAME));
     }
 
     const GrGLShaderBuilder::AttributePair* attribEnd = builder.getEffectAttributes().end();
@@ -761,12 +765,25 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void GrGLProgram::setData(GrGpuGL* gpu,
-                          GrColor color,
-                          GrColor coverage,
+                          GrDrawState::BlendOptFlags blendOpts,
+                          const GrEffectStage* stages[],
                           const GrDeviceCoordTexture* dstCopy,
                           SharedGLState* sharedState) {
     const GrDrawState& drawState = gpu->getDrawState();
 
+    GrColor color;
+    GrColor coverage;
+    if (blendOpts & GrDrawState::kEmitTransBlack_BlendOptFlag) {
+        color = 0;
+        coverage = 0;
+    } else if (blendOpts & GrDrawState::kEmitCoverage_BlendOptFlag) {
+        color = 0xffffffff;
+        coverage = drawState.getCoverage();
+    } else {
+        color = drawState.getColor();
+        coverage = drawState.getCoverage();
+    }
+
     this->setColor(drawState, color, sharedState);
     this->setCoverage(drawState, coverage, sharedState);
     this->setMatrixAndRenderTargetHeight(drawState);
@@ -803,26 +820,25 @@
                     fUniformHandles.fDstCopySamplerUni);
         }
     } else {
-        GrAssert(GrGLUniformManager::kInvalidUniformHandle ==
-                    fUniformHandles.fDstCopyTopLeftUni);
-        GrAssert(GrGLUniformManager::kInvalidUniformHandle ==
-                    fUniformHandles.fDstCopyScaleUni);
-        GrAssert(GrGLUniformManager::kInvalidUniformHandle ==
-                    fUniformHandles.fDstCopySamplerUni);
+        GrAssert(GrGLUniformManager::kInvalidUniformHandle == fUniformHandles.fDstCopyTopLeftUni);
+        GrAssert(GrGLUniformManager::kInvalidUniformHandle == fUniformHandles.fDstCopyScaleUni);
+        GrAssert(GrGLUniformManager::kInvalidUniformHandle == fUniformHandles.fDstCopySamplerUni);
     }
-    for (int s = 0; s < GrDrawState::kNumStages; ++s) {
-        if (NULL != fEffects[s]) {
-            const GrEffectStage& stage = drawState.getStage(s);
-            GrAssert(NULL != stage.getEffect());
 
-            bool explicitLocalCoords = -1 != fDesc.fLocalCoordAttributeIndex;
-            GrDrawEffect drawEffect(stage, explicitLocalCoords);
-            fEffects[s]->setData(fUniformManager, drawEffect);
-            int numSamplers = fUniformHandles.fEffectSamplerUnis[s].count();
+    int numEffects = fDesc.numTotalEffects();
+    for (int e = 0; e < numEffects; ++e) {
+        GrAssert(NULL != stages[e]);
+        // We may have omitted the GrGLEffect because of the color filter logic in genProgram.
+        // This can be removed when the color filter is an effect.
+        if (NULL != fEffects[e]) {
+            bool explicitLocalCoords = -1 != fDesc.getHeader().fLocalCoordAttributeIndex;
+            GrDrawEffect drawEffect(*stages[e], explicitLocalCoords);
+            fEffects[e]->setData(fUniformManager, drawEffect);
+            int numSamplers = fUniformHandles.fEffectSamplerUnis[e].count();
             for (int u = 0; u < numSamplers; ++u) {
-                UniformHandle handle = fUniformHandles.fEffectSamplerUnis[s][u];
+                UniformHandle handle = fUniformHandles.fEffectSamplerUnis[e][u];
                 if (GrGLUniformManager::kInvalidUniformHandle != handle) {
-                    const GrTextureAccess& access = (*stage.getEffect())->textureAccess(u);
+                    const GrTextureAccess& access = (*stages[e]->getEffect())->textureAccess(u);
                     GrGLTexture* texture = static_cast<GrGLTexture*>(access.getTexture());
                     gpu->bindTexture(texUnitIdx, access.getParams(), texture);
                     ++texUnitIdx;
@@ -835,18 +851,19 @@
 void GrGLProgram::setColor(const GrDrawState& drawState,
                            GrColor color,
                            SharedGLState* sharedState) {
+    const GrGLProgramDesc::KeyHeader& header = fDesc.getHeader();
     if (!drawState.hasColorVertexAttribute()) {
-        switch (fDesc.fColorInput) {
+        switch (header.fColorInput) {
             case GrGLProgramDesc::kAttribute_ColorInput:
-                GrAssert(-1 != fDesc.fColorAttributeIndex);
+                GrAssert(-1 != header.fColorAttributeIndex);
                 if (sharedState->fConstAttribColor != color ||
-                    sharedState->fConstAttribColorIndex != fDesc.fColorAttributeIndex) {
+                    sharedState->fConstAttribColorIndex != header.fColorAttributeIndex) {
                     // OpenGL ES only supports the float varieties of glVertexAttrib
                     GrGLfloat c[4];
                     GrColorToRGBAFloat(color, c);
-                    GL_CALL(VertexAttrib4fv(fDesc.fColorAttributeIndex, c));
+                    GL_CALL(VertexAttrib4fv(header.fColorAttributeIndex, c));
                     sharedState->fConstAttribColor = color;
-                    sharedState->fConstAttribColorIndex = fDesc.fColorAttributeIndex;
+                    sharedState->fConstAttribColorIndex = header.fColorAttributeIndex;
                 }
                 break;
             case GrGLProgramDesc::kUniform_ColorInput:
@@ -876,17 +893,18 @@
 void GrGLProgram::setCoverage(const GrDrawState& drawState,
                               GrColor coverage,
                               SharedGLState* sharedState) {
+    const GrGLProgramDesc::KeyHeader& header = fDesc.getHeader();
     if (!drawState.hasCoverageVertexAttribute()) {
-        switch (fDesc.fCoverageInput) {
+        switch (header.fCoverageInput) {
             case GrGLProgramDesc::kAttribute_ColorInput:
                 if (sharedState->fConstAttribCoverage != coverage ||
-                    sharedState->fConstAttribCoverageIndex != fDesc.fCoverageAttributeIndex) {
+                    sharedState->fConstAttribCoverageIndex != header.fCoverageAttributeIndex) {
                     // OpenGL ES only supports the float varieties of  glVertexAttrib
                     GrGLfloat c[4];
                     GrColorToRGBAFloat(coverage, c);
-                    GL_CALL(VertexAttrib4fv(fDesc.fCoverageAttributeIndex, c));
+                    GL_CALL(VertexAttrib4fv(header.fCoverageAttributeIndex, c));
                     sharedState->fConstAttribCoverage = coverage;
-                    sharedState->fConstAttribCoverageIndex = fDesc.fCoverageAttributeIndex;
+                    sharedState->fConstAttribCoverageIndex = header.fCoverageAttributeIndex;
                 }
                 break;
             case GrGLProgramDesc::kUniform_ColorInput:
diff --git a/src/gpu/gl/GrGLProgram.h b/src/gpu/gl/GrGLProgram.h
index 578b4a9..be6e687 100644
--- a/src/gpu/gl/GrGLProgram.h
+++ b/src/gpu/gl/GrGLProgram.h
@@ -48,7 +48,7 @@
     void abandon();
 
     /**
-     * The shader may modify the blend coefficients. Params are in/out
+     * The shader may modify the blend coefficients. Params are in/out.
      */
     void overrideBlend(GrBlendCoeff* srcCoeff, GrBlendCoeff* dstCoeff) const;
 
@@ -104,12 +104,10 @@
      * This function uploads uniforms and calls each GrGLEffect's setData. It is called before a
      * draw occurs using the program after the program has already been bound. It also uses the
      * GrGpuGL object to bind the textures required by the GrGLEffects.
-     *
-     * The color and coverage params override the GrDrawState's getColor() and getCoverage() values.
      */
     void setData(GrGpuGL*,
-                 GrColor color,
-                 GrColor coverage,
+                 GrDrawState::BlendOptFlags,
+                 const GrEffectStage* stages[],       // output of GrGLProgramDesc::Build()
                  const GrDeviceCoordTexture* dstCopy, // can be NULL
                  SharedGLState*);
 
@@ -121,7 +119,9 @@
     bool succeeded() const { return 0 != fProgramID; }
 
     /**
-     *  This is the heavy initialization routine for building a GLProgram.
+     * This is the heavy initialization routine for building a GLProgram. stages is all the enabled
+     * color stages followed by all the enabled coverage stages as output by
+     * GrGLProgramDesc::Build()
      */
     bool genProgram(const GrEffectStage* stages[]);
 
diff --git a/src/gpu/gl/GrGLProgramDesc.cpp b/src/gpu/gl/GrGLProgramDesc.cpp
index e8812fa..276f9b5 100644
--- a/src/gpu/gl/GrGLProgramDesc.cpp
+++ b/src/gpu/gl/GrGLProgramDesc.cpp
@@ -12,6 +12,8 @@
 #include "GrGLShaderBuilder.h"
 #include "GrGpuGL.h"
 
+#include "SkChecksum.h"
+
 void GrGLProgramDesc::Build(const GrDrawState& drawState,
                             bool isPoints,
                             GrDrawState::BlendOptFlags blendOpts,
@@ -19,8 +21,8 @@
                             GrBlendCoeff dstCoeff,
                             const GrGpuGL* gpu,
                             const GrDeviceCoordTexture* dstCopy,
+                            const GrEffectStage* stages[],
                             GrGLProgramDesc* desc) {
-
     // This should already have been caught
     GrAssert(!(GrDrawState::kSkipDraw_BlendOptFlag & blendOpts));
 
@@ -34,131 +36,169 @@
     // bindings in use or other descriptor field settings) it should be set
     // to a canonical value to avoid duplicate programs with different keys.
 
-
-    desc->fEmitsPointSize = isPoints;
-
     bool requiresColorAttrib = !skipColor && drawState.hasColorVertexAttribute();
     bool requiresCoverageAttrib = !skipCoverage && drawState.hasCoverageVertexAttribute();
     // we only need the local coords if we're actually going to generate effect code
     bool requiresLocalCoordAttrib = !(skipCoverage  && skipColor) &&
                                     drawState.hasLocalCoordAttribute();
 
-    // fColorInput/fCoverageInput records how colors are specified for the program so we strip the
-    // bits from the bindings to avoid false negatives when searching for an existing program in the
-    // cache.
-
-    desc->fColorFilterXfermode = skipColor ? SkXfermode::kDst_Mode : drawState.getColorFilterMode();
-
-
     bool colorIsTransBlack = SkToBool(blendOpts & GrDrawState::kEmitTransBlack_BlendOptFlag);
     bool colorIsSolidWhite = (blendOpts & GrDrawState::kEmitCoverage_BlendOptFlag) ||
                              (!requiresColorAttrib && 0xffffffff == drawState.getColor());
-    if (colorIsTransBlack) {
-        desc->fColorInput = kTransBlack_ColorInput;
-    } else if (colorIsSolidWhite) {
-        desc->fColorInput = kSolidWhite_ColorInput;
-    } else if (GR_GL_NO_CONSTANT_ATTRIBUTES && !requiresColorAttrib) {
-        desc->fColorInput = kUniform_ColorInput;
-    } else {
-        desc->fColorInput = kAttribute_ColorInput;
+
+    // Do an initial loop over the stages to count them. We count the color and coverage effects
+    // separately here. Later we may decide the distinction doesn't matter and will count all
+    // effects as color in desc. Two things will allow simplification of this mess: GrDrawState will
+    // have tight lists of color and coverage stages rather than a fixed size array with NULLS and
+    // the xfermode-color filter will be removed.
+    int colorEffectCnt = 0;
+    int coverageEffectCnt = 0;
+    if (!skipColor) {
+        for (int s = 0; s < drawState.getFirstCoverageStage(); ++s) {
+            if (drawState.isStageEnabled(s)) {
+                stages[colorEffectCnt] = &drawState.getStage(s);
+                ++colorEffectCnt;
+            }
+        }
+    }
+    if (!skipCoverage) {
+        for (int s = drawState.getFirstCoverageStage(); s < GrDrawState::kNumStages; ++s) {
+            if (drawState.isStageEnabled(s)) {
+                stages[colorEffectCnt + coverageEffectCnt] = &drawState.getStage(s);
+                ++coverageEffectCnt;
+            }
+        }
     }
 
-    bool covIsSolidWhite = !requiresCoverageAttrib && 0xffffffff == drawState.getCoverage();
-
-    if (skipCoverage) {
-        desc->fCoverageInput = kTransBlack_ColorInput;
-    } else if (covIsSolidWhite) {
-        desc->fCoverageInput = kSolidWhite_ColorInput;
-    } else if (GR_GL_NO_CONSTANT_ATTRIBUTES && !requiresCoverageAttrib) {
-        desc->fCoverageInput = kUniform_ColorInput;
-    } else {
-        desc->fCoverageInput = kAttribute_ColorInput;
+    size_t newKeyLength = KeyLength(colorEffectCnt + coverageEffectCnt);
+    bool allocChanged;
+    desc->fKey.reset(newKeyLength, SkAutoMalloc::kAlloc_OnShrink, &allocChanged);
+    if (allocChanged || !desc->fInitialized) {
+        // make sure any padding in the header is zero if we haven't used this allocation before.
+        memset(desc->header(), 0, kHeaderSize);
     }
+    // write the key length
+    *desc->atOffset<uint32_t, kLengthOffset>() = newKeyLength;
 
+    KeyHeader* header = desc->header();
+    EffectKey* effectKeys = desc->effectKeys();
+
+    int currEffectKey = 0;
     bool readsDst = false;
     bool readFragPosition = false;
-    int lastEnabledStage = -1;
-
     for (int s = 0; s < GrDrawState::kNumStages; ++s) {
-
         bool skip = s < drawState.getFirstCoverageStage() ? skipColor : skipCoverage;
         if (!skip && drawState.isStageEnabled(s)) {
-            lastEnabledStage = s;
             const GrEffectRef& effect = *drawState.getStage(s).getEffect();
             const GrBackendEffectFactory& factory = effect->getFactory();
             GrDrawEffect drawEffect(drawState.getStage(s), requiresLocalCoordAttrib);
-            desc->fEffectKeys[s] = factory.glEffectKey(drawEffect, gpu->glCaps());
+            effectKeys[currEffectKey] = factory.glEffectKey(drawEffect, gpu->glCaps());
+            ++currEffectKey;
             if (effect->willReadDstColor()) {
                 readsDst = true;
             }
             if (effect->willReadFragmentPosition()) {
                 readFragPosition = true;
             }
-        } else {
-            desc->fEffectKeys[s] = 0;
         }
     }
 
+    header->fEmitsPointSize = isPoints;
+    header->fColorFilterXfermode = skipColor ? SkXfermode::kDst_Mode : drawState.getColorFilterMode();
+
+    // Currently the experimental GS will only work with triangle prims (and it doesn't do anything
+    // other than pass through values from the VS to the FS anyway).
+#if GR_GL_EXPERIMENTAL_GS
+#if 0
+    header->fExperimentalGS = gpu->caps().geometryShaderSupport();
+#else
+    header->fExperimentalGS = false;
+#endif
+#endif
+    if (colorIsTransBlack) {
+        header->fColorInput = kTransBlack_ColorInput;
+    } else if (colorIsSolidWhite) {
+        header->fColorInput = kSolidWhite_ColorInput;
+    } else if (GR_GL_NO_CONSTANT_ATTRIBUTES && !requiresColorAttrib) {
+        header->fColorInput = kUniform_ColorInput;
+    } else {
+        header->fColorInput = kAttribute_ColorInput;
+    }
+
+    bool covIsSolidWhite = !requiresCoverageAttrib && 0xffffffff == drawState.getCoverage();
+
+    if (skipCoverage) {
+        header->fCoverageInput = kTransBlack_ColorInput;
+    } else if (covIsSolidWhite) {
+        header->fCoverageInput = kSolidWhite_ColorInput;
+    } else if (GR_GL_NO_CONSTANT_ATTRIBUTES && !requiresCoverageAttrib) {
+        header->fCoverageInput = kUniform_ColorInput;
+    } else {
+        header->fCoverageInput = kAttribute_ColorInput;
+    }
+
     if (readsDst) {
         GrAssert(NULL != dstCopy || gpu->caps()->dstReadInShaderSupport());
         const GrTexture* dstCopyTexture = NULL;
         if (NULL != dstCopy) {
             dstCopyTexture = dstCopy->texture();
         }
-        desc->fDstReadKey = GrGLShaderBuilder::KeyForDstRead(dstCopyTexture, gpu->glCaps());
-        GrAssert(0 != desc->fDstReadKey);
+        header->fDstReadKey = GrGLShaderBuilder::KeyForDstRead(dstCopyTexture, gpu->glCaps());
+        GrAssert(0 != header->fDstReadKey);
     } else {
-        desc->fDstReadKey = 0;
+        header->fDstReadKey = 0;
     }
 
     if (readFragPosition) {
-        desc->fFragPosKey = GrGLShaderBuilder::KeyForFragmentPosition(drawState.getRenderTarget(),
+        header->fFragPosKey = GrGLShaderBuilder::KeyForFragmentPosition(drawState.getRenderTarget(),
                                                                       gpu->glCaps());
     } else {
-        desc->fFragPosKey = 0;
+        header->fFragPosKey = 0;
     }
 
-    desc->fCoverageOutput = kModulate_CoverageOutput;
-
-    // Currently the experimental GS will only work with triangle prims (and it doesn't do anything
-    // other than pass through values from the VS to the FS anyway).
-#if GR_GL_EXPERIMENTAL_GS
-#if 0
-    desc->fExperimentalGS = gpu->caps().geometryShaderSupport();
-#else
-    desc->fExperimentalGS = false;
-#endif
-#endif
-
-    // We leave this set to kNumStages until we discover that the coverage/color distinction is
-    // material to the generated program. We do this to avoid distinct keys that generate equivalent
-    // programs.
-    desc->fFirstCoverageStage = GrDrawState::kNumStages;
-    // This tracks the actual first coverage stage.
-    int firstCoverageStage = GrDrawState::kNumStages;
-    desc->fDiscardIfZeroCoverage = false; // Enabled below if stenciling and there is coverage.
-    bool hasCoverage = false;
-    // If we're rendering coverage-as-color then it's as though there are no coverage stages.
-    if (!drawState.isCoverageDrawing()) {
-        // We can have coverage either through a stage or coverage vertex attributes.
-        if (drawState.getFirstCoverageStage() <= lastEnabledStage) {
-            firstCoverageStage = drawState.getFirstCoverageStage();
-            hasCoverage = true;
-        } else {
-            hasCoverage = requiresCoverageAttrib;
-        }
+    // Record attribute indices
+    header->fPositionAttributeIndex = drawState.positionAttributeIndex();
+    header->fLocalCoordAttributeIndex = drawState.localCoordAttributeIndex();
+    
+    // For constant color and coverage we need an attribute with an index beyond those already set
+    int availableAttributeIndex = drawState.getVertexAttribCount();
+    if (requiresColorAttrib) {
+        header->fColorAttributeIndex = drawState.colorVertexAttributeIndex();
+    } else if (GrGLProgramDesc::kAttribute_ColorInput == header->fColorInput) {
+        GrAssert(availableAttributeIndex < GrDrawState::kMaxVertexAttribCnt);
+        header->fColorAttributeIndex = availableAttributeIndex;
+        availableAttributeIndex++;
+    } else {
+        header->fColorAttributeIndex = -1;
+    }
+    
+    if (requiresCoverageAttrib) {
+        header->fCoverageAttributeIndex = drawState.coverageVertexAttributeIndex();
+    } else if (GrGLProgramDesc::kAttribute_ColorInput == header->fCoverageInput) {
+        GrAssert(availableAttributeIndex < GrDrawState::kMaxVertexAttribCnt);
+        header->fCoverageAttributeIndex = availableAttributeIndex;
+    } else {
+        header->fCoverageAttributeIndex = -1;
     }
 
-    if (hasCoverage) {
+    // Here we deal with whether/how we handle color and coverage separately.
+    
+    // Set these defaults and then possibly change our mind if there is coverage.
+    header->fDiscardIfZeroCoverage = false;
+    header->fCoverageOutput = kModulate_CoverageOutput;
+
+    // If we do have coverage determine whether it matters.
+    bool separateCoverageFromColor = false;
+    if (!drawState.isCoverageDrawing() && (coverageEffectCnt > 0 || requiresCoverageAttrib)) {
         // color filter is applied between color/coverage computation
-        if (SkXfermode::kDst_Mode != desc->fColorFilterXfermode) {
-            desc->fFirstCoverageStage = firstCoverageStage;
+        if (SkXfermode::kDst_Mode != header->fColorFilterXfermode) {
+            separateCoverageFromColor = true;
         }
 
         // If we're stenciling then we want to discard samples that have zero coverage
         if (drawState.getStencil().doesWrite()) {
-            desc->fDiscardIfZeroCoverage = true;
-            desc->fFirstCoverageStage = firstCoverageStage;
+            header->fDiscardIfZeroCoverage = true;
+            separateCoverageFromColor = true;
         }
 
         if (gpu->caps()->dualSourceBlendingSupport() &&
@@ -166,46 +206,45 @@
                            GrDrawState::kCoverageAsAlpha_BlendOptFlag))) {
             if (kZero_GrBlendCoeff == dstCoeff) {
                 // write the coverage value to second color
-                desc->fCoverageOutput =  kSecondaryCoverage_CoverageOutput;
-                desc->fFirstCoverageStage = firstCoverageStage;
+                header->fCoverageOutput =  kSecondaryCoverage_CoverageOutput;
+                separateCoverageFromColor = true;
             } else if (kSA_GrBlendCoeff == dstCoeff) {
                 // SA dst coeff becomes 1-(1-SA)*coverage when dst is partially covered.
-                desc->fCoverageOutput = kSecondaryCoverageISA_CoverageOutput;
-                desc->fFirstCoverageStage = firstCoverageStage;
+                header->fCoverageOutput = kSecondaryCoverageISA_CoverageOutput;
+                separateCoverageFromColor = true;
             } else if (kSC_GrBlendCoeff == dstCoeff) {
                 // SA dst coeff becomes 1-(1-SA)*coverage when dst is partially covered.
-                desc->fCoverageOutput = kSecondaryCoverageISC_CoverageOutput;
-                desc->fFirstCoverageStage = firstCoverageStage;
+                header->fCoverageOutput = kSecondaryCoverageISC_CoverageOutput;
+                separateCoverageFromColor = true;
             }
         } else if (readsDst &&
                    kOne_GrBlendCoeff == srcCoeff &&
                    kZero_GrBlendCoeff == dstCoeff) {
-            desc->fCoverageOutput = kCombineWithDst_CoverageOutput;
-            desc->fFirstCoverageStage = firstCoverageStage;
+            header->fCoverageOutput = kCombineWithDst_CoverageOutput;
+            separateCoverageFromColor = true;
         }
     }
-
-    desc->fPositionAttributeIndex = drawState.positionAttributeIndex();
-    desc->fLocalCoordAttributeIndex = drawState.localCoordAttributeIndex();
-
-    // For constant color and coverage we need an attribute with an index beyond those already set
-    int availableAttributeIndex = drawState.getVertexAttribCount();
-    if (requiresColorAttrib) {
-        desc->fColorAttributeIndex = drawState.colorVertexAttributeIndex();
-    } else if (GrGLProgramDesc::kAttribute_ColorInput == desc->fColorInput) {
-        GrAssert(availableAttributeIndex < GrDrawState::kMaxVertexAttribCnt);
-        desc->fColorAttributeIndex = availableAttributeIndex;
-        availableAttributeIndex++;
+    if (separateCoverageFromColor) {
+        header->fColorEffectCnt = colorEffectCnt;
+        header->fCoverageEffectCnt = coverageEffectCnt;
     } else {
-        desc->fColorAttributeIndex = -1;
+        header->fColorEffectCnt = colorEffectCnt + coverageEffectCnt;
+        header->fCoverageEffectCnt = 0;
     }
 
-    if (requiresCoverageAttrib) {
-        desc->fCoverageAttributeIndex = drawState.coverageVertexAttributeIndex();
-    } else if (GrGLProgramDesc::kAttribute_ColorInput == desc->fCoverageInput) {
-        GrAssert(availableAttributeIndex < GrDrawState::kMaxVertexAttribCnt);
-        desc->fCoverageAttributeIndex = availableAttributeIndex;
-    } else {
-        desc->fCoverageAttributeIndex = -1;
-    }
+    *desc->checksum() = 0;
+    *desc->checksum() = SkChecksum::Compute(reinterpret_cast<uint32_t*>(desc->fKey.get()),
+                                            newKeyLength);
+    desc->fInitialized = true;
 }
+
+GrGLProgramDesc& GrGLProgramDesc::operator= (const GrGLProgramDesc& other) {
+    fInitialized = other.fInitialized;
+    if (fInitialized) {
+        size_t keyLength = other.keyLength();
+        fKey.reset(keyLength);
+        memcpy(fKey.get(), other.fKey.get(), keyLength);
+    }
+    return *this;
+}
+
diff --git a/src/gpu/gl/GrGLProgramDesc.h b/src/gpu/gl/GrGLProgramDesc.h
index b49cb72..f83275d 100644
--- a/src/gpu/gl/GrGLProgramDesc.h
+++ b/src/gpu/gl/GrGLProgramDesc.h
@@ -25,26 +25,37 @@
     to be API-neutral then so could this class. */
 class GrGLProgramDesc {
 public:
-    GrGLProgramDesc() {
-        // since we use this as part of a key we can't have any uninitialized padding
-        memset(this, 0, sizeof(GrGLProgramDesc));
+    GrGLProgramDesc() : fInitialized(false) {}
+    GrGLProgramDesc(const GrGLProgramDesc& desc) { *this = desc; }
+
+    // Returns this as a uint32_t array to be used as a key in the program cache. 
+    const uint32_t* asKey() const {
+        GrAssert(fInitialized);
+        return reinterpret_cast<const uint32_t*>(fKey.get());
     }
 
-    // Returns this as a uint32_t array to be used as a key in the program cache
-    const uint32_t* asKey() const {
-        return reinterpret_cast<const uint32_t*>(this);
-    }
+    // Gets the number of bytes in asKey(). It will be a 4-byte aligned value. When comparing two
+    // keys the size of either key can be used with memcmp() since the lengths themselves begin the
+    // keys and thus the memcmp will exit early if the keys are of different lengths.
+    uint32_t keyLength() const { return *this->atOffset<uint32_t, kLengthOffset>(); }
+
+    // Gets the checksum of the key. Can be used as a hash value for a fast lookup in a cache.
+    uint32_t getChecksum() const { return *this->atOffset<uint32_t, kChecksumOffset>(); }
 
     // For unit testing.
     void setRandom(SkMWCRandom*,
                    const GrGpuGL* gpu,
-                   const GrTexture* dummyDstTexture,
-                   const GrEffectStage* stages[GrDrawState::kNumStages],
+                   const GrRenderTarget* dummyDstRenderTarget,
+                   const GrTexture* dummyDstCopyTexture,
+                   const GrEffectStage* stages[],
+                   int numColorStages,
+                   int numCoverageStages,
                    int currAttribIndex);
 
     /**
      * Builds a program descriptor from a GrDrawState. Whether the primitive type is points, the
-     * output of GrDrawState::getBlendOpts, and the caps of the GrGpuGL are also inputs.
+     * output of GrDrawState::getBlendOpts, and the caps of the GrGpuGL are also inputs. It also
+     * writes a tightly packed array of GrEffectStage* from the drawState.
      */
     static void Build(const GrDrawState&,
                       bool isPoints,
@@ -53,8 +64,37 @@
                       GrBlendCoeff dstCoeff,
                       const GrGpuGL* gpu,
                       const GrDeviceCoordTexture* dstCopy,
+                      const GrEffectStage* outStages[GrDrawState::kNumStages],
                       GrGLProgramDesc* outDesc);
 
+    int numColorEffects() const {
+        GrAssert(fInitialized);
+        return this->getHeader().fColorEffectCnt;
+    }
+
+    int numCoverageEffects() const {
+        GrAssert(fInitialized);
+        return this->getHeader().fCoverageEffectCnt;
+    }
+
+    int numTotalEffects() const { return this->numColorEffects() + this->numCoverageEffects(); }
+
+    GrGLProgramDesc& operator= (const GrGLProgramDesc& other);
+
+    bool operator== (const GrGLProgramDesc& other) const {
+        GrAssert(fInitialized && other.fInitialized);
+        // The length is masked as a hint to the compiler that the address will be 4 byte aligned.
+        return 0 == memcmp(this->asKey(), other.asKey(), this->keyLength() & ~0x3);
+    }
+
+    bool operator!= (const GrGLProgramDesc& other) const {
+        return !(*this == other);
+    }
+
+    static bool Less(const GrGLProgramDesc& a, const GrGLProgramDesc& b) {
+        return memcmp(a.asKey(), b.asKey(), a.keyLength() & ~0x3) < 0;
+    }
+
 private:
     // Specifies where the initial color comes from before the stages are applied.
     enum ColorInput {
@@ -96,37 +136,78 @@
         }
     }
 
-    /** Non-zero if this stage has an effect */
-    GrGLEffect::EffectKey       fEffectKeys[GrDrawState::kNumStages];
-
-    // To enable experimental geometry shader code (not for use in
-    // production)
-#if GR_GL_EXPERIMENTAL_GS
-    bool                     fExperimentalGS;
-#endif
-
-    GrGLShaderBuilder::DstReadKey fDstReadKey;          // set by GrGLShaderBuilder if there
+    struct KeyHeader {
+        GrGLShaderBuilder::DstReadKey fDstReadKey;      // set by GrGLShaderBuilder if there
                                                         // are effects that must read the dst.
                                                         // Otherwise, 0.
-    GrGLShaderBuilder::FragPosKey fFragPosKey;          // set by GrGLShaderBuilder if there are
+        GrGLShaderBuilder::FragPosKey fFragPosKey;      // set by GrGLShaderBuilder if there are
                                                         // effects that read the fragment position.
                                                         // Otherwise, 0.
 
-    // should the FS discard if the coverage is zero (to avoid stencil manipulation)
-    SkBool8                     fDiscardIfZeroCoverage;
+        // should the FS discard if the coverage is zero (to avoid stencil manipulation)
+        SkBool8                     fDiscardIfZeroCoverage;
 
-    uint8_t                     fColorInput;            // casts to enum ColorInput
-    uint8_t                     fCoverageInput;         // casts to enum ColorInput
-    uint8_t                     fCoverageOutput;        // casts to enum CoverageOutput
+        uint8_t                     fColorInput;            // casts to enum ColorInput
+        uint8_t                     fCoverageInput;         // casts to enum ColorInput
+        uint8_t                     fCoverageOutput;        // casts to enum CoverageOutput
 
-    int8_t                      fFirstCoverageStage;
-    SkBool8                     fEmitsPointSize;
-    uint8_t                     fColorFilterXfermode;   // casts to enum SkXfermode::Mode
+        SkBool8                     fEmitsPointSize;
+        uint8_t                     fColorFilterXfermode;   // casts to enum SkXfermode::Mode
 
-    int8_t                      fPositionAttributeIndex;
-    int8_t                      fLocalCoordAttributeIndex;
-    int8_t                      fColorAttributeIndex;
-    int8_t                      fCoverageAttributeIndex;
+        // To enable experimental geometry shader code (not for use in
+        // production)
+#if GR_GL_EXPERIMENTAL_GS
+        SkBool8                     fExperimentalGS;
+#endif
+
+        int8_t                      fPositionAttributeIndex;
+        int8_t                      fLocalCoordAttributeIndex;
+        int8_t                      fColorAttributeIndex;
+        int8_t                      fCoverageAttributeIndex;
+        
+        int8_t                      fColorEffectCnt;
+        int8_t                      fCoverageEffectCnt;
+    };
+
+    // The key is 1 uint32_t for the length, followed by another for the checksum, the header,
+    // and then the effect keys. Everything is fixed length except the effect key array.
+    enum {
+        kLengthOffset = 0,
+        kChecksumOffset = kLengthOffset + sizeof(uint32_t),
+        kHeaderOffset = kChecksumOffset + sizeof(uint32_t),
+        kHeaderSize = SkAlign4(sizeof(KeyHeader)),
+        kEffectKeyOffset = kHeaderOffset + kHeaderSize,
+    };
+
+    template<typename T, size_t OFFSET> T* atOffset() {
+        return reinterpret_cast<T*>(reinterpret_cast<intptr_t>(fKey.get()) + OFFSET);
+    }
+
+    template<typename T, size_t OFFSET> const T* atOffset() const {
+        return reinterpret_cast<const T*>(reinterpret_cast<intptr_t>(fKey.get()) + OFFSET);
+    }
+
+    typedef GrGLEffect::EffectKey EffectKey;
+
+    uint32_t* checksum() { return this->atOffset<uint32_t, kChecksumOffset>(); }
+    KeyHeader* header() { return this->atOffset<KeyHeader, kHeaderOffset>(); }
+    EffectKey* effectKeys() { return this->atOffset<EffectKey, kEffectKeyOffset>(); }
+
+    const KeyHeader& getHeader() const { return *this->atOffset<KeyHeader, kHeaderOffset>(); }
+    const EffectKey* getEffectKeys() const { return this->atOffset<EffectKey, kEffectKeyOffset>(); }
+
+    static size_t KeyLength(int effectCnt) {
+        GR_STATIC_ASSERT(!(sizeof(EffectKey) & 0x3));
+        return kEffectKeyOffset + effectCnt * sizeof(EffectKey);
+    }
+
+    enum {
+        kMaxPreallocEffects = 16,
+        kPreAllocSize = kEffectKeyOffset +  kMaxPreallocEffects * sizeof(EffectKey),
+    };
+
+    SkAutoSMalloc<kPreAllocSize> fKey;
+    bool fInitialized;
 
     // GrGLProgram and GrGLShaderBuilder read the private fields to generate code. TODO: Move all
     // code generation to GrGLShaderBuilder (and maybe add getters rather than friending).
diff --git a/src/gpu/gl/GrGLShaderBuilder.cpp b/src/gpu/gl/GrGLShaderBuilder.cpp
index ddcc615..36eb6d5 100644
--- a/src/gpu/gl/GrGLShaderBuilder.cpp
+++ b/src/gpu/gl/GrGLShaderBuilder.cpp
@@ -105,7 +105,7 @@
     , fUniformManager(uniformManager)
     , fFSFeaturesAddedMask(0)
 #if GR_GL_EXPERIMENTAL_GS
-    , fUsesGS(desc.fExperimentalGS)
+    , fUsesGS(SkToBool(desc.getHeader().fExperimentalGS))
 #else
     , fUsesGS(false)
 #endif
@@ -113,11 +113,13 @@
     , fRTHeightUniform(GrGLUniformManager::kInvalidUniformHandle)
     , fDstCopyTopLeftUniform (GrGLUniformManager::kInvalidUniformHandle)
     , fDstCopyScaleUniform (GrGLUniformManager::kInvalidUniformHandle)
-    , fTopLeftFragPosRead(kTopLeftFragPosRead_FragPosKey == desc.fFragPosKey) {
+    , fTopLeftFragPosRead(kTopLeftFragPosRead_FragPosKey == desc.getHeader().fFragPosKey) {
+
+    const GrGLProgramDesc::KeyHeader& header = desc.getHeader();
 
     fPositionVar = &fVSAttrs.push_back();
     fPositionVar->set(kVec2f_GrSLType, GrGLShaderVar::kAttribute_TypeModifier, "aPosition");
-    if (-1 != desc.fLocalCoordAttributeIndex) {
+    if (-1 != header.fLocalCoordAttributeIndex) {
         fLocalCoordsVar = &fVSAttrs.push_back();
         fLocalCoordsVar->set(kVec2f_GrSLType,
                              GrGLShaderVar::kAttribute_TypeModifier,
@@ -126,13 +128,13 @@
         fLocalCoordsVar = fPositionVar;
     }
     // Emit code to read the dst copy textue if necessary.
-    if (kNoDstRead_DstReadKey != desc.fDstReadKey &&
+    if (kNoDstRead_DstReadKey != header.fDstReadKey &&
         GrGLCaps::kNone_FBFetchType == ctxInfo.caps()->fbFetchType()) {
-        bool topDown = SkToBool(kTopLeftOrigin_DstReadKeyBit & desc.fDstReadKey);
+        bool topDown = SkToBool(kTopLeftOrigin_DstReadKeyBit & header.fDstReadKey);
         const char* dstCopyTopLeftName;
         const char* dstCopyCoordScaleName;
         uint32_t configMask;
-        if (SkToBool(kUseAlphaConfig_DstReadKeyBit & desc.fDstReadKey)) {
+        if (SkToBool(kUseAlphaConfig_DstReadKeyBit & header.fDstReadKey)) {
             configMask = kA_GrColorComponentFlag;
         } else {
             configMask = kRGBA_GrColorComponentFlags;
@@ -656,11 +658,7 @@
     SkString outColor;
 
     for (int e = 0; e < effectCnt; ++e) {
-        if (NULL == effectStages[e] || GrGLEffect::kNoEffectKey == effectKeys[e]) {
-            continue;
-        }
-
-        GrAssert(NULL != effectStages[e]->getEffect());
+        GrAssert(NULL != effectStages[e] && NULL != effectStages[e]->getEffect());
         const GrEffectStage& stage = *effectStages[e];
         const GrEffectRef& effect = *stage.getEffect();
 
diff --git a/src/gpu/gl/GrGpuGL.h b/src/gpu/gl/GrGpuGL.h
index 0f75280..8f80a72 100644
--- a/src/gpu/gl/GrGpuGL.h
+++ b/src/gpu/gl/GrGpuGL.h
@@ -177,47 +177,36 @@
 
         void abandon();
         GrGLProgram* getProgram(const GrGLProgramDesc& desc, const GrEffectStage* stages[]);
+
     private:
         enum {
-            kKeySize = sizeof(GrGLProgramDesc),
             // We may actually have kMaxEntries+1 shaders in the GL context because we create a new
             // shader before evicting from the cache.
-            kMaxEntries = 32
+            kMaxEntries = 32,
+            kHashBits = 6,
         };
 
-        class Entry;
-        // The value of the hash key is based on the ProgramDesc.
-        typedef GrTBinHashKey<Entry, kKeySize> ProgramHashKey;
+        struct Entry;
 
-        class Entry : public ::GrNoncopyable {
-        public:
-            Entry() : fProgram(NULL), fLRUStamp(0) {}
-            Entry& operator = (const Entry& entry) {
-                GrSafeRef(entry.fProgram.get());
-                fProgram.reset(entry.fProgram.get());
-                fKey = entry.fKey;
-                fLRUStamp = entry.fLRUStamp;
-                return *this;
-            }
-            int compare(const ProgramHashKey& key) const {
-                return fKey.compare(key);
-            }
+        struct ProgDescLess;
 
-        public:
-            SkAutoTUnref<GrGLProgram>   fProgram;
-            ProgramHashKey              fKey;
-            unsigned int                fLRUStamp; // Move outside entry?
-        };
+        // Binary search for the entry matching desc. Returns the index into fEntries of the
+        // match, or the bitwise complement (~) of the index where desc should be inserted.
+        int search(const GrGLProgramDesc& desc) const;
 
-        GrTHashTable<Entry, ProgramHashKey, 8> fHashCache;
+        // sorted array of all the entries
+        Entry*                      fEntries[kMaxEntries];
+        // hash table based on lowest kHashBits bits of the program key. Used to avoid binary
+        // searching fEntries.
+        Entry*                      fHashTable[1 << kHashBits];
 
-        Entry                       fEntries[kMaxEntries];
         int                         fCount;
         unsigned int                fCurrLRUStamp;
         const GrGLContext&          fGL;
 #ifdef PROGRAM_CACHE_STATS
         int                         fTotalRequests;
         int                         fCacheMisses;
+        int                         fHashMisses; // cache hit but hash table missed
 #endif
     };
 
diff --git a/src/gpu/gl/GrGpuGL_program.cpp b/src/gpu/gl/GrGpuGL_program.cpp
index 833d811..b017d5d 100644
--- a/src/gpu/gl/GrGpuGL_program.cpp
+++ b/src/gpu/gl/GrGpuGL_program.cpp
@@ -9,12 +9,32 @@
 
 #include "GrEffect.h"
 #include "GrGLEffect.h"
+#include "SkTSearch.h"
 
 typedef GrGLUniformManager::UniformHandle UniformHandle;
 static const UniformHandle kInvalidUniformHandle = GrGLUniformManager::kInvalidUniformHandle;
 
-#define SKIP_CACHE_CHECK    true
-#define GR_UINT32_MAX   static_cast<uint32_t>(-1)
+struct GrGpuGL::ProgramCache::Entry {
+    SK_DECLARE_INST_COUNT_ROOT(Entry);
+    Entry() : fProgram(NULL), fLRUStamp(0) {}
+
+    SkAutoTUnref<GrGLProgram>   fProgram;
+    unsigned int                fLRUStamp;
+};
+
+SK_DEFINE_INST_COUNT(GrGpuGL::ProgramCache::Entry);
+
+struct GrGpuGL::ProgramCache::ProgDescLess {
+    bool operator() (const GrGLProgramDesc& desc, const Entry* entry) {
+        GrAssert(NULL != entry->fProgram.get());
+        return GrGLProgramDesc::Less(desc, entry->fProgram->getDesc());
+    }
+
+    bool operator() (const Entry* entry, const GrGLProgramDesc& desc) {
+        GrAssert(NULL != entry->fProgram.get());
+        return GrGLProgramDesc::Less(entry->fProgram->getDesc(), desc);
+    }
+};
 
 GrGpuGL::ProgramCache::ProgramCache(const GrGLContext& gl)
     : fCount(0)
@@ -23,70 +43,147 @@
 #ifdef PROGRAM_CACHE_STATS
     , fTotalRequests(0)
     , fCacheMisses(0)
+    , fHashMisses(0)
 #endif
 {
+    for (int i = 0; i < 1 << kHashBits; ++i) {
+        fHashTable[i] = NULL;
+    }
 }
 
 GrGpuGL::ProgramCache::~ProgramCache() {
+    for (int i = 0; i < fCount; ++i){
+        SkDELETE(fEntries[i]);
+    }
     // dump stats
 #ifdef PROGRAM_CACHE_STATS
     SkDebugf("--- Program Cache ---\n");
     SkDebugf("Total requests: %d\n", fTotalRequests);
     SkDebugf("Cache misses: %d\n", fCacheMisses);
-    SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0)
-                                    ? (float)fCacheMisses/(float)fTotalRequests : 0.0f);
+    SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
+                                        100.f * fCacheMisses / fTotalRequests :
+                                        0.f);
+    int cacheHits = fTotalRequests - fCacheMisses;
+    SkDebugf("Hash miss %%: %f\n", (cacheHits > 0) ? 100.f * fHashMisses / cacheHits : 0.f);
     SkDebugf("---------------------\n");
 #endif
 }
 
 void GrGpuGL::ProgramCache::abandon() {
     for (int i = 0; i < fCount; ++i) {
-        GrAssert(NULL != fEntries[i].fProgram.get());
-        fEntries[i].fProgram->abandon();
-        fEntries[i].fProgram.reset(NULL);
+        GrAssert(NULL != fEntries[i]->fProgram.get());
+        fEntries[i]->fProgram->abandon();
+        fEntries[i]->fProgram.reset(NULL);
     }
     fCount = 0;
 }
 
+int GrGpuGL::ProgramCache::search(const GrGLProgramDesc& desc) const {
+    ProgDescLess less;
+    return SkTSearch(fEntries, fCount, desc, sizeof(Entry*), less);
+}
+
 GrGLProgram* GrGpuGL::ProgramCache::getProgram(const GrGLProgramDesc& desc,
                                                const GrEffectStage* stages[]) {
-    Entry newEntry;
-    newEntry.fKey.setKeyData(desc.asKey());
 #ifdef PROGRAM_CACHE_STATS
     ++fTotalRequests;
 #endif
 
-    Entry* entry = fHashCache.find(newEntry.fKey);
+    Entry* entry = NULL;
+
+    uint32_t hashIdx = desc.getChecksum();
+    hashIdx ^= hashIdx >> 16;
+    if (kHashBits <= 8) {
+        hashIdx ^= hashIdx >> 8;
+    }
+    hashIdx &=((1 << kHashBits) - 1);
+    Entry* hashedEntry = fHashTable[hashIdx];
+    if (NULL != hashedEntry && hashedEntry->fProgram->getDesc() == desc) {
+        GrAssert(NULL != hashedEntry->fProgram);
+        entry = hashedEntry;
+    }
+
+    int entryIdx;
     if (NULL == entry) {
+        entryIdx = this->search(desc);
+        if (entryIdx >= 0) {
+            entry = fEntries[entryIdx];
+#ifdef PROGRAM_CACHE_STATS
+            ++fHashMisses;
+#endif
+        }
+    }
+
+    if (NULL == entry) {
+        // We have a cache miss
 #ifdef PROGRAM_CACHE_STATS
         ++fCacheMisses;
 #endif
-        newEntry.fProgram.reset(GrGLProgram::Create(fGL, desc, stages));
-        if (NULL == newEntry.fProgram.get()) {
+        GrGLProgram* program = GrGLProgram::Create(fGL, desc, stages);
+        if (NULL == program) {
             return NULL;
         }
+        int purgeIdx = 0;
         if (fCount < kMaxEntries) {
-            entry = fEntries + fCount;
-            ++fCount;
+            entry = SkNEW(Entry);
+            purgeIdx = fCount++;
+            fEntries[purgeIdx] = entry;
         } else {
-            GrAssert(kMaxEntries == fCount);
-            entry = fEntries;
+            GrAssert(fCount == kMaxEntries);
+            purgeIdx = 0;
             for (int i = 1; i < kMaxEntries; ++i) {
-                if (fEntries[i].fLRUStamp < entry->fLRUStamp) {
-                    entry = fEntries + i;
+                if (fEntries[i]->fLRUStamp < fEntries[purgeIdx]->fLRUStamp) {
+                    purgeIdx = i;
                 }
             }
-            fHashCache.remove(entry->fKey, entry);
+            entry = fEntries[purgeIdx];
+            int purgedHashIdx = entry->fProgram->getDesc().getChecksum() & ((1 << kHashBits) - 1);
+            if (fHashTable[purgedHashIdx] == entry) {
+                fHashTable[purgedHashIdx] = NULL;
+            }
         }
-        *entry = newEntry;
-        fHashCache.insert(entry->fKey, entry);
+        GrAssert(fEntries[purgeIdx] == entry);
+        entry->fProgram.reset(program);
+        // We need to shift fEntries around so that the entry currently at purgeIdx is placed 
+        // just before the entry at ~entryIdx (in order to keep fEntries sorted by descriptor).
+        entryIdx = ~entryIdx;
+        if (entryIdx < purgeIdx) {
+            //  Let E and P be the entries at index entryIdx and purgeIdx, respectively.
+            //  If the entries array looks like this:
+            //       aaaaEbbbbbPccccc
+            //  we rearrange it to look like this:
+            //       aaaaPEbbbbbccccc
+            size_t copySize = (purgeIdx - entryIdx) * sizeof(Entry*);
+            memmove(fEntries + entryIdx + 1, fEntries + entryIdx, copySize);
+            fEntries[entryIdx] = entry;
+        } else if (purgeIdx < entryIdx) {
+            //  If the entries array looks like this:
+            //       aaaaPbbbbbEccccc
+            //  we rearrange it to look like this:
+            //       aaaabbbbbPEccccc
+            size_t copySize = (entryIdx - purgeIdx - 1) * sizeof(Entry*);
+            memmove(fEntries + purgeIdx, fEntries + purgeIdx + 1, copySize);
+            fEntries[entryIdx - 1] = entry;
+        }
+#if GR_DEBUG
+        GrAssert(NULL != fEntries[0]->fProgram.get());
+        for (int i = 0; i < fCount - 1; ++i) {
+            GrAssert(NULL != fEntries[i + 1]->fProgram.get());
+            const GrGLProgramDesc& a = fEntries[i]->fProgram->getDesc();
+            const GrGLProgramDesc& b = fEntries[i + 1]->fProgram->getDesc();
+            GrAssert(GrGLProgramDesc::Less(a, b));
+            GrAssert(!GrGLProgramDesc::Less(b, a));
+        }
+#endif
     }
 
+    fHashTable[hashIdx] = entry;
     entry->fLRUStamp = fCurrLRUStamp;
-    if (GR_UINT32_MAX == fCurrLRUStamp) {
+
+    if (SK_MaxU32 == fCurrLRUStamp) {
         // wrap around! just trash our LRU, one time hit.
         for (int i = 0; i < fCount; ++i) {
-            fEntries[i].fLRUStamp = 0;
+            fEntries[i]->fLRUStamp = 0;
         }
     }
     ++fCurrLRUStamp;
@@ -177,9 +274,6 @@
         }
 
         const GrEffectStage* stages[GrDrawState::kNumStages];
-        for (int i = 0; i < GrDrawState::kNumStages; ++i) {
-            stages[i] = drawState.isStageEnabled(i) ? &drawState.getStage(i) : NULL;
-        }
         GrGLProgramDesc desc;
         GrGLProgramDesc::Build(this->getDrawState(),
                                kDrawPoints_DrawType == type,
@@ -188,6 +282,7 @@
                                dstCoeff,
                                this,
                                dstCopy,
+                               stages,
                                &desc);
 
         fCurrentProgram.reset(fProgramCache->getProgram(desc, stages));
@@ -206,19 +301,7 @@
         fCurrentProgram->overrideBlend(&srcCoeff, &dstCoeff);
         this->flushBlend(kDrawLines_DrawType == type, srcCoeff, dstCoeff);
 
-        GrColor color;
-        GrColor coverage;
-        if (blendOpts & GrDrawState::kEmitTransBlack_BlendOptFlag) {
-            color = 0;
-            coverage = 0;
-        } else if (blendOpts & GrDrawState::kEmitCoverage_BlendOptFlag) {
-            color = 0xffffffff;
-            coverage = drawState.getCoverage();
-        } else {
-            color = drawState.getColor();
-            coverage = drawState.getCoverage();
-        }
-        fCurrentProgram->setData(this, color, coverage, dstCopy, &fSharedGLProgramState);
+        fCurrentProgram->setData(this, blendOpts, stages, dstCopy, &fSharedGLProgramState);
     }
     this->flushStencil(type);
     this->flushScissor();
diff --git a/tests/GLProgramsTest.cpp b/tests/GLProgramsTest.cpp
index 73d6db2..2cf21c5 100644
--- a/tests/GLProgramsTest.cpp
+++ b/tests/GLProgramsTest.cpp
@@ -18,59 +18,86 @@
 #include "GrDrawEffect.h"
 #include "effects/GrConfigConversionEffect.h"
 
+#include "SkChecksum.h"
 #include "SkRandom.h"
 #include "Test.h"
 
 void GrGLProgramDesc::setRandom(SkMWCRandom* random,
                                 const GrGpuGL* gpu,
-                                const GrTexture* dstTexture,
-                                const GrEffectStage* stages[GrDrawState::kNumStages],
+                                const GrRenderTarget* dstRenderTarget,
+                                const GrTexture* dstCopyTexture,
+                                const GrEffectStage* stages[],
+                                int numColorStages,
+                                int numCoverageStages,
                                 int currAttribIndex) {
-    fEmitsPointSize = random->nextBool();
+    int numEffects = numColorStages + numCoverageStages;
+    size_t keyLength = KeyLength(numEffects);
+    fKey.reset(keyLength);
+    *this->atOffset<uint32_t, kLengthOffset>() = static_cast<uint32_t>(keyLength);
+    memset(this->header(), 0, kHeaderSize);
 
-    fPositionAttributeIndex = 0;
+    KeyHeader* header = this->header();
+    header->fEmitsPointSize = random->nextBool();
+
+    header->fPositionAttributeIndex = 0;
 
     // if the effects have used up all off the available attributes,
     // don't try to use color or coverage attributes as input
     do {
-        fColorInput = random->nextULessThan(kColorInputCnt);
+        header->fColorInput = random->nextULessThan(kColorInputCnt);
     } while (GrDrawState::kMaxVertexAttribCnt <= currAttribIndex &&
-             kAttribute_ColorInput == fColorInput);
-    fColorAttributeIndex = (fColorInput == kAttribute_ColorInput) ? currAttribIndex++ : -1;
+             kAttribute_ColorInput == header->fColorInput);
+    header->fColorAttributeIndex = (header->fColorInput == kAttribute_ColorInput) ?
+                                        currAttribIndex++ :
+                                        -1;
 
     do {
-        fCoverageInput = random->nextULessThan(kColorInputCnt);
+        header->fCoverageInput = random->nextULessThan(kColorInputCnt);
     } while (GrDrawState::kMaxVertexAttribCnt <= currAttribIndex  &&
-             kAttribute_ColorInput == fCoverageInput);
-    fCoverageAttributeIndex = (fCoverageInput == kAttribute_ColorInput) ? currAttribIndex++ : -1;
+             kAttribute_ColorInput == header->fCoverageInput);
+    header->fCoverageAttributeIndex = (header->fCoverageInput == kAttribute_ColorInput) ?
+                                        currAttribIndex++ :
+                                        -1;
 
-    fColorFilterXfermode = random->nextULessThan(SkXfermode::kLastCoeffMode + 1);
-
-    fFirstCoverageStage = random->nextULessThan(GrDrawState::kNumStages);
+    header->fColorFilterXfermode = random->nextULessThan(SkXfermode::kLastCoeffMode + 1);
 
 #if GR_GL_EXPERIMENTAL_GS
-    fExperimentalGS = gpu->caps()->geometryShaderSupport() && random->nextBool();
+    header->fExperimentalGS = gpu->caps()->geometryShaderSupport() && random->nextBool();
 #endif
 
-    fDiscardIfZeroCoverage = random->nextBool();
+    header->fDiscardIfZeroCoverage = random->nextBool();
 
     bool useLocalCoords = random->nextBool() && currAttribIndex < GrDrawState::kMaxVertexAttribCnt;
-    fLocalCoordAttributeIndex = useLocalCoords ? currAttribIndex++ : -1;
+    header->fLocalCoordAttributeIndex = useLocalCoords ? currAttribIndex++ : -1;
+
+    header->fColorEffectCnt = numColorStages;
+    header->fCoverageEffectCnt = numCoverageStages;
 
     bool dstRead = false;
-    for (int s = 0; s < GrDrawState::kNumStages; ++s) {
-        if (NULL != stages[s]) {
-            const GrBackendEffectFactory& factory = (*stages[s]->getEffect())->getFactory();
-            GrDrawEffect drawEffect(*stages[s], useLocalCoords);
-            fEffectKeys[s] = factory.glEffectKey(drawEffect, gpu->glCaps());
-            if ((*stages[s]->getEffect())->willReadDstColor()) {
-                dstRead = true;
-            }
+    bool fragPos = false;
+    int numStages = numColorStages + numCoverageStages;
+    for (int s = 0; s < numStages; ++s) {
+        const GrBackendEffectFactory& factory = (*stages[s]->getEffect())->getFactory();
+        GrDrawEffect drawEffect(*stages[s], useLocalCoords);
+        this->effectKeys()[s] = factory.glEffectKey(drawEffect, gpu->glCaps());
+        if ((*stages[s]->getEffect())->willReadDstColor()) {
+            dstRead = true;
+        }
+        if ((*stages[s]->getEffect())->willReadFragmentPosition()) {
+            fragPos = true;
         }
     }
 
     if (dstRead) {
-        this->fDstReadKey = GrGLShaderBuilder::KeyForDstRead(dstTexture, gpu->glCaps());
+        header->fDstReadKey = GrGLShaderBuilder::KeyForDstRead(dstCopyTexture, gpu->glCaps());
+    } else {
+        header->fDstReadKey = 0;
+    }
+    if (fragPos) {
+        header->fFragPosKey = GrGLShaderBuilder::KeyForFragmentPosition(dstRenderTarget,
+                                                                         gpu->glCaps());
+    } else {
+        header->fFragPosKey = 0;
     }
 
     CoverageOutput coverageOutput;
@@ -82,7 +109,11 @@
                                 (!dstRead && kCombineWithDst_CoverageOutput == coverageOutput);
     } while (illegalCoverageOutput);
 
-    fCoverageOutput = coverageOutput;
+    header->fCoverageOutput = coverageOutput;
+
+    *this->checksum() = 0;
+    *this->checksum() = SkChecksum::Compute(reinterpret_cast<uint32_t*>(fKey.get()), keyLength);
+    fInitialized = true;
 }
 
 bool GrGpuGL::programUnitTest(int maxStages) {
@@ -90,10 +121,12 @@
     maxStages = GrMin(maxStages, (int)GrDrawState::kNumStages);
 
     GrTextureDesc dummyDesc;
+    dummyDesc.fFlags = kRenderTarget_GrTextureFlagBit;
     dummyDesc.fConfig = kSkia8888_GrPixelConfig;
     dummyDesc.fWidth = 34;
     dummyDesc.fHeight = 18;
     SkAutoTUnref<GrTexture> dummyTexture1(this->createTexture(dummyDesc, NULL, 0));
+    dummyDesc.fFlags = kNone_GrTextureFlags;
     dummyDesc.fConfig = kAlpha_8_GrPixelConfig;
     dummyDesc.fWidth = 16;
     dummyDesc.fHeight = 22;
@@ -113,43 +146,52 @@
 #endif
 
         GrGLProgramDesc pdesc;
-        const GrEffectStage* stages[GrDrawState::kNumStages];
-        memset(stages, 0, sizeof(stages));
 
         int currAttribIndex = 1;  // we need to always leave room for position
         int attribIndices[2];
         GrTexture* dummyTextures[] = {dummyTexture1.get(), dummyTexture2.get()};
-        for (int s = 0; s < maxStages; ++s) {
-            // enable the stage?
-            if (random.nextBool()) {
-                SkAutoTUnref<const GrEffectRef> effect(GrEffectTestFactory::CreateStage(
-                                                                                &random,
-                                                                                this->getContext(),
-                                                                                *this->caps(),
-                                                                                dummyTextures));
-                int numAttribs = (*effect)->numVertexAttribs();
 
-                // If adding this effect would exceed the max attrib count then generate a
-                // new random effect.
-                if (currAttribIndex + numAttribs > GrDrawState::kMaxVertexAttribCnt) {
-                    --s;
-                    continue;
-                }
-                for (int i = 0; i < numAttribs; ++i) {
-                    attribIndices[i] = currAttribIndex++;
-                }
-                GrEffectStage* stage = SkNEW(GrEffectStage);
-                stage->setEffect(effect.get(), attribIndices[0], attribIndices[1]);
-                stages[s] = stage;
+        int numStages = random.nextULessThan(maxStages + 1);
+        int numColorStages = random.nextULessThan(numStages + 1);
+        int numCoverageStages = numStages - numColorStages;
+
+        SkAutoSTMalloc<8, const GrEffectStage*> stages(numStages);
+
+        for (int s = 0; s < numStages; ++s) {
+            SkAutoTUnref<const GrEffectRef> effect(GrEffectTestFactory::CreateStage(
+                                                                            &random,
+                                                                            this->getContext(),
+                                                                            *this->caps(),
+                                                                            dummyTextures));
+            int numAttribs = (*effect)->numVertexAttribs();
+
+            // If adding this effect would exceed the max attrib count then generate a
+            // new random effect.
+            if (currAttribIndex + numAttribs > GrDrawState::kMaxVertexAttribCnt) {
+                --s;
+                continue;
             }
+            for (int i = 0; i < numAttribs; ++i) {
+                attribIndices[i] = currAttribIndex++;
+            }
+            GrEffectStage* stage = SkNEW(GrEffectStage);
+            stage->setEffect(effect.get(), attribIndices[0], attribIndices[1]);
+            stages[s] = stage;
         }
         const GrTexture* dstTexture = random.nextBool() ? dummyTextures[0] : dummyTextures[1];
-        pdesc.setRandom(&random, this, dstTexture, stages, currAttribIndex);
+        pdesc.setRandom(&random,
+                        this,
+                        dummyTextures[0]->asRenderTarget(),
+                        dstTexture,
+                        stages.get(),
+                        numColorStages,
+                        numCoverageStages,
+                        currAttribIndex);
 
         SkAutoTUnref<GrGLProgram> program(GrGLProgram::Create(this->glContext(),
                                                               pdesc,
-                                                              stages));
-        for (int s = 0; s < maxStages; ++s) {
+                                                              stages.get()));
+        for (int s = 0; s < numStages; ++s) {
             SkDELETE(stages[s]);
         }
         if (NULL == program.get()) {