Add 4x4 downsample filter with 4 bilinear texture reads, use for ssaa.

Review URL: http://codereview.appspot.com/4483042/

git-svn-id: http://skia.googlecode.com/svn/trunk@1250 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gpu/src/GrContext.cpp b/gpu/src/GrContext.cpp
index 399eaf8..ff3119a 100644
--- a/gpu/src/GrContext.cpp
+++ b/gpu/src/GrContext.cpp
@@ -128,7 +128,7 @@
 
         if (tiled && !isPow2) {
             bits |= kNPOTBit;
-            if (sampler.isFilter()) {
+            if (GrSamplerState::kNearest_Filter != sampler.getFilter()) {
                 bits |= kFilterBit;
             }
         }
@@ -223,9 +223,18 @@
             fGpu->disableState(GrDrawTarget::kDither_StateBit |
                                GrDrawTarget::kClip_StateBit   |
                                GrDrawTarget::kAntialias_StateBit);
+            GrSamplerState::Filter filter;
+            // if filtering is not desired then we want to ensure all
+            // texels in the resampled image are copies of texels from
+            // the original.
+            if (GrSamplerState::kNearest_Filter == sampler.getFilter()) {
+                filter = GrSamplerState::kNearest_Filter;
+            } else {
+                filter = GrSamplerState::kBilinear_Filter;
+            }
             GrSamplerState stretchSampler(GrSamplerState::kClamp_WrapMode,
                                           GrSamplerState::kClamp_WrapMode,
-                                          sampler.isFilter());
+                                          filter);
             fGpu->setSamplerState(0, stretchSampler);
 
             static const GrVertexLayout layout =
@@ -490,9 +499,13 @@
     int scale;
     // Using MSAA seems to be slower for some yet unknown reason.
     if (false && fGpu->supportsFullsceneAA()) {
+        record->fDownsample = OffscreenRecord::kFSAA_Downsample;
         scale = GR_Scalar1;
         desc.fAALevel = kMed_GrAALevel;
     } else {
+        record->fDownsample = (fGpu->supports4x4DownsampleFilter()) ?
+                                OffscreenRecord::k4x4SinglePass_Downsample :
+                                OffscreenRecord::k4x4TwoPass_Downsample;
         scale = 4;
         desc.fAALevel = kNone_GrAALevel;
     }
@@ -506,7 +519,7 @@
         return false;
     }
 
-    if (scale > 1) {
+    if (OffscreenRecord::k4x4TwoPass_Downsample == record->fDownsample) {
         desc.fWidth /= 2;
         desc.fHeight /= 2;
         record->fEntry1 = this->lockKeylessTexture(desc);
@@ -550,16 +563,22 @@
 
     GrAssert(NULL != record->fEntry0);
 
-    bool downsample =  NULL != record->fEntry1;
-    
+    GrSamplerState::Filter filter;
+    if (OffscreenRecord::k4x4SinglePass_Downsample == record->fDownsample) {
+        filter = GrSamplerState::k4x4Downsample_Filter;
+    } else {
+        filter = GrSamplerState::kBilinear_Filter;
+    }
+
     GrMatrix sampleM;
     GrSamplerState sampler(GrSamplerState::kClamp_WrapMode, 
-                           GrSamplerState::kClamp_WrapMode, true);
+                           GrSamplerState::kClamp_WrapMode, filter);
 
     GrTexture* src = record->fEntry0->texture();
     int scale;
 
-    if (downsample) {
+    if (OffscreenRecord::k4x4TwoPass_Downsample == record->fDownsample) {
+        GrAssert(NULL != record->fEntry1);
         scale = 2;
         GrRenderTarget* dst = record->fEntry1->texture()->asRenderTarget();
         
@@ -577,10 +596,14 @@
         target->drawSimpleRect(rect, NULL, 1 << kOffscreenStage);
         
         src = record->fEntry1->texture();
-    } else {
+    } else if (OffscreenRecord::kFSAA_Downsample == record->fDownsample) {
         scale = 1;
         GrIRect rect(0, 0, boundRect.width(), boundRect.height());
         src->asRenderTarget()->overrideResolveRect(rect);
+    } else {
+        GrAssert(OffscreenRecord::k4x4SinglePass_Downsample == 
+                 record->fDownsample);
+        scale = 4;
     }
 
     // setup for draw back to main RT
@@ -607,7 +630,7 @@
 
     this->unlockTexture(record->fEntry0);
     record->fEntry0 = NULL;
-    if (downsample) {
+    if (NULL != record->fEntry1) {
         this->unlockTexture(record->fEntry1);
         record->fEntry1 = NULL;
     }
diff --git a/gpu/src/GrGLProgram.cpp b/gpu/src/GrGLProgram.cpp
index 0365470..ce4f000 100644
--- a/gpu/src/GrGLProgram.cpp
+++ b/gpu/src/GrGLProgram.cpp
@@ -42,6 +42,8 @@
 
 }  // namespace
 
+#define PRINT_SHADERS 0
+
 #if GR_GL_ATTRIBUTE_MATRICES
     #define VIEW_MATRIX_NAME "aViewM"
 #else
@@ -93,6 +95,11 @@
     s->appendInt(stage);
 }
 
+static void normalized_texel_size_name(int stage, GrStringBuilder* s) {
+    *s = "uTexelSize";    // base name of the stage's texel-size uniform
+    s->appendInt(stage);  // suffix the name with the stage number
+}
+
 static void sampler_name(int stage, GrStringBuilder* s) {
     *s = "uSampler";
     s->appendInt(stage);
@@ -172,8 +179,7 @@
     }
 }
 
-void GrGLProgram::genProgram(GrGLProgram::CachedData* programData, 
-                             const GrDrawTarget* target) const {
+void GrGLProgram::genProgram(GrGLProgram::CachedData* programData) const {
 
     ShaderCodeSegments segments;
     const uint32_t& layout = fProgramDesc.fVertexLayout;
@@ -219,7 +225,7 @@
     // add texture coordinates that are used to the list of vertex attr decls
     GrTokenString texCoordAttrs[GrDrawTarget::kMaxTexCoords];
     for (int t = 0; t < GrDrawTarget::kMaxTexCoords; ++t) {
-        if (target->VertexUsesTexCoordIdx(t, layout)) {
+        if (GrDrawTarget::VertexUsesTexCoordIdx(t, layout)) {
             tex_attr_name(t, texCoordAttrs + t);
 
             segments.fVSAttrs += "attribute vec2 ";
@@ -315,11 +321,11 @@
     ++stringCnt;
 
 #if PRINT_SHADERS
-    GrPrintf("%s%s%s%s\n",
-             segments.fVSUnis.cstr(),
-             segments.fVSAttrs.cstr(),
-             segments.fVaryings.cstr(),
-             segments.fVSCode.cstr());
+    GrPrintf("%s", segments.fVSUnis.cstr());  // "%s": generated text must never act as the format
+    GrPrintf("%s", segments.fVSAttrs.cstr());
+    GrPrintf("%s", segments.fVaryings.cstr());
+    GrPrintf("%s", segments.fVSCode.cstr());
+    GrPrintf("\n");
 #endif
     programData->fVShaderID = CompileShader(GR_GL_VERTEX_SHADER,
                                         stringCnt,
@@ -350,11 +356,11 @@
     ++stringCnt;
 
 #if PRINT_SHADERS
-    GrPrintf("%s%s%s%s\n",
-             GR_SHADER_PRECISION,
-             segments.fFSUnis.cstr(),
-             segments.fVaryings.cstr(),
-             segments.fFSCode.cstr());
+    GrPrintf(GrShaderPrecision());  // NOTE(review): assumed to be a fixed header without '%' -- confirm
+    GrPrintf("%s", segments.fFSUnis.cstr());  // "%s": generated text must never act as the format
+    GrPrintf("%s", segments.fVaryings.cstr());
+    GrPrintf("%s", segments.fFSCode.cstr());
+    GrPrintf("\n");
 #endif
     programData->fFShaderID = CompileShader(GR_GL_FRAGMENT_SHADER,
                                             stringCnt,
@@ -464,6 +470,16 @@
                 locations.fSamplerUni = -1;
             }
 
+            if (locations.fNormalizedTexelSizeUni) {
+                GrTokenString texelSizeName;
+                normalized_texel_size_name(s, &texelSizeName);
+                locations.fNormalizedTexelSizeUni = 
+                   GR_GL(GetUniformLocation(progID, texelSizeName.cstr()));
+                GrAssert(-1 != locations.fNormalizedTexelSizeUni);
+            } else {
+                locations.fNormalizedTexelSizeUni = -1;
+            }
+
             if (locations.fRadial2Uni) {
                 GrTokenString radial2ParamName;
                 radial2_param_name(s, &radial2ParamName);
@@ -478,6 +494,7 @@
             locations.fSamplerUni = -1;
             locations.fRadial2Uni = -1;
             locations.fTextureMatrixUni = -1;
+            locations.fNormalizedTexelSizeUni = -1;
         }
     }
     GR_GL(UseProgram(progID));
@@ -490,8 +507,11 @@
         programData->fTextureMatrices[s] = GrMatrix::InvalidMatrix();
         programData->fRadial2CenterX1[s] = GR_ScalarMax;
         programData->fRadial2Radius0[s] = -GR_ScalarMax;
+        programData->fTextureWidth[s] = -1;
+        programData->fTextureHeight[s] = -1;
     }
     programData->fViewMatrix = GrMatrix::InvalidMatrix();
+    programData->fColor = GrColor_ILLEGAL;
 }
 
 GrGLuint GrGLProgram::CompileShader(GrGLenum type,
@@ -535,12 +555,12 @@
 //============================================================================
 
 void GrGLProgram::genStageCode(int stageNum,
-                                  const GrGLProgram::ProgramDesc::StageDesc& desc,
-                                  const char* fsInColor, // NULL means no incoming color
-                                  const char* fsOutColor,
-                                  const char* vsInCoord,
-                                  ShaderCodeSegments* segments,
-                                  StageUniLocations* locations) const {
+                               const GrGLProgram::ProgramDesc::StageDesc& desc,
+                               const char* fsInColor, // NULL means no incoming color
+                               const char* fsOutColor,
+                               const char* vsInCoord,
+                               ShaderCodeSegments* segments,
+                               StageUniLocations* locations) const {
 
     GrAssert(stageNum >= 0 && stageNum <= 9);
 
@@ -585,6 +605,14 @@
     segments->fFSUnis += ";\n";
     locations->fSamplerUni = 1;
 
+    GrTokenString texelSizeName;
+    if (ProgramDesc::StageDesc::k2x2_FetchMode == desc.fFetchMode) {
+        normalized_texel_size_name(stageNum, &texelSizeName);
+        segments->fFSUnis += "uniform vec2 ";
+        segments->fFSUnis += texelSizeName;
+        segments->fFSUnis += ";\n";
+    }
+
     segments->fVaryings += "varying ";
     segments->fVaryings += float_vector_type(varyingDims);
     segments->fVaryings += " ";
@@ -661,14 +689,15 @@
         GrAssert(varyingDims == coordDims);
         fsCoordName = varyingName;
     } else {
-        // if we have to do some non-matrix op on the varyings to get
+        // if we have to do some special op on the varyings to get
         // our final tex coords then when in perspective we have to
         // do an explicit divide
-        if  (ProgramDesc::StageDesc::kIdentity_CoordMapping == desc.fCoordMapping) {
+        if  (ProgramDesc::StageDesc::kIdentity_CoordMapping == desc.fCoordMapping &&
+             ProgramDesc::StageDesc::kSingle_FetchMode == desc.fFetchMode) {
             texFunc += "Proj";
             fsCoordName = varyingName;
         } else {
-            fsCoordName = "tCoord";
+            fsCoordName = "inCoord";
             fsCoordName.appendInt(stageNum);
 
             segments->fFSCode += "\t";
@@ -686,6 +715,7 @@
     }
 
     GrSStringBuilder<96> sampleCoords;
+    bool complexCoord = false;
     switch (desc.fCoordMapping) {
     case ProgramDesc::StageDesc::kIdentity_CoordMapping:
         sampleCoords = fsCoordName;
@@ -696,11 +726,13 @@
         sampleCoords += ".y, -";
         sampleCoords += fsCoordName;
         sampleCoords += ".x)*0.1591549430918 + 0.5, 0.5)";
+        complexCoord = true;
         break;
     case ProgramDesc::StageDesc::kRadialGradient_CoordMapping:
         sampleCoords = "vec2(length(";
         sampleCoords += fsCoordName;
         sampleCoords += ".xy), 0.5)";
+        complexCoord = true;
         break;
     case ProgramDesc::StageDesc::kRadial2Gradient_CoordMapping: {
         GrTokenString cName    = "c";
@@ -769,26 +801,85 @@
         sampleCoords += ") * ";
         sampleCoords += radial2ParamsName;
         sampleCoords += "[1], 0.5)\n";
+        complexCoord = true;
         break;}
     };
 
-    segments->fFSCode += "\t";
-    segments->fFSCode += fsOutColor;
-    segments->fFSCode += " = ";
-    if (NULL != fsInColor) {
-        segments->fFSCode += fsInColor;
-        segments->fFSCode += " * ";
+    if (ProgramDesc::StageDesc::k2x2_FetchMode == desc.fFetchMode) {
+        locations->fNormalizedTexelSizeUni = 1;
+        if (complexCoord) {
+            GrTokenString coordVar("tCoord");
+            coordVar.appendInt(stageNum);
+            segments->fFSCode += "\t";
+            segments->fFSCode += float_vector_type(coordDims);
+            segments->fFSCode += " ";
+            segments->fFSCode += coordVar;
+            segments->fFSCode += " = ";
+            segments->fFSCode += sampleCoords;
+            segments->fFSCode += ";\n";
+            sampleCoords = coordVar;
+        }
+        static const char sign[] = {'-','+'};
+        GrTokenString stageAccumVar("stage2x2Accum");
+        stageAccumVar.appendInt(stageNum);
+        segments->fFSCode += "\tvec4 ";
+        segments->fFSCode += stageAccumVar;
+        segments->fFSCode += " = ";
+        GrAssert(2 == coordDims);
+        for (int y = 0; y < 2; ++y) {
+            for (int x = 0; x < 2; ++x) {
+                segments->fFSCode += texFunc;
+                segments->fFSCode += "(";
+                segments->fFSCode += samplerName;
+                segments->fFSCode += ", ";
+                segments->fFSCode += sampleCoords;
+                segments->fFSCode += " + vec2(";
+                segments->fFSCode += sign[x];
+                segments->fFSCode += texelSizeName;
+                segments->fFSCode += ".x, ";
+                segments->fFSCode += sign[y];
+                segments->fFSCode += texelSizeName;
+                segments->fFSCode += ".y))";
+                if (desc.fModulation == ProgramDesc::StageDesc::kAlpha_Modulation) {
+                    segments->fFSCode += ".aaaa";
+                }
+                segments->fFSCode += ";\n";
+                if (1 != x || 1 !=y ) {
+                    segments->fFSCode += "\t";
+                    segments->fFSCode += stageAccumVar;
+                    segments->fFSCode += " += ";
+                }
+            }
+        }
+        segments->fFSCode += "\t";  // emit: <out> = [<in> * ] <accum> / 4.0;
+        segments->fFSCode += fsOutColor;
+        segments->fFSCode += " = ";
+        if (NULL != fsInColor) {  // NULL means there is no incoming color to modulate by
+            segments->fFSCode += fsInColor;
+            segments->fFSCode += " * ";
+        }
+        segments->fFSCode += stageAccumVar;
+        segments->fFSCode += " / 4.0;\n";  // float literal: GLSL ES has no implicit int->float conversion
+    } else {
+        segments->fFSCode += "\t";
+        segments->fFSCode += fsOutColor;
+        segments->fFSCode += " = ";
+        if (NULL != fsInColor) {
+            segments->fFSCode += fsInColor;
+            segments->fFSCode += " * ";
+        }
+
+        segments->fFSCode += texFunc;
+        segments->fFSCode += "(";
+        segments->fFSCode += samplerName;
+        segments->fFSCode += ", ";
+        segments->fFSCode += sampleCoords;
+        segments->fFSCode += ")";
+        if (desc.fModulation == ProgramDesc::StageDesc::kAlpha_Modulation) {
+            segments->fFSCode += ".aaaa";
+        }
+        segments->fFSCode += ";\n";
     }
-    segments->fFSCode += texFunc;
-    segments->fFSCode += "(";
-    segments->fFSCode += samplerName;
-    segments->fFSCode += ", ";
-    segments->fFSCode += sampleCoords;
-    segments->fFSCode += ")";
-    if (desc.fModulation == ProgramDesc::StageDesc::kAlpha_Modulation) {
-        segments->fFSCode += ".aaaa";
-    }
-    segments->fFSCode += ";\n";
 
     if(fStageEffects[stageNum]) {
         fStageEffects[stageNum]->genShaderCode(segments);
diff --git a/gpu/src/GrGLProgram.h b/gpu/src/GrGLProgram.h
index bc74038..4fc1d43 100644
--- a/gpu/src/GrGLProgram.h
+++ b/gpu/src/GrGLProgram.h
@@ -58,7 +58,7 @@
      *  The result of heavy init is not stored in datamembers of GrGLProgam,
      *  but in a separate cacheable container.
      */
-    void genProgram(CachedData* programData, const GrDrawTarget* target) const;
+    void genProgram(CachedData* programData) const;
 
     /**
      *  Routine that is called before rendering. Sets-up all the state and
@@ -107,20 +107,25 @@
                 kIdentityMatrix_OptFlagBit = 0x2
             };
 
-            unsigned fOptFlags : 8;
-            unsigned fEnabled : 8;
+            unsigned fOptFlags;
+            bool fEnabled;
 
             enum Modulation {
                 kColor_Modulation,
                 kAlpha_Modulation
-            } fModulation : 8;
+            } fModulation;
+
+            enum FetchMode {
+                kSingle_FetchMode,
+                k2x2_FetchMode
+            } fFetchMode;
 
             enum CoordMapping {
                 kIdentity_CoordMapping,
                 kRadialGradient_CoordMapping,
                 kSweepGradient_CoordMapping,
                 kRadial2Gradient_CoordMapping
-            } fCoordMapping : 8;
+            } fCoordMapping;
         } fStages[GrDrawTarget::kNumStages];
     } fProgramDesc;
 
@@ -129,6 +134,7 @@
 public:
     struct StageUniLocations {  // uniform locations belonging to one texture stage
         GrGLint fTextureMatrixUni;
+        GrGLint fNormalizedTexelSizeUni;  // texel-size uniform of a k2x2 fetch stage; -1 when unused
         GrGLint fSamplerUni;
         GrGLint fRadial2Uni;
     };
@@ -188,6 +194,9 @@
         // (GL uniform values travel with program)
         GrColor                     fColor;
         GrMatrix                    fTextureMatrices[GrDrawTarget::kNumStages];
+        // width and height used for normalized texel size
+        int                         fTextureWidth[GrDrawTarget::kNumStages];
+        int                         fTextureHeight[GrDrawTarget::kNumStages]; 
         GrScalar                    fRadial2CenterX1[GrDrawTarget::kNumStages];
         GrScalar                    fRadial2Radius0[GrDrawTarget::kNumStages];
         bool                        fRadial2PosRoot[GrDrawTarget::kNumStages];
diff --git a/gpu/src/GrGpu.cpp b/gpu/src/GrGpu.cpp
index 5fff20f..e18c4a4 100644
--- a/gpu/src/GrGpu.cpp
+++ b/gpu/src/GrGpu.cpp
@@ -755,7 +755,7 @@
     GrSamplerState::kClamp_WrapMode,
     GrSamplerState::kNormal_SampleMode,
     GrMatrix::I(),
-    false);
+    GrSamplerState::kNearest_Filter);
 
 
 
diff --git a/gpu/src/GrGpuGL.cpp b/gpu/src/GrGpuGL.cpp
index 721d084..c63c766 100644
--- a/gpu/src/GrGpuGL.cpp
+++ b/gpu/src/GrGpuGL.cpp
@@ -1754,8 +1754,12 @@
                                                 nextTexture->getTexParams();
             GrGLTexture::TexParams newTexParams;
 
-            newTexParams.fFilter = sampler.isFilter() ? GR_GL_LINEAR :
-                                                        GR_GL_NEAREST;
+            if (GrSamplerState::kNearest_Filter == sampler.getFilter()) {
+                newTexParams.fFilter = GR_GL_NEAREST;
+            } else {
+                newTexParams.fFilter = GR_GL_LINEAR;
+            }
+
             newTexParams.fWrapS =
                         GrGLTexture::WrapMode2GLWrap()[sampler.getWrapX()];
             newTexParams.fWrapT =
diff --git a/gpu/src/GrGpuGLFixed.cpp b/gpu/src/GrGpuGLFixed.cpp
index 4440bcd..dbfac43 100644
--- a/gpu/src/GrGpuGLFixed.cpp
+++ b/gpu/src/GrGpuGLFixed.cpp
@@ -56,6 +56,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 GrGpuGLFixed::GrGpuGLFixed() {
+    f4X4DownsampleFilterSupport = false;  // this backend does not advertise the 4x4 downsample filter
 }
 
 GrGpuGLFixed::~GrGpuGLFixed() {
diff --git a/gpu/src/GrGpuGLShaders.cpp b/gpu/src/GrGpuGLShaders.cpp
index d8f99bc..c977ea4 100644
--- a/gpu/src/GrGpuGLShaders.cpp
+++ b/gpu/src/GrGpuGLShaders.cpp
@@ -22,8 +22,8 @@
 #include "GrMemory.h"
 #include "GrNoncopyable.h"
 #include "GrStringBuilder.h"
+#include "GrRandom.h"
 
-#define PRINT_SHADERS           0
 #define SKIP_CACHE_CHECK    true
 #define GR_UINT32_MAX   static_cast<uint32_t>(-1)
 
@@ -52,7 +52,7 @@
         void copyAndTakeOwnership(Entry& entry) {
             fProgramData.copyAndTakeOwnership(entry.fProgramData);
             fKey.copyAndTakeOwnership(entry.fKey); // ownership transfer
-            fLRUStamp = entry.fLRUStamp;        
+            fLRUStamp = entry.fLRUStamp;
         }
 
     public:
@@ -96,8 +96,7 @@
         }
     }
 
-    GrGLProgram::CachedData* getProgramData(const GrGLProgram& desc, 
-                                            const GrDrawTarget* target) {
+    GrGLProgram::CachedData* getProgramData(const GrGLProgram& desc) {
         ProgramHashKey key;
         while (key.doPass()) {
             desc.buildKey(key);
@@ -119,7 +118,7 @@
                 GrGpuGLShaders::DeleteProgram(&entry->fProgramData);
             }
             entry->fKey.copyAndTakeOwnership(key);
-            desc.genProgram(&entry->fProgramData, target);
+            desc.genProgram(&entry->fProgramData);
             fHashCache.insert(entry->fKey, entry);
         }
 
@@ -142,13 +141,99 @@
     GR_DEBUGCODE(memset(programData, 0, sizeof(*programData));)
 }
 
+void GrGpuGLShaders::ProgramUnitTest() {
+    // Generates and deletes a program for NUM_TESTS random descriptions.
+    static const int STAGE_OPTS[] = {
+        0,
+        GrGLProgram::ProgramDesc::StageDesc::kNoPerspective_OptFlagBit,
+        GrGLProgram::ProgramDesc::StageDesc::kIdentityMatrix_OptFlagBit
+    };
+    static const GrGLProgram::ProgramDesc::StageDesc::Modulation STAGE_MODULATES[] = {
+        GrGLProgram::ProgramDesc::StageDesc::kColor_Modulation,
+        GrGLProgram::ProgramDesc::StageDesc::kAlpha_Modulation
+    };
+    static const GrGLProgram::ProgramDesc::StageDesc::CoordMapping STAGE_COORD_MAPPINGS[] = {
+        GrGLProgram::ProgramDesc::StageDesc::kIdentity_CoordMapping,
+        GrGLProgram::ProgramDesc::StageDesc::kRadialGradient_CoordMapping,
+        GrGLProgram::ProgramDesc::StageDesc::kSweepGradient_CoordMapping,
+        GrGLProgram::ProgramDesc::StageDesc::kRadial2Gradient_CoordMapping
+    };
+    static const GrGLProgram::ProgramDesc::StageDesc::FetchMode FETCH_MODES[] = {
+        GrGLProgram::ProgramDesc::StageDesc::kSingle_FetchMode,
+        GrGLProgram::ProgramDesc::StageDesc::k2x2_FetchMode,
+    };
+    GrGLProgram program;
+    GrGLProgram::ProgramDesc& pdesc = program.fProgramDesc;
+
+    static const int NUM_TESTS = 512;
+
+    // GrRandoms nextU() values have patterns in the low bits
+    // So using nextU() % array_count might never take some values.
+    GrRandom random;
+    for (int t = 0; t < NUM_TESTS; ++t) {
+        // start every description from an empty vertex layout
+        pdesc.fVertexLayout = 0;
+        pdesc.fEmitsPointSize = random.nextF() > .5f;
+        float colorType = random.nextF();
+        if (colorType < 1.f / 3.f) {
+            pdesc.fColorType = GrGLProgram::ProgramDesc::kAttribute_ColorType;
+        } else if (colorType < 2.f / 3.f) {
+            pdesc.fColorType = GrGLProgram::ProgramDesc::kUniform_ColorType;
+        } else {
+            pdesc.fColorType = GrGLProgram::ProgramDesc::kNone_ColorType;
+        }
+        for (int s = 0; s < kNumStages; ++s) {
+            // enable the stage?
+            if (random.nextF() > .5f) {
+                // use separate tex coords?
+                if (random.nextF() > .5f) {
+                    int coordIdx = (int)(random.nextF() * kMaxTexCoords);  // renamed: must not shadow the test counter t
+                    pdesc.fVertexLayout |= StageTexCoordVertexLayoutBit(s, coordIdx);
+                } else {
+                    pdesc.fVertexLayout |= StagePosAsTexCoordVertexLayoutBit(s);
+                }
+            }
+            // use text-formatted verts?
+            if (random.nextF() > .5f) {
+                pdesc.fVertexLayout |= kTextFormat_VertexLayoutBit;
+            }
+        }
+
+        for (int s = 0; s < kNumStages; ++s) {
+            int x;
+            pdesc.fStages[s].fEnabled = VertexUsesStage(s, pdesc.fVertexLayout);
+            x = (int)(random.nextF() * GR_ARRAY_COUNT(STAGE_OPTS));
+            pdesc.fStages[s].fOptFlags = STAGE_OPTS[x];
+            x = (int)(random.nextF() * GR_ARRAY_COUNT(STAGE_MODULATES));
+            pdesc.fStages[s].fModulation = STAGE_MODULATES[x];
+            x = (int)(random.nextF() * GR_ARRAY_COUNT(STAGE_COORD_MAPPINGS));
+            pdesc.fStages[s].fCoordMapping = STAGE_COORD_MAPPINGS[x];
+            x = (int)(random.nextF() * GR_ARRAY_COUNT(FETCH_MODES));
+            pdesc.fStages[s].fFetchMode = FETCH_MODES[x];
+        }
+        GrGLProgram::CachedData cachedData;
+        program.genProgram(&cachedData);
+        DeleteProgram(&cachedData);
+        bool again = false;  // flip by hand (e.g. in a debugger) to regenerate the same description
+        if (again) {
+            program.genProgram(&cachedData);
+            DeleteProgram(&cachedData);
+        }
+    }
+}
+
 
 GrGpuGLShaders::GrGpuGLShaders() {
 
     resetContext();
+    f4X4DownsampleFilterSupport = true;  // buildProgram maps k4x4Downsample_Filter to the k2x2 fetch mode
 
     fProgramData = NULL;
     fProgramCache = new ProgramCache();
+
+#if 0
+    ProgramUnitTest();  // disabled; change the #if to run the random program-generation test here
+#endif
 }
 
 GrGpuGLShaders::~GrGpuGLShaders() {
@@ -217,59 +302,90 @@
 #endif
 }
 
-void GrGpuGLShaders::flushTextureMatrix(int stage) {
-    GrAssert(NULL != fCurrDrawState.fTextures[stage]);
+void GrGpuGLShaders::flushTextureMatrix(int s) {  // uploads stage s's texture matrix when it is stale
+    const int& uni = fProgramData->fUniLocations.fStages[s].fTextureMatrixUni;
+    GrGLTexture* texture = (GrGLTexture*) fCurrDrawState.fTextures[s];
+    if (NULL != texture) {
+        if (-1 != uni &&
+            (((1 << s) & fDirtyFlags.fTextureChangedMask) ||
+            getHWSamplerMatrix(s) != getSamplerMatrix(s))) {
 
-    GrGLTexture* texture = (GrGLTexture*) fCurrDrawState.fTextures[stage];
+                // texture was checked non-NULL above, so it is reused below as-is
 
-    GrMatrix m = getSamplerMatrix(stage);
-    GrSamplerState::SampleMode mode = 
-        fCurrDrawState.fSamplerStates[stage].getSampleMode();
-    AdjustTextureMatrix(texture, mode, &m);
+                // start from the sampler matrix and adjust it for this texture
 
-    // ES doesn't allow you to pass true to the transpose param,
-    // so do our own transpose
-    GrScalar mt[]  = {
-        m[GrMatrix::kScaleX],
-        m[GrMatrix::kSkewY],
-        m[GrMatrix::kPersp0],
-        m[GrMatrix::kSkewX],
-        m[GrMatrix::kScaleY],
-        m[GrMatrix::kPersp1],
-        m[GrMatrix::kTransX],
-        m[GrMatrix::kTransY],
-        m[GrMatrix::kPersp2]
-    };
-#if GR_GL_ATTRIBUTE_MATRICES
-    GR_GL(VertexAttrib4fv(TEXMAT_ATTR_LOCATION(0)+0, mt+0));
-    GR_GL(VertexAttrib4fv(TEXMAT_ATTR_LOCATION(0)+1, mt+3));
-    GR_GL(VertexAttrib4fv(TEXMAT_ATTR_LOCATION(0)+2, mt+6));
-#else
-    GR_GL(UniformMatrix3fv(fProgramData->fUniLocations.fStages[stage].fTextureMatrixUni,
-                           1, false, mt));
-#endif
+                GrMatrix m = getSamplerMatrix(s);
+                GrSamplerState::SampleMode mode = 
+                    fCurrDrawState.fSamplerStates[s].getSampleMode();
+                AdjustTextureMatrix(texture, mode, &m);
+
+                // ES doesn't allow you to pass true to the transpose param,
+                // so do our own transpose
+                GrScalar mt[]  = {
+                    m[GrMatrix::kScaleX],
+                    m[GrMatrix::kSkewY],
+                    m[GrMatrix::kPersp0],
+                    m[GrMatrix::kSkewX],
+                    m[GrMatrix::kScaleY],
+                    m[GrMatrix::kPersp1],
+                    m[GrMatrix::kTransX],
+                    m[GrMatrix::kTransY],
+                    m[GrMatrix::kPersp2]
+                };
+            #if GR_GL_ATTRIBUTE_MATRICES
+                GR_GL(VertexAttrib4fv(TEXMAT_ATTR_LOCATION(0)+0, mt+0));  // NOTE(review): uses stage 0, not s -- confirm
+                GR_GL(VertexAttrib4fv(TEXMAT_ATTR_LOCATION(0)+1, mt+3));
+                GR_GL(VertexAttrib4fv(TEXMAT_ATTR_LOCATION(0)+2, mt+6));
+            #else
+                GR_GL(UniformMatrix3fv(uni, 1, false, mt));
+            #endif
+            recordHWSamplerMatrix(s, getSamplerMatrix(s));  // remember the unadjusted matrix for the next staleness check
+        }
+    }
 }
 
-void GrGpuGLShaders::flushRadial2(int stage) {
+void GrGpuGLShaders::flushRadial2(int s) {  // uploads stage s's two-point radial params when they changed
 
-    const GrSamplerState& sampler = fCurrDrawState.fSamplerStates[stage];
+    const int &uni = fProgramData->fUniLocations.fStages[s].fRadial2Uni;
+    const GrSamplerState& sampler = fCurrDrawState.fSamplerStates[s];
+    if (-1 != uni &&
+        (fProgramData->fRadial2CenterX1[s] != sampler.getRadial2CenterX1() ||
+         fProgramData->fRadial2Radius0[s]  != sampler.getRadial2Radius0()  ||
+         fProgramData->fRadial2PosRoot[s]  != sampler.isRadial2PosRoot())) {
 
-    GrScalar centerX1 = sampler.getRadial2CenterX1();
-    GrScalar radius0 = sampler.getRadial2Radius0();
+        GrScalar centerX1 = sampler.getRadial2CenterX1();
+        GrScalar radius0 = sampler.getRadial2Radius0();
 
-    GrScalar a = GrMul(centerX1, centerX1) - GR_Scalar1;
+        GrScalar a = GrMul(centerX1, centerX1) - GR_Scalar1;
 
-    float unis[6] = {
-        GrScalarToFloat(a),
-        1 / (2.f * unis[0]),
-        GrScalarToFloat(centerX1),
-        GrScalarToFloat(radius0),
-        GrScalarToFloat(GrMul(radius0, radius0)),
-        sampler.isRadial2PosRoot() ? 1.f : -1.f
-    };
-    GR_GL(Uniform1fv(fProgramData->fUniLocations.fStages[stage].fRadial2Uni,
-                     6,
-                     unis));
+        float values[6] = {
+            GrScalarToFloat(a),
+            1 / (2.f * GrScalarToFloat(a)),  // recomputed from a: do not read values[0] mid-initialization
+            GrScalarToFloat(centerX1),
+            GrScalarToFloat(radius0),
+            GrScalarToFloat(GrMul(radius0, radius0)),
+            sampler.isRadial2PosRoot() ? 1.f : -1.f
+        };
+        GR_GL(Uniform1fv(uni, 6, values));
+        fProgramData->fRadial2CenterX1[s] = sampler.getRadial2CenterX1();  // cache what the program now holds
+        fProgramData->fRadial2Radius0[s]  = sampler.getRadial2Radius0();
+        fProgramData->fRadial2PosRoot[s]  = sampler.isRadial2PosRoot();
+    }
+}
+
+void GrGpuGLShaders::flushTexelSize(int s) {  // uploads (1/width, 1/height) of stage s's texture when stale
+    const int& uni = fProgramData->fUniLocations.fStages[s].fNormalizedTexelSizeUni;
+    if (-1 != uni) {  // -1: the current program has no texel-size uniform for this stage
+        GrGLTexture* texture = (GrGLTexture*) fCurrDrawState.fTextures[s];
+        if (texture->allocWidth() != fProgramData->fTextureWidth[s] ||
+            texture->allocHeight() != fProgramData->fTextureHeight[s]) {
+            float texelSize[] = {1.f / texture->allocWidth(),
+                                 1.f / texture->allocHeight()};
+            GR_GL(Uniform2fv(uni, 1, texelSize));
+            fProgramData->fTextureWidth[s] = texture->allocWidth();  // cache so an unchanged size is not re-sent
+            fProgramData->fTextureHeight[s] = texture->allocHeight();
+        }
+    }
+}
 
 void GrGpuGLShaders::flushColor() {
@@ -334,7 +450,7 @@
     }
 
     buildProgram(type);
-    fProgramData = fProgramCache->getProgramData(fCurrentProgram, this);
+    fProgramData = fProgramCache->getProgramData(fCurrentProgram);
 
     if (fHWProgramID != fProgramData->fProgramID) {
         GR_GL(UseProgram(fProgramData->fProgramID));
@@ -345,7 +461,7 @@
         return false;
     }
 
-    flushColor();
+    this->flushColor();
 
 #if GR_GL_ATTRIBUTE_MATRICES
     GrMatrix& currViewMatrix = fHWDrawState.fViewMatrix;
@@ -359,28 +475,11 @@
     }
 
     for (int s = 0; s < kNumStages; ++s) {
-        GrGLTexture* texture = (GrGLTexture*) fCurrDrawState.fTextures[s];
-        if (NULL != texture) {
-            if (-1 != fProgramData->fUniLocations.fStages[s].fTextureMatrixUni &&
-                (((1 << s) & fDirtyFlags.fTextureChangedMask) ||
-                getHWSamplerMatrix(s) != getSamplerMatrix(s))) {
-                flushTextureMatrix(s);
-                recordHWSamplerMatrix(s, getSamplerMatrix(s));
-            }
-        }
+        this->flushTextureMatrix(s);
 
-        const GrSamplerState& sampler = fCurrDrawState.fSamplerStates[s];
-        if (-1 != fProgramData->fUniLocations.fStages[s].fRadial2Uni &&
-            (fProgramData->fRadial2CenterX1[s] != sampler.getRadial2CenterX1() ||
-             fProgramData->fRadial2Radius0[s]  != sampler.getRadial2Radius0()  ||
-             fProgramData->fRadial2PosRoot[s]  != sampler.isRadial2PosRoot())) {
+        this->flushRadial2(s);
 
-            flushRadial2(s);
-
-            fProgramData->fRadial2CenterX1[s] = sampler.getRadial2CenterX1();
-            fProgramData->fRadial2Radius0[s]  = sampler.getRadial2Radius0();
-            fProgramData->fRadial2PosRoot[s]  = sampler.isRadial2PosRoot();
-        }
+        this->flushTexelSize(s);
     }
     resetDirtyFlags();
     return true;
@@ -531,21 +630,36 @@
                 stage.fOptFlags = 0;
             }
             switch (fCurrDrawState.fSamplerStates[s].getSampleMode()) {
-            case GrSamplerState::kNormal_SampleMode:
-                stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kIdentity_CoordMapping;
-                break;
-            case GrSamplerState::kRadial_SampleMode:
-                stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kRadialGradient_CoordMapping;
-                break;
-            case GrSamplerState::kRadial2_SampleMode:
-                stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kRadial2Gradient_CoordMapping;
-                break;
-            case GrSamplerState::kSweep_SampleMode:
-                stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kSweepGradient_CoordMapping;
-                break;
-            default:
-                GrAssert(!"Unexpected sample mode!");
-                break;
+                case GrSamplerState::kNormal_SampleMode:
+                    stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kIdentity_CoordMapping;
+                    break;
+                case GrSamplerState::kRadial_SampleMode:
+                    stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kRadialGradient_CoordMapping;
+                    break;
+                case GrSamplerState::kRadial2_SampleMode:
+                    stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kRadial2Gradient_CoordMapping;
+                    break;
+                case GrSamplerState::kSweep_SampleMode:
+                    stage.fCoordMapping = GrGLProgram::ProgramDesc::StageDesc::kSweepGradient_CoordMapping;
+                    break;
+                default:
+                    GrCrash("Unexpected sample mode!");
+                    break;
+            }
+
+            switch (fCurrDrawState.fSamplerStates[s].getFilter()) {
+                // these both can use a regular texture2D()
+                case GrSamplerState::kNearest_Filter:
+                case GrSamplerState::kBilinear_Filter:
+                    stage.fFetchMode = GrGLProgram::ProgramDesc::StageDesc::kSingle_FetchMode;
+                    break;
+                // performs 4 texture2D()s
+                case GrSamplerState::k4x4Downsample_Filter:
+                    stage.fFetchMode = GrGLProgram::ProgramDesc::StageDesc::k2x2_FetchMode;
+                    break;
+                default:
+                    GrCrash("Unexpected filter!");
+                    break;
             }
 
             if (GrPixelConfigIsAlphaOnly(texture->config())) {
diff --git a/gpu/src/GrGpuGLShaders.h b/gpu/src/GrGpuGLShaders.h
index 92aab6c..a1bcaf0 100644
--- a/gpu/src/GrGpuGLShaders.h
+++ b/gpu/src/GrGpuGLShaders.h
@@ -60,6 +60,9 @@
     // flushes the parameters to two point radial gradient
     void flushRadial2(int stage);
 
+    // flushes the normalized texel size
+    void flushTexelSize(int stage);
+
     static void DeleteProgram(GrGLProgram::CachedData* programData);
 
     void ProgramUnitTest();
diff --git a/gpu/src/GrGpuGLShaders2.cpp b/gpu/src/GrGpuGLShaders2.cpp
index 6abfcdc..171a031 100644
--- a/gpu/src/GrGpuGLShaders2.cpp
+++ b/gpu/src/GrGpuGLShaders2.cpp
@@ -1135,6 +1135,7 @@
 
 GrGpuGLShaders2::GrGpuGLShaders2() {
 
+    f4X4DownsampleFilterSupport = false;
     fProgram = NULL;
     fProgramCache = new ProgramCache();
 
diff --git a/gpu/src/GrTextContext.cpp b/gpu/src/GrTextContext.cpp
index 09113e0..0222042 100644
--- a/gpu/src/GrTextContext.cpp
+++ b/gpu/src/GrTextContext.cpp
@@ -36,10 +36,15 @@
         GrDrawTarget::AutoStateRestore asr(fDrawTarget);
 
         // setup our sampler state for our text texture/atlas
-
+        GrSamplerState::Filter filter;
+        if (fExtMatrix.isIdentity()) {
+            filter = GrSamplerState::kNearest_Filter;
+        } else {
+            filter = GrSamplerState::kBilinear_Filter;
+        }
         GrSamplerState sampler(GrSamplerState::kRepeat_WrapMode,
                                GrSamplerState::kRepeat_WrapMode,
-                               !fExtMatrix.isIdentity());
+                               filter);
         fDrawTarget->setSamplerState(TEXT_STAGE, sampler);
 
         GrAssert(GrIsALIGN4(fCurrVertex));