Create GrOptDrawState before recording draw in GrInOrderDrawBuffer

Review URL: https://codereview.chromium.org/739673002
diff --git a/gyp/gpu.gypi b/gyp/gpu.gypi
index 5095c44..28c808d 100644
--- a/gyp/gpu.gypi
+++ b/gyp/gpu.gypi
@@ -16,15 +16,14 @@
       '<(skia_include_path)/gpu/GrContext.h',
       '<(skia_include_path)/gpu/GrCoordTransform.h',
       '<(skia_include_path)/gpu/GrFragmentProcessor.h',
+      '<(skia_include_path)/gpu/GrFragmentStage.h',
       '<(skia_include_path)/gpu/GrGpuResource.h',
       '<(skia_include_path)/gpu/GrInvariantOutput.h',
       '<(skia_include_path)/gpu/GrPaint.h',
       '<(skia_include_path)/gpu/GrPathRendererChain.h',
       '<(skia_include_path)/gpu/GrProcessor.h',
-      '<(skia_include_path)/gpu/GrProcessorStage.h',
       '<(skia_include_path)/gpu/GrProcessorUnitTest.h',
       '<(skia_include_path)/gpu/GrProgramElement.h',
-      '<(skia_include_path)/gpu/GrProgramElementRef.h',
       '<(skia_include_path)/gpu/GrGpuResourceRef.h',
       '<(skia_include_path)/gpu/GrRect.h',
       '<(skia_include_path)/gpu/GrRenderTarget.h',
@@ -118,6 +117,8 @@
       '<(skia_src_path)/gpu/GrPathRendering.h',
       '<(skia_src_path)/gpu/GrPathUtils.cpp',
       '<(skia_src_path)/gpu/GrPathUtils.h',
+      '<(skia_src_path)/gpu/GrPendingProgramElement.h',
+      '<(skia_src_path)/gpu/GrPendingFragmentStage.h',
       '<(skia_src_path)/gpu/GrProgramDesc.h',
       '<(skia_src_path)/gpu/GrProgramElement.cpp',
       '<(skia_src_path)/gpu/GrProcessor.cpp',
diff --git a/include/gpu/GrProcessorStage.h b/include/gpu/GrFragmentStage.h
similarity index 69%
rename from include/gpu/GrProcessorStage.h
rename to include/gpu/GrFragmentStage.h
index 0f24a30..8089e9f 100644
--- a/include/gpu/GrProcessorStage.h
+++ b/include/gpu/GrFragmentStage.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2010 Google Inc.
  *
@@ -6,23 +5,17 @@
  * found in the LICENSE file.
  */
 
+#ifndef GrFragmentStage_DEFINED
+#define GrFragmentStage_DEFINED
 
-
-#ifndef GrProcessorStage_DEFINED
-#define GrProcessorStage_DEFINED
-
-#include "GrBackendProcessorFactory.h"
-#include "GrCoordTransform.h"
 #include "GrFragmentProcessor.h"
-#include "GrProgramElementRef.h"
 #include "SkMatrix.h"
-#include "SkShader.h"
 
-// TODO: Make two variations on this class: One for GrDrawState that only owns regular refs
-// and supports compatibility checks and changing local coords. The second is for GrOptDrawState,
-// is immutable, and only owns pending execution refs. This requries removing the common base
-// class from GrDrawState and GrOptDrawState called GrRODrawState and converting to GrOptDrawState
-// when draws are enqueued in the GrInOrderDrawBuffer.
+/**
+ * Wraps a GrFragmentProcessor. It also contains a coord change matrix. This matrix should be
+ * concat'ed with all the processor's coord transforms that apply to local coords, unless
+ * explicit local coords are provided with the draw.
+ */
 class GrFragmentStage {
 public:
     explicit GrFragmentStage(const GrFragmentProcessor* proc)
@@ -35,9 +28,9 @@
         if (other.fCoordChangeMatrixSet) {
             fCoordChangeMatrix = other.fCoordChangeMatrix;
         }
-        fProc.initAndRef(other.fProc);
+        fProc.reset(SkRef(other.fProc.get()));
     }
-    
+
     static bool AreCompatible(const GrFragmentStage& a, const GrFragmentStage& b,
                               bool usingExplicitLocalCoords) {
         SkASSERT(a.fProc.get());
@@ -129,33 +122,12 @@
         }
     }
 
-    bool isPerspectiveCoordTransform(int matrixIndex, bool useExplicitLocalCoords) const {
-        const GrCoordTransform& coordTransform = this->getProcessor()->coordTransform(matrixIndex);
-        SkMatrix::TypeMask type0 = coordTransform.getMatrix().getType();
-        SkMatrix::TypeMask type1 = SkMatrix::kIdentity_Mask;
-        if (kLocal_GrCoordSet == coordTransform.sourceCoords()) {
-          type1 = useExplicitLocalCoords ?
-                  SkMatrix::kIdentity_Mask : this->getCoordChangeMatrix().getType();
-        }
-
-        int combinedTypes = type0 | type1;
-        if (SkMatrix::kPerspective_Mask & combinedTypes) {
-          return true;
-        } else {
-          return false;
-        }
-    }
-
-    const char* name() const { return fProc->name(); }
-
     const GrFragmentProcessor* getProcessor() const { return fProc.get(); }
 
-    void convertToPendingExec() { fProc.convertToPendingExec(); }
-
 protected:
-    bool                                           fCoordChangeMatrixSet;
-    SkMatrix                                       fCoordChangeMatrix;
-    GrProgramElementRef<const GrFragmentProcessor> fProc;
+    bool                                    fCoordChangeMatrixSet;
+    SkMatrix                                fCoordChangeMatrix;
+    SkAutoTUnref<const GrFragmentProcessor> fProc;
 };
 
 #endif
diff --git a/include/gpu/GrPaint.h b/include/gpu/GrPaint.h
index d44dd89..6a40a71 100644
--- a/include/gpu/GrPaint.h
+++ b/include/gpu/GrPaint.h
@@ -11,7 +11,7 @@
 #define GrPaint_DEFINED
 
 #include "GrColor.h"
-#include "GrProcessorStage.h"
+#include "GrFragmentStage.h"
 
 #include "SkXfermode.h"
 
diff --git a/include/gpu/GrProgramElement.h b/include/gpu/GrProgramElement.h
index 2cdd1cc..e1adcc3 100644
--- a/include/gpu/GrProgramElement.h
+++ b/include/gpu/GrProgramElement.h
@@ -35,10 +35,11 @@
     }
 
     void ref() const {
+        this->validate();
         // Once the ref cnt reaches zero it should never be ref'ed again.
         SkASSERT(fRefCnt > 0);
-        this->validate();
         ++fRefCnt;
+        this->validate();
     }
 
     void unref() const {
@@ -47,10 +48,12 @@
         if (0 == fRefCnt) {
             if (0 == fPendingExecutions) {
                 SkDELETE(this);
+                return;
             } else {
                 this->removeRefs();
             }
         }
+        this->validate();
     }
 
     /**
@@ -80,11 +83,33 @@
 private:
     static uint32_t CreateUniqueID();
 
-    void convertRefToPendingExecution() const;
+    void addPendingExecution() const {
+        this->validate();
+        SkASSERT(fRefCnt > 0);
+        if (0 == fPendingExecutions) {
+            this->addPendingIOs();
+        }
+        ++fPendingExecutions;
+        this->validate();
+    }
 
-    void completedExecution() const;
+    void completedExecution() const {
+        this->validate();
+        --fPendingExecutions;
+        if (0 == fPendingExecutions) {
+            if (0 == fRefCnt) {
+                SkDELETE(this);
+                return;
+            } else {
+                this->pendingIOComplete();
+            }
+        }
+        this->validate();
+    }
 
     void removeRefs() const;
+    void addPendingIOs() const;
+    void pendingIOComplete() const;
 
     mutable int32_t fRefCnt;
     // Count of deferred executions not yet issued to the 3D API.
@@ -93,8 +118,8 @@
 
     SkSTArray<4, const GrGpuResourceRef*, true> fGpuResources;
 
-    // Only this class can access convertRefToPendingExecution() and completedExecution().
-    template <typename T> friend class GrProgramElementRef;
+    // Only this class can access addPendingExecution() and completedExecution().
+    template <typename T> friend class GrPendingProgramElement;
 
     typedef SkNoncopyable INHERITED;
 };
diff --git a/include/gpu/GrProgramElementRef.h b/include/gpu/GrProgramElementRef.h
deleted file mode 100644
index ecc8023..0000000
--- a/include/gpu/GrProgramElementRef.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright 2014 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GrProgramElementRef_DEFINED
-#define GrProgramElementRef_DEFINED
-
-#include "SkRefCnt.h"
-#include "GrTypes.h"
-
-/**
- * Helper for owning a GrProgramElement subclass and being able to convert a ref to pending
- * execution. It is like an SkAutoTUnref for program elements whose execution can be deferred. Once
- * in the pending execution state it is illegal to change the object that is owned by the
- * GrProgramElementRef. Its destructor will either unref the GrProgramElement or signal that
- * the pending execution has completed, depending on whether convertToPendingExec() was called.
- */
-template <typename T> class GrProgramElementRef : SkNoncopyable {
-public:
-    GrProgramElementRef() : fOwnPendingExec(false), fObj(NULL) {};
-
-    // Adopts a ref from the caller.
-    explicit GrProgramElementRef(T* obj) : fOwnPendingExec(false), fObj(obj)  {}
-
-    // Adopts a ref from the caller. Do not call after convertToPendingExec.
-    void reset(T* obj) {
-        SkASSERT(!fOwnPendingExec);
-        SkSafeUnref(fObj);
-        fObj = obj;
-    }
-
-    void convertToPendingExec() {
-        SkASSERT(!fOwnPendingExec);
-        fObj->convertRefToPendingExecution();
-        fOwnPendingExec = true;
-    }
-
-    // In the short term we need to support copying a GrProcessorStage and making the copy own
-    // the same type of ref as the source. This function exists to support this. TODO: Once
-    // GrDrawState and GrOptDrawState no longer share a base class they won't have to share
-    // GrProcessorStage and we can have GrOptDrawState always own pending executions rather than
-    // refs on GrProgramElements. At that point we should be able to delete this function.
-    // This function makes assumptions that are valid in the GrProcessorStage use case and should
-    // not be used elsewhere.
-    void initAndRef(const GrProgramElementRef& that) {
-        SkASSERT(!fObj);
-        SkASSERT(that.fObj);
-        if (that.fOwnPendingExec) {
-            SkASSERT(that.fObj->fPendingExecutions > 0);
-            that.fObj->fPendingExecutions++;
-        } else {
-            that.fObj->ref();
-        }
-        this->fOwnPendingExec = that.fOwnPendingExec;
-        this->fObj = that.fObj;
-    }
-
-    T* get() const { return fObj; }
-    operator T*() { return fObj; }
-
-    /** If T is const, the type returned from operator-> will also be const. */
-    typedef typename SkTConstType<typename SkAutoTUnref<T>::template BlockRef<T>,
-                                  SkTIsConst<T>::value>::type BlockRefType;
-
-    /**
-     * GrProgramElementRef assumes ownership of the ref and manages converting the ref to a
-     * pending execution. As a result, it is an error for the user to ref or unref through
-     * GrProgramElementRef. Therefore operator-> returns BlockRef<T>*.
-     */
-    BlockRefType *operator->() const {
-        return static_cast<BlockRefType*>(fObj);
-    }
-
-    ~GrProgramElementRef() {
-        if (fObj) {
-            if (fOwnPendingExec) {
-                fObj->completedExecution();
-            } else {
-                fObj->unref();
-            }
-        }
-    }
-
-private:
-    bool fOwnPendingExec;
-    T*   fObj;
-
-    typedef SkNoncopyable INHERITED;
-};
-#endif
diff --git a/include/gpu/SkGr.h b/include/gpu/SkGr.h
index 8025c95..0f67800 100644
--- a/include/gpu/SkGr.h
+++ b/include/gpu/SkGr.h
@@ -89,7 +89,7 @@
                              bool constantColor, GrPaint* grPaint);
 
 // This function is similar to skPaint2GrPaintNoShader but also converts
-// skPaint's shader to a GrTexture/GrProcessorStage if possible.
+// skPaint's shader to a GrFragmentProcessor if possible.
 // constantColor has the same meaning as in skPaint2GrPaintNoShader.
 void SkPaint2GrPaintShader(GrContext* context, const SkPaint& skPaint,
                            bool constantColor, GrPaint* grPaint);
diff --git a/src/gpu/GrDrawState.cpp b/src/gpu/GrDrawState.cpp
index 6558b0d..fef5494 100644
--- a/src/gpu/GrDrawState.cpp
+++ b/src/gpu/GrDrawState.cpp
@@ -72,43 +72,6 @@
     return true;
 }
 
-GrDrawState::CombinedState GrDrawState::CombineIfPossible(
-    const GrDrawState& a, const GrDrawState& b, const GrDrawTargetCaps& caps) {
-
-    if (!a.isEqual(b)) {
-        return kIncompatible_CombinedState;
-    }
-
-    // If the general draw states are equal (from check above) we know hasColorVertexAttribute()
-    // is equivalent for both a and b
-    if (a.hasColorVertexAttribute()) {
-        // If one is opaque and the other is not then the combined state is not opaque. Moreover,
-        // if the opaqueness affects the ability to get color/coverage blending correct then we
-        // don't combine the draw states.
-        bool aIsOpaque = (kVertexColorsAreOpaque_Hint & a.fHints);
-        bool bIsOpaque = (kVertexColorsAreOpaque_Hint & b.fHints);
-        if (aIsOpaque != bIsOpaque) {
-            const GrDrawState* opaque;
-            const GrDrawState* nonOpaque;
-            if (aIsOpaque) {
-                opaque = &a;
-                nonOpaque = &b;
-            } else {
-                opaque = &b;
-                nonOpaque = &a;
-            }
-            if (!opaque->hasSolidCoverage() && opaque->couldApplyCoverage(caps)) {
-                SkASSERT(!nonOpaque->hasSolidCoverage());
-                if (!nonOpaque->couldApplyCoverage(caps)) {
-                    return kIncompatible_CombinedState;
-                }
-            }
-            return aIsOpaque ? kB_CombinedState : kA_CombinedState;
-        }
-    }
-    return kAOrB_CombinedState;
-}
-
 //////////////////////////////////////////////////////////////////////////////s
 
 GrDrawState::GrDrawState(const GrDrawState& state, const SkMatrix& preConcatMatrix) {
@@ -125,10 +88,7 @@
 }
 
 GrDrawState& GrDrawState::operator=(const GrDrawState& that) {
-    SkASSERT(0 == fBlockEffectRemovalCnt || 0 == this->numTotalStages());
-    SkASSERT(!that.fRenderTarget.ownsPendingIO());
-    SkASSERT(!this->fRenderTarget.ownsPendingIO());
-    this->setRenderTarget(that.getRenderTarget());
+    fRenderTarget.reset(SkSafeRef(that.fRenderTarget.get()));
     fColor = that.fColor;
     fViewMatrix = that.fViewMatrix;
     fSrcBlend = that.fSrcBlend;
@@ -141,11 +101,7 @@
     fStencilSettings = that.fStencilSettings;
     fCoverage = that.fCoverage;
     fDrawFace = that.fDrawFace;
-    if (that.hasGeometryProcessor()) {
-        fGeometryProcessor.initAndRef(that.fGeometryProcessor);
-    } else {
-        fGeometryProcessor.reset(NULL);
-    }
+    fGeometryProcessor.reset(SkSafeRef(that.fGeometryProcessor.get()));
     fColorStages = that.fColorStages;
     fCoverageStages = that.fCoverageStages;
 
@@ -168,13 +124,12 @@
 
 void GrDrawState::onReset(const SkMatrix* initialViewMatrix) {
     SkASSERT(0 == fBlockEffectRemovalCnt || 0 == this->numTotalStages());
-    SkASSERT(!fRenderTarget.ownsPendingIO());
+    fRenderTarget.reset(NULL);
 
     fGeometryProcessor.reset(NULL);
     fColorStages.reset();
     fCoverageStages.reset();
 
-    fRenderTarget.reset();
 
     this->setDefaultVertexAttribs();
 
@@ -577,22 +532,6 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 
-void GrDrawState::convertToPendingExec() {
-    fRenderTarget.markPendingIO();
-    fRenderTarget.removeRef();
-    for (int i = 0; i < fColorStages.count(); ++i) {
-        fColorStages[i].convertToPendingExec();
-    }
-    if (fGeometryProcessor) {
-        fGeometryProcessor.convertToPendingExec();
-    }
-    for (int i = 0; i < fCoverageStages.count(); ++i) {
-        fCoverageStages[i].convertToPendingExec();
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
 GrDrawState::~GrDrawState() {
     SkASSERT(0 == fBlockEffectRemovalCnt);
 }
diff --git a/src/gpu/GrDrawState.h b/src/gpu/GrDrawState.h
index baa7564..4b10160 100644
--- a/src/gpu/GrDrawState.h
+++ b/src/gpu/GrDrawState.h
@@ -13,7 +13,7 @@
 #include "GrDrawTargetCaps.h"
 #include "GrGeometryProcessor.h"
 #include "GrGpuResourceRef.h"
-#include "GrProcessorStage.h"
+#include "GrFragmentStage.h"
 #include "GrProcOptInfo.h"
 #include "GrRenderTarget.h"
 #include "GrStencil.h"
@@ -540,9 +540,7 @@
      *
      * @param target  The render target to set.
      */
-    void setRenderTarget(GrRenderTarget* target) {
-        fRenderTarget.set(SkSafeRef(target), kWrite_GrIOType);
-    }
+    void setRenderTarget(GrRenderTarget* target) { fRenderTarget.reset(SkSafeRef(target)); }
 
     /// @}
 
@@ -719,40 +717,9 @@
 
     ///////////////////////////////////////////////////////////////////////////
 
-    /** Return type for CombineIfPossible. */
-    enum CombinedState {
-        /** The GrDrawStates cannot be combined. */
-        kIncompatible_CombinedState,
-        /** Either draw state can be used in place of the other. */
-        kAOrB_CombinedState,
-        /** Use the first draw state. */
-        kA_CombinedState,
-        /** Use the second draw state. */
-        kB_CombinedState,
-    };
-
-    /** This function determines whether the GrDrawStates used for two draws can be combined into
-        a single GrDrawState. This is used to avoid storing redundant GrDrawStates and to determine
-        if draws can be batched. The return value indicates whether combining is possible and, if
-        so, which of the two inputs should be used. */
-    static CombinedState CombineIfPossible(const GrDrawState& a, const GrDrawState& b,
-                                           const GrDrawTargetCaps& caps);
-
     GrDrawState& operator= (const GrDrawState& that);
 
 private:
-    /**
-     * Converts refs on GrGpuResources owned directly or indirectly by this GrDrawState into
-     * pending reads and writes. This should be called when a GrDrawState is recorded into
-     * a GrDrawTarget for later execution. Subclasses of GrDrawState may add setters. However,
-     * once this call has been made the GrDrawState is immutable. It is also no longer copyable.
-     * In the future this conversion will automatically happen when converting a GrDrawState into
-     * an optimized draw state.
-     */
-    void convertToPendingExec();
-
-    friend class GrDrawTarget;
-
     bool isEqual(const GrDrawState& that) const;
 
     /**
@@ -836,29 +803,25 @@
 
     void internalSetVertexAttribs(const GrVertexAttrib attribs[], int count, size_t stride);
 
-    typedef GrTGpuResourceRef<GrRenderTarget> ProgramRenderTarget;
-    // These fields are roughly sorted by decreasing likelihood of being different in op==
-    ProgramRenderTarget                 fRenderTarget;
-    GrColor                             fColor;
-    SkMatrix                            fViewMatrix;
-    GrColor                             fBlendConstant;
-    uint32_t                            fFlagBits;
-    const GrVertexAttrib*               fVAPtr;
-    int                                 fVACount;
-    size_t                              fVAStride;
-    GrStencilSettings                   fStencilSettings;
-    uint8_t                             fCoverage;
-    DrawFace                            fDrawFace;
-    GrBlendCoeff                        fSrcBlend;
-    GrBlendCoeff                        fDstBlend;
-
     typedef SkSTArray<4, GrFragmentStage> FragmentStageArray;
-    typedef GrProgramElementRef<const GrGeometryProcessor> ProgramGeometryProcessor;
-    ProgramGeometryProcessor            fGeometryProcessor;
-    FragmentStageArray                  fColorStages;
-    FragmentStageArray                  fCoverageStages;
 
-    uint32_t                            fHints;
+    SkAutoTUnref<GrRenderTarget>            fRenderTarget;
+    GrColor                                 fColor;
+    SkMatrix                                fViewMatrix;
+    GrColor                                 fBlendConstant;
+    uint32_t                                fFlagBits;
+    const GrVertexAttrib*                   fVAPtr;
+    int                                     fVACount;
+    size_t                                  fVAStride;
+    GrStencilSettings                       fStencilSettings;
+    uint8_t                                 fCoverage;
+    DrawFace                                fDrawFace;
+    GrBlendCoeff                            fSrcBlend;
+    GrBlendCoeff                            fDstBlend;
+    SkAutoTUnref<const GrGeometryProcessor> fGeometryProcessor;
+    FragmentStageArray                      fColorStages;
+    FragmentStageArray                      fCoverageStages;
+    uint32_t                                fHints;
 
     // This is simply a different representation of info in fVertexAttribs and thus does
     // not need to be compared in op==.
diff --git a/src/gpu/GrDrawTarget.h b/src/gpu/GrDrawTarget.h
index b13db61..c5fff70 100644
--- a/src/gpu/GrDrawTarget.h
+++ b/src/gpu/GrDrawTarget.h
@@ -607,11 +607,6 @@
     bool programUnitTest(int maxStages);
 
 protected:
-    // Extend access to GrDrawState::convertToPEndeingExec to subclasses.
-    void convertDrawStateToPendingExec(GrDrawState* ds) {
-        ds->convertToPendingExec();
-    }
-
     enum GeometrySrcType {
         kNone_GeometrySrcType,     //<! src has not been specified
         kReserved_GeometrySrcType, //<! src was set using reserve*Space
diff --git a/src/gpu/GrInOrderDrawBuffer.cpp b/src/gpu/GrInOrderDrawBuffer.cpp
index 778b8cf..7535a8d 100644
--- a/src/gpu/GrInOrderDrawBuffer.cpp
+++ b/src/gpu/GrInOrderDrawBuffer.cpp
@@ -11,7 +11,6 @@
 #include "GrDefaultGeoProcFactory.h"
 #include "GrDrawTargetCaps.h"
 #include "GrGpu.h"
-#include "GrOptDrawState.h"
 #include "GrTemplates.h"
 #include "GrTextStrike.h"
 #include "GrTexture.h"
@@ -264,9 +263,10 @@
 
     GeometryPoolState& poolState = fGeoPoolStateStack.back();
 
-    this->recordStateIfNecessary(ds,
-                                 GrGpu::PrimTypeToDrawType(info.primitiveType()),
-                                 info.getDstCopy());
+    if (!this->recordStateAndShouldDraw(ds, GrGpu::PrimTypeToDrawType(info.primitiveType()),
+                                        info.getDstCopy())) {
+        return;
+    }
 
     Draw* draw;
     if (info.isInstanced()) {
@@ -302,7 +302,9 @@
                                         const GrClipMaskManager::ScissorState& scissorState,
                                         const GrStencilSettings& stencilSettings) {
     // Only compare the subset of GrDrawState relevant to path stenciling?
-    this->recordStateIfNecessary(ds, GrGpu::kStencilPath_DrawType, NULL);
+    if (!this->recordStateAndShouldDraw(ds, GrGpu::kStencilPath_DrawType, NULL)) {
+        return;
+    }
     StencilPath* sp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, StencilPath, (path));
     sp->fScissorState = scissorState;
     sp->fStencilSettings = stencilSettings;
@@ -315,7 +317,9 @@
                                      const GrStencilSettings& stencilSettings,
                                      const GrDeviceCoordTexture* dstCopy) {
     // TODO: Only compare the subset of GrDrawState relevant to path covering?
-    this->recordStateIfNecessary(ds, GrGpu::kDrawPath_DrawType, dstCopy);
+    if (!this->recordStateAndShouldDraw(ds, GrGpu::kDrawPath_DrawType, dstCopy)) {
+        return;
+    }
     DrawPath* dp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawPath, (path));
     if (dstCopy) {
         dp->fDstCopy = *dstCopy;
@@ -338,7 +342,9 @@
     SkASSERT(indices);
     SkASSERT(transforms);
 
-    this->recordStateIfNecessary(ds, GrGpu::kDrawPaths_DrawType, dstCopy);
+    if (!this->recordStateAndShouldDraw(ds, GrGpu::kDrawPaths_DrawType, dstCopy)) {
+        return;
+    }
 
     uint32_t* savedIndices = fPathIndexBuffer.append(count, indices);
     float* savedTransforms = fPathTransformBuffer.append(count *
@@ -441,7 +447,7 @@
     this->resetIndexSource();
 
     fCmdBuffer.reset();
-    fLastState = NULL;
+    fLastState.reset(NULL);
     fVertexPool.reset();
     fIndexPool.reset();
     reset_data_buffer(&fPathIndexBuffer, kPathIdxBufferMinReserve);
@@ -474,8 +480,8 @@
     int currCmdMarker = 0;
     fDstGpu->saveActiveTraceMarkers();
 
-    // Gpu no longer maintains the current drawstate, so we track the setstate calls below.
-    // NOTE: we always record a new drawstate at flush boundaries
+    // Updated every time we find a set state cmd to reflect the current state in the playback
+    // stream.
     SkAutoTUnref<const GrOptDrawState> currentOptState;
 
     while (iter.next()) {
@@ -490,10 +496,7 @@
 
         if (kSetState_Cmd == strip_trace_bit(iter->fType)) {
             SetState* ss = reinterpret_cast<SetState*>(iter.get());
-            currentOptState.reset(GrOptDrawState::Create(ss->fState,
-                                                         fDstGpu,
-                                                         &ss->fDstCopy,
-                                                         ss->fDrawType));
+            currentOptState.reset(SkRef(ss->fState.get()));
         } else {
             iter->execute(this, currentOptState.get());
         }
@@ -511,34 +514,22 @@
 }
 
 void GrInOrderDrawBuffer::Draw::execute(GrInOrderDrawBuffer* buf, const GrOptDrawState* optState) {
-    if (!optState) {
-        return;
-    }
     buf->fDstGpu->draw(*optState, fInfo, fScissorState);
 }
 
 void GrInOrderDrawBuffer::StencilPath::execute(GrInOrderDrawBuffer* buf,
                                                const GrOptDrawState* optState) {
-    if (!optState) {
-        return;
-    }
     buf->fDstGpu->stencilPath(*optState, this->path(), fScissorState, fStencilSettings);
 }
 
 void GrInOrderDrawBuffer::DrawPath::execute(GrInOrderDrawBuffer* buf,
                                             const GrOptDrawState* optState) {
-    if (!optState) {
-        return;
-    }
     buf->fDstGpu->drawPath(*optState, this->path(), fScissorState, fStencilSettings,
                            fDstCopy.texture() ? &fDstCopy : NULL);
 }
 
 void GrInOrderDrawBuffer::DrawPaths::execute(GrInOrderDrawBuffer* buf,
                                              const GrOptDrawState* optState) {
-    if (!optState) {
-        return;
-    }
     buf->fDstGpu->drawPaths(*optState, this->pathRange(),
                             &buf->fPathIndexBuffer[fIndicesLocation], fCount,
                             &buf->fPathTransformBuffer[fTransformsLocation], fTransformsType,
@@ -745,45 +736,23 @@
     }
 }
 
-void GrInOrderDrawBuffer::recordStateIfNecessary(const GrDrawState& ds,
-                                                 GrGpu::DrawType drawType,
-                                                 const GrDeviceCoordTexture* dstCopy) {
-    if (!fLastState) {
-        SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState, (ds));
-        fLastState = &ss->fState;
+bool GrInOrderDrawBuffer::recordStateAndShouldDraw(const GrDrawState& ds,
+                                                   GrGpu::DrawType drawType,
+                                                   const GrDeviceCoordTexture* dstCopy) {
+    SkAutoTUnref<GrOptDrawState> optState(GrOptDrawState::Create(ds, fDstGpu, dstCopy, drawType));
+    if (!optState) {
+        return false;
+    }
+    if (!fLastState || *optState != *fLastState) {
+        SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState, (optState));
+        fLastState.reset(SkRef(optState.get()));
         if (dstCopy) {
             ss->fDstCopy = *dstCopy;
         }
         ss->fDrawType = drawType;
-        this->convertDrawStateToPendingExec(fLastState);
         this->recordTraceMarkersIfNecessary();
-        return;
     }
-    switch (GrDrawState::CombineIfPossible(*fLastState, ds, *this->caps())) {
-        case GrDrawState::kIncompatible_CombinedState: {
-            SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState, (ds));
-            fLastState = &ss->fState;
-            if (dstCopy) {
-                ss->fDstCopy = *dstCopy;
-            }
-            ss->fDrawType = drawType;
-            this->convertDrawStateToPendingExec(fLastState);
-            this->recordTraceMarkersIfNecessary();
-            break;
-        }
-        case GrDrawState::kA_CombinedState:
-        case GrDrawState::kAOrB_CombinedState: // Treat the same as kA.
-            break;
-        case GrDrawState::kB_CombinedState:
-            // prev has already been converted to pending execution. That is a one-way ticket.
-            // So here we just destruct the previous state and reinit with a new copy of curr.
-            // Note that this goes away when we move GrIODB over to taking optimized snapshots
-            // of draw states.
-            fLastState->~GrDrawState();
-            SkNEW_PLACEMENT_ARGS(fLastState, GrDrawState, (ds));
-            this->convertDrawStateToPendingExec(fLastState);
-            break;
-    }
+    return true;
 }
 
 void GrInOrderDrawBuffer::recordTraceMarkersIfNecessary() {
diff --git a/src/gpu/GrInOrderDrawBuffer.h b/src/gpu/GrInOrderDrawBuffer.h
index 1d5480a..58b239b 100644
--- a/src/gpu/GrInOrderDrawBuffer.h
+++ b/src/gpu/GrInOrderDrawBuffer.h
@@ -9,13 +9,12 @@
 #define GrInOrderDrawBuffer_DEFINED
 
 #include "GrDrawTarget.h"
-#include "GrAllocPool.h"
-#include "GrAllocator.h"
 #include "GrGpu.h"
 #include "GrIndexBuffer.h"
-#include "GrRenderTarget.h"
+#include "GrOptDrawState.h"
 #include "GrPath.h"
 #include "GrPathRange.h"
+#include "GrRenderTarget.h"
 #include "GrSurface.h"
 #include "GrTRecorder.h"
 #include "GrVertexBuffer.h"
@@ -224,63 +223,60 @@
     };
 
     struct SetState : public Cmd {
-        SetState(const GrDrawState& state) : Cmd(kSetState_Cmd), fState(state) {}
+        SetState(const GrOptDrawState* state) : Cmd(kSetState_Cmd), fState(SkRef(state)) {}
 
         virtual void execute(GrInOrderDrawBuffer*, const GrOptDrawState*);
 
-        GrDrawState fState;
-        GrGpu::DrawType fDrawType;
-        GrDeviceCoordTexture fDstCopy;
+        SkAutoTUnref<const GrOptDrawState>  fState;
+        GrGpu::DrawType                     fDrawType;
+        GrDeviceCoordTexture                fDstCopy;
     };
 
     typedef void* TCmdAlign; // This wouldn't be enough align if a command used long double.
     typedef GrTRecorder<Cmd, TCmdAlign> CmdBuffer;
 
     // overrides from GrDrawTarget
-    virtual void onDraw(const GrDrawState&,
-                        const DrawInfo&,
-                        const GrClipMaskManager::ScissorState&) SK_OVERRIDE;
-    virtual void onDrawRect(GrDrawState*,
-                            const SkRect& rect,
-                            const SkRect* localRect,
-                            const SkMatrix* localMatrix) SK_OVERRIDE;
+    void onDraw(const GrDrawState&,
+                const DrawInfo&,
+                const GrClipMaskManager::ScissorState&) SK_OVERRIDE;
+    void onDrawRect(GrDrawState*,
+                    const SkRect& rect,
+                    const SkRect* localRect,
+                    const SkMatrix* localMatrix) SK_OVERRIDE;
 
-    virtual void onStencilPath(const GrDrawState&,
-                               const GrPath*,
-                               const GrClipMaskManager::ScissorState&,
-                               const GrStencilSettings&) SK_OVERRIDE;
-    virtual void onDrawPath(const GrDrawState&,
-                            const GrPath*,
-                            const GrClipMaskManager::ScissorState&,
-                            const GrStencilSettings&,
-                            const GrDeviceCoordTexture* dstCopy) SK_OVERRIDE;
-    virtual void onDrawPaths(const GrDrawState&,
-                             const GrPathRange*,
-                             const uint32_t indices[],
-                             int count,
-                             const float transforms[],
-                             PathTransformType,
-                             const GrClipMaskManager::ScissorState&,
-                             const GrStencilSettings&,
-                             const GrDeviceCoordTexture*) SK_OVERRIDE;
-    virtual void onClear(const SkIRect* rect,
-                         GrColor color,
-                         bool canIgnoreRect,
-                         GrRenderTarget* renderTarget) SK_OVERRIDE;
-    virtual void setDrawBuffers(DrawInfo*) SK_OVERRIDE;
+    void onStencilPath(const GrDrawState&,
+                       const GrPath*,
+                       const GrClipMaskManager::ScissorState&,
+                       const GrStencilSettings&) SK_OVERRIDE;
+    void onDrawPath(const GrDrawState&,
+                    const GrPath*,
+                    const GrClipMaskManager::ScissorState&,
+                    const GrStencilSettings&,
+                    const GrDeviceCoordTexture* dstCopy) SK_OVERRIDE;
+    void onDrawPaths(const GrDrawState&,
+                     const GrPathRange*,
+                     const uint32_t indices[],
+                     int count,
+                     const float transforms[],
+                     PathTransformType,
+                     const GrClipMaskManager::ScissorState&,
+                     const GrStencilSettings&,
+                     const GrDeviceCoordTexture*) SK_OVERRIDE;
+    void onClear(const SkIRect* rect,
+                 GrColor color,
+                 bool canIgnoreRect,
+                 GrRenderTarget* renderTarget) SK_OVERRIDE;
+    void setDrawBuffers(DrawInfo*) SK_OVERRIDE;
 
-    virtual bool onReserveVertexSpace(size_t vertexSize,
-                                      int vertexCount,
-                                      void** vertices) SK_OVERRIDE;
-    virtual bool onReserveIndexSpace(int indexCount,
-                                     void** indices) SK_OVERRIDE;
-    virtual void releaseReservedVertexSpace() SK_OVERRIDE;
-    virtual void releaseReservedIndexSpace() SK_OVERRIDE;
-    virtual void geometrySourceWillPush() SK_OVERRIDE;
-    virtual void geometrySourceWillPop(const GeometrySrcState& restoredState) SK_OVERRIDE;
-    virtual void willReserveVertexAndIndexSpace(int vertexCount,
-                                                size_t vertexStride,
-                                                int indexCount) SK_OVERRIDE;
+    bool onReserveVertexSpace(size_t vertexSize, int vertexCount, void** vertices) SK_OVERRIDE;
+    bool onReserveIndexSpace(int indexCount, void** indices) SK_OVERRIDE;
+    void releaseReservedVertexSpace() SK_OVERRIDE;
+    void releaseReservedIndexSpace() SK_OVERRIDE;
+    void geometrySourceWillPush() SK_OVERRIDE;
+    void geometrySourceWillPop(const GeometrySrcState& restoredState) SK_OVERRIDE;
+    void willReserveVertexAndIndexSpace(int vertexCount,
+                                        size_t vertexStride,
+                                        int indexCount) SK_OVERRIDE;
 
     // Attempts to concat instances from info onto the previous draw. info must represent an
     // instanced draw. The caller must have already recorded a new draw state and clip if necessary.
@@ -288,8 +284,12 @@
                             const DrawInfo&,
                             const GrClipMaskManager::ScissorState&);
 
-    // Determines whether the current draw operation requieres a new drawstate and if so records it.
-    void recordStateIfNecessary(const GrDrawState&, GrGpu::DrawType, const GrDeviceCoordTexture*);
+    // Determines whether the current draw operation requires a new GrOptDrawState and, if so,
+    // records it. If the draw can be skipped, false is returned and no new GrOptDrawState is
+    // recorded.
+    bool SK_WARN_UNUSED_RESULT recordStateAndShouldDraw(const GrDrawState&,
+                                                        GrGpu::DrawType,
+                                                        const GrDeviceCoordTexture*);
     // We lazily record clip changes in order to skip clips that have no effect.
     void recordClipIfNecessary();
     // Records any trace markers for a command after adding it to the buffer.
@@ -305,15 +305,6 @@
         kGeoPoolStatePreAllocCnt     = 4,
     };
 
-    CmdBuffer                         fCmdBuffer;
-    GrDrawState*                      fLastState;
-    SkTArray<GrTraceMarkerSet, false> fGpuCmdMarkers;
-    GrGpu*                            fDstGpu;
-    GrVertexBufferAllocPool&          fVertexPool;
-    GrIndexBufferAllocPool&           fIndexPool;
-    SkTDArray<uint32_t>               fPathIndexBuffer;
-    SkTDArray<float>                  fPathTransformBuffer;
-
     struct GeometryPoolState {
         const GrVertexBuffer*   fPoolVertexBuffer;
         int                     fPoolStartVertex;
@@ -328,9 +319,17 @@
 
     typedef SkSTArray<kGeoPoolStatePreAllocCnt, GeometryPoolState> GeoPoolStateStack;
 
-    GeoPoolStateStack                                   fGeoPoolStateStack;
-    bool                                                fFlushing;
-    uint32_t                                            fDrawID;
+    CmdBuffer                           fCmdBuffer;
+    SkAutoTUnref<const GrOptDrawState>  fLastState;
+    SkTArray<GrTraceMarkerSet, false>   fGpuCmdMarkers;
+    GrGpu*                              fDstGpu;
+    GrVertexBufferAllocPool&            fVertexPool;
+    GrIndexBufferAllocPool&             fIndexPool;
+    SkTDArray<uint32_t>                 fPathIndexBuffer;
+    SkTDArray<float>                    fPathTransformBuffer;
+    GeoPoolStateStack                   fGeoPoolStateStack;
+    bool                                fFlushing;
+    uint32_t                            fDrawID;
 
     typedef GrClipTarget INHERITED;
 };
diff --git a/src/gpu/GrOptDrawState.cpp b/src/gpu/GrOptDrawState.cpp
index 4a258c2..4b20fa9 100644
--- a/src/gpu/GrOptDrawState.cpp
+++ b/src/gpu/GrOptDrawState.cpp
@@ -19,8 +19,8 @@
                                GrBlendCoeff optDstCoeff,
                                GrGpu* gpu,
                                const GrDeviceCoordTexture* dstCopy,
-                               GrGpu::DrawType drawType) {
-    fRenderTarget.set(SkSafeRef(drawState.getRenderTarget()), kWrite_GrIOType);
+                               GrGpu::DrawType drawType)
+: fRenderTarget(drawState.fRenderTarget.get()) {
     fViewMatrix = drawState.getViewMatrix();
     fBlendConstant = drawState.getBlendConstant();
     fFlagBits = drawState.getFlagBits();
@@ -66,26 +66,21 @@
     SkASSERT(GrGpu::IsPathRenderingDrawType(drawType) ||
              GrGpu::kStencilPath_DrawType ||
              drawState.hasGeometryProcessor());
-    if (drawState.hasGeometryProcessor()) {
-        fGeometryProcessor.initAndRef(drawState.fGeometryProcessor);
-    } else {
-        fGeometryProcessor.reset(NULL);
-    }
+    fGeometryProcessor.reset(drawState.getGeometryProcessor());
 
-    // Copy Color Stages from DS to ODS
-    if (firstColorStageIdx < drawState.numColorStages()) {
-        fFragmentStages.reset(&drawState.getColorStage(firstColorStageIdx),
-                              drawState.numColorStages() - firstColorStageIdx);
-    } else {
-        fFragmentStages.reset();
-    }
+    // Copy Stages from DS to ODS
+    bool explicitLocalCoords = descInfo.hasLocalCoordAttribute();
 
+    for (int i = firstColorStageIdx; i < drawState.numColorStages(); ++i) {
+        SkNEW_APPEND_TO_TARRAY(&fFragmentStages,
+                               GrPendingFragmentStage,
+                               (drawState.fColorStages[i], explicitLocalCoords));
+    }
     fNumColorStages = fFragmentStages.count();
-
-    // Copy Coverage Stages from DS to ODS
-    if (firstCoverageStageIdx < drawState.numCoverageStages()) {
-        fFragmentStages.push_back_n(drawState.numCoverageStages() - firstCoverageStageIdx,
-                                    &drawState.getCoverageStage(firstCoverageStageIdx));
+    for (int i = firstCoverageStageIdx; i < drawState.numCoverageStages(); ++i) {
+        SkNEW_APPEND_TO_TARRAY(&fFragmentStages,
+                               GrPendingFragmentStage,
+                               (drawState.fCoverageStages[i], explicitLocalCoords));
     }
 
     this->setOutputStateInfo(drawState, *gpu->caps(), &descInfo);
@@ -257,10 +252,6 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 bool GrOptDrawState::operator== (const GrOptDrawState& that) const {
-    return this->isEqual(that);
-}
-
-bool GrOptDrawState::isEqual(const GrOptDrawState& that) const {
     if (this->fDesc != that.fDesc) {
         return false;
     }
@@ -298,10 +289,11 @@
         return false;
     }
 
-    bool explicitLocalCoords = this->fDesc.header().fLocalCoordAttributeIndex != -1;
+    // The program desc comparison should have already ensured that the stage counts match.
+    SkASSERT(this->numFragmentStages() == that.numFragmentStages());
     for (int i = 0; i < this->numFragmentStages(); i++) {
-        if (!GrFragmentStage::AreCompatible(this->getFragmentStage(i), that.getFragmentStage(i),
-                                            explicitLocalCoords)) {
+
+        if (this->getFragmentStage(i) != that.getFragmentStage(i)) {
             return false;
         }
     }
diff --git a/src/gpu/GrOptDrawState.h b/src/gpu/GrOptDrawState.h
index c44bb6f..ab531ee 100644
--- a/src/gpu/GrOptDrawState.h
+++ b/src/gpu/GrOptDrawState.h
@@ -10,7 +10,7 @@
 
 #include "GrColor.h"
 #include "GrGpu.h"
-#include "GrProcessorStage.h"
+#include "GrPendingFragmentStage.h"
 #include "GrProgramDesc.h"
 #include "GrStencil.h"
 #include "GrTypesPriv.h"
@@ -26,6 +26,8 @@
  */
 class GrOptDrawState : public SkRefCnt {
 public:
+    SK_DECLARE_INST_COUNT(GrOptDrawState)
+
     /**
      * Returns a snapshot of the current optimized state. The GrOptDrawState is reffed and ownership
      * is given to the caller.
@@ -36,6 +38,7 @@
                                   GrGpu::DrawType drawType);
 
     bool operator== (const GrOptDrawState& that) const;
+    bool operator!= (const GrOptDrawState& that) const { return !(*this == that); }
 
     ///////////////////////////////////////////////////////////////////////////
     /// @name Vertex Attributes
@@ -87,9 +90,6 @@
     /// The input color to the first color-stage is either the constant color or interpolated
     /// per-vertex colors. The input to the first coverage stage is either a constant coverage
     /// (usually full-coverage) or interpolated per-vertex coverage.
-    ///
-    /// See the documentation of kCoverageDrawing_StateBit for information about disabling the
-    /// the color / coverage distinction.
     ////
 
     int numColorStages() const { return fNumColorStages; }
@@ -101,15 +101,17 @@
 
     bool hasGeometryProcessor() const { return SkToBool(fGeometryProcessor.get()); }
     const GrGeometryProcessor* getGeometryProcessor() const { return fGeometryProcessor.get(); }
-    const GrFragmentStage& getColorStage(int idx) const {
+    const GrPendingFragmentStage& getColorStage(int idx) const {
         SkASSERT(idx < this->numColorStages());
         return fFragmentStages[idx];
     }
-    const GrFragmentStage& getCoverageStage(int idx) const {
+    const GrPendingFragmentStage& getCoverageStage(int idx) const {
         SkASSERT(idx < this->numCoverageStages());
         return fFragmentStages[fNumColorStages + idx];
     }
-    const GrFragmentStage& getFragmentStage(int idx) const { return fFragmentStages[idx]; }
+    const GrPendingFragmentStage& getFragmentStage(int idx) const {
+        return fFragmentStages[idx];
+    }
 
     /// @}
 
@@ -138,26 +140,6 @@
      */
     const SkMatrix& getViewMatrix() const { return fViewMatrix; }
 
-    /**
-     *  Retrieves the inverse of the current view matrix.
-     *
-     *  If the current view matrix is invertible, return true, and if matrix
-     *  is non-null, copy the inverse into it. If the current view matrix is
-     *  non-invertible, return false and ignore the matrix parameter.
-     *
-     * @param matrix if not null, will receive a copy of the current inverse.
-     */
-    bool getViewInverse(SkMatrix* matrix) const {
-        SkMatrix inverse;
-        if (fViewMatrix.invert(&inverse)) {
-            if (matrix) {
-                *matrix = inverse;
-            }
-            return true;
-        }
-        return false;
-    }
-
     /// @}
 
     ///////////////////////////////////////////////////////////////////////////
@@ -185,10 +167,41 @@
     /// @name State Flags
     ////
 
+    bool isDitherState() const { return 0 != (fFlagBits & kDither_StateBit); }
+    bool isHWAntialiasState() const { return 0 != (fFlagBits & kHWAntialias_StateBit); }
+    bool isColorWriteDisabled() const { return 0 != (fFlagBits & kNoColorWrites_StateBit); }
+    bool isCoverageDrawing() const { return 0 != (fFlagBits & kCoverageDrawing_StateBit); }
+
+    /// @}
+
+    ///////////////////////////////////////////////////////////////////////////
+    /// @name Face Culling
+    ////
+
+    enum DrawFace {
+        kInvalid_DrawFace = -1,
+
+        kBoth_DrawFace,
+        kCCW_DrawFace,
+        kCW_DrawFace,
+    };
+
     /**
-     *  Flags that affect rendering. Controlled using enable/disableState(). All
-     *  default to disabled.
+     * Gets whether the target is drawing clockwise, counterclockwise,
+     * or both faces.
+     * @return the current draw face(s).
      */
+    DrawFace getDrawFace() const { return fDrawFace; }
+
+    /// @}
+
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    const GrProgramDesc& programDesc() const { return fDesc; }
+
+private:
+    // This is lifted from GrDrawState. This should be revised and made specific to this class.
     enum StateBits {
         /**
          * Perform dithering. TODO: Re-evaluate whether we need this bit
@@ -218,63 +231,9 @@
          * control over the blend coeffs. When set, there will be a single blend step controlled by
          * setBlendFunc() which will use coverage*color as the src color.
          */
-         kCoverageDrawing_StateBit = 0x10,
-
-        // Users of the class may add additional bits to the vector
-        kDummyStateBit,
-        kLastPublicStateBit = kDummyStateBit-1,
+         kCoverageDrawing_StateBit = 0x10
     };
-
-    bool isStateFlagEnabled(uint32_t stateBit) const { return 0 != (stateBit & fFlagBits); }
-
-    bool isDitherState() const { return 0 != (fFlagBits & kDither_StateBit); }
-    bool isHWAntialiasState() const { return 0 != (fFlagBits & kHWAntialias_StateBit); }
-    bool isClipState() const { return 0 != (fFlagBits & kClip_StateBit); }
-    bool isColorWriteDisabled() const { return 0 != (fFlagBits & kNoColorWrites_StateBit); }
-    bool isCoverageDrawing() const { return 0 != (fFlagBits & kCoverageDrawing_StateBit); }
-
-    /// @}
-
-    ///////////////////////////////////////////////////////////////////////////
-    /// @name Face Culling
-    ////
-
-    enum DrawFace {
-        kInvalid_DrawFace = -1,
-
-        kBoth_DrawFace,
-        kCCW_DrawFace,
-        kCW_DrawFace,
-    };
-
-    /**
-     * Gets whether the target is drawing clockwise, counterclockwise,
-     * or both faces.
-     * @return the current draw face(s).
-     */
-    DrawFace getDrawFace() const { return fDrawFace; }
-
-    /// @}
-
-    ///////////////////////////////////////////////////////////////////////////
-
-    /** Return type for CombineIfPossible. */
-    enum CombinedState {
-        /** The GrDrawStates cannot be combined. */
-        kIncompatible_CombinedState,
-        /** Either draw state can be used in place of the other. */
-        kAOrB_CombinedState,
-        /** Use the first draw state. */
-        kA_CombinedState,
-        /** Use the second draw state. */
-        kB_CombinedState,
-    };
-
-    /// @}
-
-    const GrProgramDesc& programDesc() const { return fDesc; }
-
-private:
+      
     /**
      * Optimizations for blending / coverage to that can be applied based on the current state.
      */
@@ -358,11 +317,10 @@
     void setOutputStateInfo(const GrDrawState& ds, const GrDrawTargetCaps&,
                             GrProgramDesc::DescInfo*);
 
-    bool isEqual(const GrOptDrawState& that) const;
-
-    // These fields are roughly sorted by decreasing likelihood of being different in op==
-    typedef GrTGpuResourceRef<GrRenderTarget> ProgramRenderTarget;
-    ProgramRenderTarget                 fRenderTarget;
+    typedef GrPendingIOResource<GrRenderTarget, kWrite_GrIOType> RenderTarget;
+    typedef SkSTArray<8, GrPendingFragmentStage> FragmentStageArray;
+    typedef GrPendingProgramElement<const GrGeometryProcessor> ProgramGeometryProcessor;
+    RenderTarget                        fRenderTarget;
     GrColor                             fColor;
     SkMatrix                            fViewMatrix;
     GrColor                             fBlendConstant;
@@ -376,8 +334,6 @@
     GrBlendCoeff                        fSrcBlend;
     GrBlendCoeff                        fDstBlend;
 
-    typedef SkSTArray<8, GrFragmentStage> FragmentStageArray;
-    typedef GrProgramElementRef<const GrGeometryProcessor> ProgramGeometryProcessor;
     ProgramGeometryProcessor            fGeometryProcessor;
     FragmentStageArray                  fFragmentStages;
 
diff --git a/src/gpu/GrPendingFragmentStage.h b/src/gpu/GrPendingFragmentStage.h
new file mode 100644
index 0000000..6c61029
--- /dev/null
+++ b/src/gpu/GrPendingFragmentStage.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrPendingProcessorStage_DEFINED
+#define GrPendingProcessorStage_DEFINED
+
+#include "GrFragmentStage.h"
+#include "GrCoordTransform.h"
+#include "GrFragmentProcessor.h"
+#include "GrPendingProgramElement.h"
+#include "SkMatrix.h"
+
+/**
+ * This is a baked variant of GrFragmentStage, as recorded in GrOptDrawState.
+ */
+class GrPendingFragmentStage {
+public:
+    GrPendingFragmentStage(const GrFragmentStage& stage, bool ignoreMatrix)
+    : fProc(stage.getProcessor())
+    , fCoordChangeMatrix(ignoreMatrix ? SkMatrix::I() : stage.getCoordChangeMatrix()) {
+    }
+
+    GrPendingFragmentStage(const GrPendingFragmentStage& that) { *this = that; }
+
+    GrPendingFragmentStage& operator=(const GrPendingFragmentStage& that) {
+        fProc.reset(that.fProc.get());
+        fCoordChangeMatrix = that.fCoordChangeMatrix;
+        return *this;
+    }
+
+    bool operator==(const GrPendingFragmentStage& that) const {
+        return this->getProcessor()->isEqual(*that.getProcessor()) &&
+               fCoordChangeMatrix == that.fCoordChangeMatrix;
+    }
+
+    bool operator!=(const GrPendingFragmentStage& that) const { return !(*this == that); }
+
+    const SkMatrix& getCoordChangeMatrix() const { return fCoordChangeMatrix; }
+
+    /**
+     * For a coord transform on the fragment processor, does it or the coord change matrix (if
+     * relevant) contain perspective?
+     */
+    bool isPerspectiveCoordTransform(int matrixIndex) const {
+        const GrCoordTransform& coordTransform = this->getProcessor()->coordTransform(matrixIndex);
+        uint32_t type = coordTransform.getMatrix().getType();
+        if (kLocal_GrCoordSet == coordTransform.sourceCoords()) {
+            type |= this->getCoordChangeMatrix().getType();
+        }
+
+        return SkToBool(SkMatrix::kPerspective_Mask & type);
+    }
+
+    const char* name() const { return fProc->name(); }
+
+    const GrFragmentProcessor* getProcessor() const { return fProc.get(); }
+
+protected:
+    GrPendingProgramElement<const GrFragmentProcessor>  fProc;
+    SkMatrix                                            fCoordChangeMatrix;
+};
+#endif
diff --git a/src/gpu/GrPendingProgramElement.h b/src/gpu/GrPendingProgramElement.h
new file mode 100644
index 0000000..7285ecb
--- /dev/null
+++ b/src/gpu/GrPendingProgramElement.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2014 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef GrPendingProgramElement_DEFINED
+#define GrPendingProgramElement_DEFINED
+
+#include "SkRefCnt.h"
+#include "GrTypes.h"
+
+/**
+ * Helper for owning a pending execution on a GrProgramElement. Using this rather than ref allows
+ * resources that are owned by the program element to be correctly tracked as having pending reads
+ * and writes rather than refs.
+ */
+template <typename T> class GrPendingProgramElement : SkNoncopyable {
+public:
+    GrPendingProgramElement() : fObj(NULL) { };
+
+    // Adds a pending execution on obj.
+    explicit GrPendingProgramElement(T* obj) : fObj(obj)  {
+        if (obj) {
+            obj->addPendingExecution();
+        }
+    }
+
+    void reset(T* obj) {
+        if (obj) {
+            obj->addPendingExecution();
+        }
+        if (fObj) {
+            fObj->completedExecution();
+        }
+        fObj = obj;
+    }
+
+    T* get() const { return fObj; }
+    operator T*() { return fObj; }
+
+    T *operator->() const { return fObj; }
+
+    ~GrPendingProgramElement() {
+        if (fObj) {
+            fObj->completedExecution();
+        }
+    }
+
+private:
+    T*   fObj;
+
+    typedef SkNoncopyable INHERITED;
+};
+#endif
diff --git a/src/gpu/GrProcOptInfo.cpp b/src/gpu/GrProcOptInfo.cpp
index c3ca100..18a3202 100644
--- a/src/gpu/GrProcOptInfo.cpp
+++ b/src/gpu/GrProcOptInfo.cpp
@@ -8,7 +8,7 @@
 #include "GrProcOptInfo.h"
 
 #include "GrGeometryProcessor.h"
-#include "GrProcessorStage.h"
+#include "GrFragmentStage.h"
 
 void GrProcOptInfo::calcWithInitialValues(const GrFragmentStage* stages,
                                           int stageCount,
diff --git a/src/gpu/GrProgramElement.cpp b/src/gpu/GrProgramElement.cpp
index 89d53e2..6611234 100644
--- a/src/gpu/GrProgramElement.cpp
+++ b/src/gpu/GrProgramElement.cpp
@@ -17,38 +17,9 @@
     return id;
 }
 
-void GrProgramElement::convertRefToPendingExecution() const {
-    // This function makes it so that all the GrGpuResourceRefs own a single ref to their
-    // underlying GrGpuResource if there are any refs to the GrProgramElement and a single
-    // pending read/write if there are any pending executions of the GrProgramElement. The
-    // GrGpuResourceRef will give up its single ref and/or pending read/write in its destructor.
-    SkASSERT(fRefCnt > 0);
-    if (0 == fPendingExecutions) {
-        for (int i = 0; i < fGpuResources.count(); ++i) {
-            fGpuResources[i]->markPendingIO();
-        }
-    }
-    ++fPendingExecutions;
-    this->unref();
-    if (0 == fRefCnt) {
-        this->removeRefs();
-    }
-}
-
-void GrProgramElement::completedExecution() const {
-    this->validate();
-    --fPendingExecutions;
-    if (0 == fPendingExecutions) {
-        if (0 == fRefCnt) {
-            SkDELETE(this);
-        } else {
-            // Now our pending executions have ocurred and we still have refs. Convert
-            // ownership of our resources back to regular refs.
-            for (int i = 0; i < fGpuResources.count(); ++i) {
-                fGpuResources[i]->pendingIOComplete();
-            }
-
-        }
+void GrProgramElement::addPendingIOs() const {
+    for (int i = 0; i < fGpuResources.count(); ++i) {
+        fGpuResources[i]->markPendingIO();
     }
 }
 
@@ -57,3 +28,10 @@
         fGpuResources[i]->removeRef();
     }
 }
+
+void GrProgramElement::pendingIOComplete() const {
+    for (int i = 0; i < fGpuResources.count(); ++i) {
+        fGpuResources[i]->pendingIOComplete();
+    }
+}
+
diff --git a/src/gpu/effects/GrConfigConversionEffect.h b/src/gpu/effects/GrConfigConversionEffect.h
index 35b3b57..41ae1ac 100644
--- a/src/gpu/effects/GrConfigConversionEffect.h
+++ b/src/gpu/effects/GrConfigConversionEffect.h
@@ -35,7 +35,6 @@
         kPMConversionCnt
     };
 
-    // Installs an effect in the GrProcessorStage to perform a config conversion.
     static const GrFragmentProcessor* Create(GrTexture*, bool swapRedAndBlue, PMConversion,
                                              const SkMatrix&);
 
diff --git a/src/gpu/gl/GrGLProcessor.h b/src/gpu/gl/GrGLProcessor.h
index 331607a..f5ebf51 100644
--- a/src/gpu/gl/GrGLProcessor.h
+++ b/src/gpu/gl/GrGLProcessor.h
@@ -72,11 +72,9 @@
 
     /** A GrGLProcessor instance can be reused with any GrProcessor that produces the same stage
         key; this function reads data from a GrProcessor and uploads any uniform variables required
-        by the shaders created in emitCode(). The GrProcessor installed in the GrDrawEffect is
-        guaranteed to be of the same type that created this GrGLProcessor and to have an identical
-        effect key as the one that created this GrGLProcessor. Effects that use local coords have
-        to consider whether the GrProcessorStage's coord change matrix should be used. When explicit
-        local coordinates are used it can be ignored. */
+        by the shaders created in emitCode(). The GrProcessor parameter is guaranteed to be of the
+        same type that created this GrGLProcessor and to have an identical effect key as the one
+        that created this GrGLProcessor.  */
     virtual void setData(const GrGLProgramDataManager&, const GrProcessor&) {}
 
     const char* name() const { return fFactory.name(); }
diff --git a/src/gpu/gl/GrGLProgram.cpp b/src/gpu/gl/GrGLProgram.cpp
index 3456a42..0e67e81 100644
--- a/src/gpu/gl/GrGLProgram.cpp
+++ b/src/gpu/gl/GrGLProgram.cpp
@@ -25,17 +25,16 @@
 /**
  * Retrieves the final matrix that a transform needs to apply to its source coords.
  */
-static SkMatrix get_transform_matrix(const GrFragmentStage& processorStage,
+static SkMatrix get_transform_matrix(const GrPendingFragmentStage& stage,
                                      bool useExplicitLocalCoords,
                                      int transformIdx) {
-    const GrCoordTransform& coordTransform =
-            processorStage.getProcessor()->coordTransform(transformIdx);
+    const GrCoordTransform& coordTransform = stage.getProcessor()->coordTransform(transformIdx);
     SkMatrix combined;
 
     if (kLocal_GrCoordSet == coordTransform.sourceCoords()) {
         // If we have explicit local coords then we shouldn't need a coord change.
         const SkMatrix& ccm =
-                useExplicitLocalCoords ? SkMatrix::I() : processorStage.getCoordChangeMatrix();
+                useExplicitLocalCoords ? SkMatrix::I() : stage.getCoordChangeMatrix();
         combined.setConcat(coordTransform.getMatrix(), ccm);
     } else {
         combined = coordTransform.getMatrix();
@@ -175,14 +174,15 @@
 void GrGLProgram::setFragmentData(const GrOptDrawState& optState) {
     int numProcessors = fFragmentProcessors->fProcs.count();
     for (int e = 0; e < numProcessors; ++e) {
-        const GrFragmentStage& stage = optState.getFragmentStage(e);
+        const GrPendingFragmentStage& stage = optState.getFragmentStage(e);
         const GrProcessor& processor = *stage.getProcessor();
         fFragmentProcessors->fProcs[e]->fGLProc->setData(fProgramDataManager, processor);
         this->setTransformData(stage, fFragmentProcessors->fProcs[e]);
         this->bindTextures(fFragmentProcessors->fProcs[e], processor);
     }
 }
-void GrGLProgram::setTransformData(const GrFragmentStage& processor, GrGLInstalledFragProc* ip) {
+void GrGLProgram::setTransformData(const GrPendingFragmentStage& processor,
+                                   GrGLInstalledFragProc* ip) {
     SkTArray<GrGLInstalledFragProc::Transform, true>& transforms = ip->fTransforms;
     int numTransforms = transforms.count();
     SkASSERT(numTransforms == processor.getProcessor()->numTransforms());
@@ -329,7 +329,8 @@
     SkASSERT(GrGpu::IsPathRenderingDrawType(drawType));
 }
 
-void GrGLNvprProgram::setTransformData(const GrFragmentStage& proc, GrGLInstalledFragProc* ip) {
+void GrGLNvprProgram::setTransformData(const GrPendingFragmentStage& proc,
+                                       GrGLInstalledFragProc* ip) {
     SkTArray<GrGLInstalledFragProc::Transform, true>& transforms = ip->fTransforms;
     int numTransforms = transforms.count();
     SkASSERT(numTransforms == proc.getProcessor()->numTransforms());
@@ -370,7 +371,8 @@
 }
 
 void
-GrGLLegacyNvprProgram::setTransformData(const GrFragmentStage& proc, GrGLInstalledFragProc* ip) {
+GrGLLegacyNvprProgram::setTransformData(const GrPendingFragmentStage& proc,
+                                        GrGLInstalledFragProc* ip) {
     // We've hidden the texcoord index in the first entry of the transforms array for each effect
     int texCoordIndex = ip->fTransforms[0].fHandle.handle();
     int numTransforms = proc.getProcessor()->numTransforms();
@@ -378,7 +380,7 @@
         const SkMatrix& transform = get_transform_matrix(proc, false, t);
         GrGLPathRendering::PathTexGenComponents components =
                 GrGLPathRendering::kST_PathTexGenComponents;
-        if (proc.isPerspectiveCoordTransform(t, false)) {
+        if (proc.isPerspectiveCoordTransform(t)) {
             components = GrGLPathRendering::kSTR_PathTexGenComponents;
         }
         fGpu->glPathRendering()->enablePathTexGen(texCoordIndex++, components, transform);
diff --git a/src/gpu/gl/GrGLProgram.h b/src/gpu/gl/GrGLProgram.h
index e37d54d..c623977 100644
--- a/src/gpu/gl/GrGLProgram.h
+++ b/src/gpu/gl/GrGLProgram.h
@@ -159,7 +159,7 @@
 
     // A templated helper to loop over effects, set the transforms(via subclass) and bind textures
     void setFragmentData(const GrOptDrawState&);
-    virtual void setTransformData(const GrFragmentStage& effectStage, GrGLInstalledFragProc* pe);
+    virtual void setTransformData(const GrPendingFragmentStage&, GrGLInstalledFragProc*);
     void bindTextures(const GrGLInstalledProc*, const GrProcessor&);
 
     /*
@@ -215,7 +215,7 @@
 
 class GrGLNvprProgram : public GrGLNvprProgramBase {
 public:
-      virtual bool hasVertexShader() const SK_OVERRIDE { return true; }
+    virtual bool hasVertexShader() const SK_OVERRIDE { return true; }
 
 private:
     typedef GrGLNvprProgramBuilder::SeparableVaryingInfo SeparableVaryingInfo;
@@ -228,7 +228,8 @@
                     GrGLInstalledFragProcs* fragmentProcessors,
                     const SeparableVaryingInfoArray& separableVaryings);
     virtual void didSetData(GrGpu::DrawType) SK_OVERRIDE;
-    virtual void setTransformData(const GrFragmentStage&, GrGLInstalledFragProc*) SK_OVERRIDE;
+    virtual void setTransformData(const GrPendingFragmentStage&,
+                                  GrGLInstalledFragProc*) SK_OVERRIDE;
 
     struct Varying {
         GrGLint     fLocation;
@@ -256,7 +257,8 @@
                           GrGLInstalledFragProcs* fragmentProcessors,
                           int texCoordSetCnt);
     virtual void didSetData(GrGpu::DrawType) SK_OVERRIDE;
-    virtual void setTransformData(const GrFragmentStage&, GrGLInstalledFragProc*) SK_OVERRIDE;
+    virtual void setTransformData(const GrPendingFragmentStage&,
+                                  GrGLInstalledFragProc*) SK_OVERRIDE;
 
     int fTexCoordSetCnt;
 
diff --git a/src/gpu/gl/GrGLProgramDesc.cpp b/src/gpu/gl/GrGLProgramDesc.cpp
index 836400e..f8510ff 100644
--- a/src/gpu/gl/GrGLProgramDesc.cpp
+++ b/src/gpu/gl/GrGLProgramDesc.cpp
@@ -73,19 +73,19 @@
     return key;
 }
 
-static uint32_t gen_transform_key(const GrFragmentStage& effectStage,
+static uint32_t gen_transform_key(const GrPendingFragmentStage& stage,
                                   bool useExplicitLocalCoords) {
     uint32_t totalKey = 0;
-    int numTransforms = effectStage.getProcessor()->numTransforms();
+    int numTransforms = stage.getProcessor()->numTransforms();
     for (int t = 0; t < numTransforms; ++t) {
         uint32_t key = 0;
-        if (effectStage.isPerspectiveCoordTransform(t, useExplicitLocalCoords)) {
+        if (stage.isPerspectiveCoordTransform(t)) {
             key |= kGeneral_MatrixType;
         } else {
             key |= kNoPersp_MatrixType;
         }
 
-        const GrCoordTransform& coordTransform = effectStage.getProcessor()->coordTransform(t);
+        const GrCoordTransform& coordTransform = stage.getProcessor()->coordTransform(t);
         if (kLocal_GrCoordSet != coordTransform.sourceCoords() && useExplicitLocalCoords) {
             key |= kPositionCoords_Flag;
         }
@@ -161,8 +161,8 @@
 };
 
 struct FragmentProcessorKeyBuilder {
-    typedef GrFragmentStage StagedProcessor;
-    static bool GetProcessorKey(const GrFragmentStage& fps,
+    typedef GrPendingFragmentStage StagedProcessor;
+    static bool GetProcessorKey(const GrPendingFragmentStage& fps,
                                 const GrGLCaps& caps,
                                 bool useLocalCoords,
                                 GrProcessorKeyBuilder* b,
diff --git a/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.cpp b/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.cpp
index c0c4fbb..b251593 100644
--- a/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.cpp
@@ -21,7 +21,7 @@
     return firstFreeCoordSet;
 }
 
-void GrGLLegacyNvprProgramBuilder::emitTransforms(const GrFragmentStage& processorStage,
+void GrGLLegacyNvprProgramBuilder::emitTransforms(const GrPendingFragmentStage& processorStage,
                                             GrGLProcessor::TransformedCoordsArray* outCoords,
                                             GrGLInstalledFragProc* ifp) {
     int numTransforms = processorStage.getProcessor()->numTransforms();
@@ -35,8 +35,8 @@
 
     SkString name;
     for (int t = 0; t < numTransforms; ++t) {
-        GrSLType type = processorStage.isPerspectiveCoordTransform(t, false) ? kVec3f_GrSLType :
-                                                                               kVec2f_GrSLType;
+        GrSLType type = processorStage.isPerspectiveCoordTransform(t) ? kVec3f_GrSLType :
+                                                                        kVec2f_GrSLType;
 
         name.printf("%s(gl_TexCoord[%i])", GrGLSLTypeString(type), texCoordIndex++);
         SkNEW_APPEND_TO_TARRAY(outCoords, GrGLProcessor::TransformedCoords, (name, type));
diff --git a/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.h b/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.h
index cd2cfb7..b25759e 100644
--- a/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.h
+++ b/src/gpu/gl/builders/GrGLLegacyNvprProgramBuilder.h
@@ -18,9 +18,9 @@
 
 private:
     int addTexCoordSets(int count);
-    void emitTransforms(const GrFragmentStage&,
+    void emitTransforms(const GrPendingFragmentStage&,
                         GrGLProcessor::TransformedCoordsArray* outCoords,
-                        GrGLInstalledFragProc*);
+                        GrGLInstalledFragProc*) SK_OVERRIDE;
 
     int fTexCoordSetCnt;
 
diff --git a/src/gpu/gl/builders/GrGLNvprProgramBuilder.cpp b/src/gpu/gl/builders/GrGLNvprProgramBuilder.cpp
index 5488252..f5a5586 100644
--- a/src/gpu/gl/builders/GrGLNvprProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLNvprProgramBuilder.cpp
@@ -17,7 +17,7 @@
         , fSeparableVaryingInfos(kVarsPerBlock) {
 }
 
-void GrGLNvprProgramBuilder::emitTransforms(const GrFragmentStage& processorStage,
+void GrGLNvprProgramBuilder::emitTransforms(const GrPendingFragmentStage& processorStage,
                                             GrGLProcessor::TransformedCoordsArray* outCoords,
                                             GrGLInstalledFragProc* ifp) {
     const GrFragmentProcessor* effect = processorStage.getProcessor();
@@ -27,7 +27,7 @@
 
     for (int t = 0; t < numTransforms; t++) {
         GrSLType varyingType =
-                processorStage.isPerspectiveCoordTransform(t, false) ?
+                processorStage.isPerspectiveCoordTransform(t) ?
                         kVec3f_GrSLType :
                         kVec2f_GrSLType;
 
diff --git a/src/gpu/gl/builders/GrGLNvprProgramBuilder.h b/src/gpu/gl/builders/GrGLNvprProgramBuilder.h
index e9f6b3b..48fa96c 100644
--- a/src/gpu/gl/builders/GrGLNvprProgramBuilder.h
+++ b/src/gpu/gl/builders/GrGLNvprProgramBuilder.h
@@ -28,7 +28,7 @@
     virtual GrGLProgram* createProgram(GrGLuint programID);
 
 private:
-    virtual void emitTransforms(const GrFragmentStage&,
+    virtual void emitTransforms(const GrPendingFragmentStage&,
                                 GrGLProcessor::TransformedCoordsArray* outCoords,
                                 GrGLInstalledFragProc*) SK_OVERRIDE;
 
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.cpp b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
index f628db9..e56a83d 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.cpp
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.cpp
@@ -266,8 +266,8 @@
                                 GrGLProgramDescBuilder::kProcessorKeyOffsetsAndLengthOffset);
     for (int e = procOffset; e < numProcs; ++e) {
         GrGLSLExpr4 output;
-        const GrFragmentStage& stage = fOptState.getFragmentStage(e);
-        this->emitAndInstallProc<GrFragmentStage>(stage, e, keyProvider, *inOut, &output);
+        const GrPendingFragmentStage& stage = fOptState.getFragmentStage(e);
+        this->emitAndInstallProc<GrPendingFragmentStage>(stage, e, keyProvider, *inOut, &output);
         *inOut = output;
     }
 }
@@ -305,7 +305,7 @@
     fFS.codeAppend("}");
 }
 
-void GrGLProgramBuilder::emitAndInstallProc(const GrFragmentStage& fs,
+void GrGLProgramBuilder::emitAndInstallProc(const GrPendingFragmentStage& fs,
                                             const GrProcessorKey& key,
                                             const char* outColor,
                                             const char* inColor) {
@@ -358,20 +358,17 @@
     SkASSERT(fFS.hasReadDstColor() == fp.willReadDstColor());
 }
 
-void GrGLProgramBuilder::emitTransforms(const GrFragmentStage& effectStage,
+void GrGLProgramBuilder::emitTransforms(const GrPendingFragmentStage& stage,
                                         GrGLProcessor::TransformedCoordsArray* outCoords,
                                         GrGLInstalledFragProc* ifp) {
-    const GrFragmentProcessor* effect = effectStage.getProcessor();
-    int numTransforms = effect->numTransforms();
+    const GrFragmentProcessor* processor = stage.getProcessor();
+    int numTransforms = processor->numTransforms();
     ifp->fTransforms.push_back_n(numTransforms);
 
     for (int t = 0; t < numTransforms; t++) {
         const char* uniName = "StageMatrix";
-        GrSLType varyingType =
-                effectStage.isPerspectiveCoordTransform(t, fVS.hasLocalCoords()) ?
-                        kVec3f_GrSLType :
-                        kVec2f_GrSLType;
-
+        GrSLType varyingType = stage.isPerspectiveCoordTransform(t) ? kVec3f_GrSLType :
+                                                                      kVec2f_GrSLType;
         SkString suffixedUniName;
         if (0 != t) {
             suffixedUniName.append(uniName);
@@ -390,7 +387,7 @@
             suffixedVaryingName.appendf("_%i", t);
             varyingName = suffixedVaryingName.c_str();
         }
-        const char* coords = kPosition_GrCoordSet == effect->coordTransform(t).sourceCoords() ?
+        const char* coords = kPosition_GrCoordSet == processor->coordTransform(t).sourceCoords() ?
                                                      fVS.positionAttribute().c_str() :
                                                      fVS.localCoordsAttribute().c_str();
         GrGLVertToFrag v(varyingType);
diff --git a/src/gpu/gl/builders/GrGLProgramBuilder.h b/src/gpu/gl/builders/GrGLProgramBuilder.h
index a44fa91..1b6c904 100644
--- a/src/gpu/gl/builders/GrGLProgramBuilder.h
+++ b/src/gpu/gl/builders/GrGLProgramBuilder.h
@@ -14,6 +14,7 @@
 #include "../GrGLProgramDataManager.h"
 #include "../GrGLUniformHandle.h"
 #include "../GrGLGeometryProcessor.h"
+#include "../../GrPendingFragmentStage.h"
 
 /*
  * This is the base class for a series of interfaces.  This base class *MUST* remain abstract with
@@ -252,7 +253,7 @@
                             GrGLSLExpr4* output);
 
     // these emit functions help to keep the createAndEmitProcessors template general
-    void emitAndInstallProc(const GrFragmentStage&,
+    void emitAndInstallProc(const GrPendingFragmentStage&,
                             const GrProcessorKey&,
                             const char* outColor,
                             const char* inColor);
@@ -267,7 +268,7 @@
                       GrGLInstalledProc*);
 
     // each specific program builder has a distinct transform and must override this function
-    virtual void emitTransforms(const GrFragmentStage&,
+    virtual void emitTransforms(const GrPendingFragmentStage&,
                                 GrGLProcessor::TransformedCoordsArray* outCoords,
                                 GrGLInstalledFragProc*);
     GrGLProgram* finalize();
diff --git a/tests/GLProgramsTest.cpp b/tests/GLProgramsTest.cpp
index 7c3ba0f..326bfb1 100644
--- a/tests/GLProgramsTest.cpp
+++ b/tests/GLProgramsTest.cpp
@@ -172,7 +172,7 @@
                           GrDrawState* ds,
                           SkRandom* random,
                           GrTexture* dummyTextures[]) {
-    GrProgramElementRef<const GrGeometryProcessor> gp(
+    SkAutoTUnref<const GrGeometryProcessor> gp(
             GrProcessorTestFactory<GrGeometryProcessor>::CreateStage(random,
                                                                      context,
                                                                      caps,
@@ -228,7 +228,7 @@
 
     int currTextureCoordSet = 0;
     for (int s = 0; s < numProcs;) {
-        GrProgramElementRef<GrFragmentProcessor> fp(
+        SkAutoTUnref<const GrFragmentProcessor> fp(
                 GrProcessorTestFactory<GrFragmentProcessor>::CreateStage(random,
                                                                          gpu->getContext(),
                                                                          *gpu->caps(),