Revert of Move state management to GrInOrderDrawBuffer (patchset #4 id:60001 of https://codereview.chromium.org/1120143002/)

Reason for revert:
Breaks Windows.

Original issue's description:
> Move state management to GrInOrderDrawBuffer
>
> BUG=skia:
>
> Committed: https://skia.googlesource.com/skia/+/5d6bb6f795143ca360b868560b52165de51fa269

TBR=bsalomon@google.com,robertphillips@google.com,joshualitt@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Review URL: https://codereview.chromium.org/1119353002
diff --git a/src/gpu/GrAtlasTextContext.h b/src/gpu/GrAtlasTextContext.h
index c6e1d2f..c0b8b00 100644
--- a/src/gpu/GrAtlasTextContext.h
+++ b/src/gpu/GrAtlasTextContext.h
@@ -61,6 +61,7 @@
      * The only thing(aside from a memcopy) required to flush a BitmapTextBlob is to ensure that
      * the GrAtlas will not evict anything the Blob needs.
      */
+    // TODO Pack these bytes
     struct BitmapTextBlob : public SkRefCnt {
         SK_DECLARE_INTERNAL_LLIST_INTERFACE(BitmapTextBlob);
 
diff --git a/src/gpu/GrInOrderDrawBuffer.cpp b/src/gpu/GrInOrderDrawBuffer.cpp
index 76e085d..e298be6 100644
--- a/src/gpu/GrInOrderDrawBuffer.cpp
+++ b/src/gpu/GrInOrderDrawBuffer.cpp
@@ -17,7 +17,6 @@
     , fCommands(context->getGpu(), vertexPool, indexPool)
     , fPathIndexBuffer(kPathIdxBufferMinReserve * sizeof(char)/4)
     , fPathTransformBuffer(kPathXformBufferMinReserve * sizeof(float)/4)
-    , fPipelineBuffer(kPipelineBufferMinReserve)
     , fDrawID(0) {
 
     SkASSERT(vertexPool);
@@ -301,12 +300,7 @@
 
 void GrInOrderDrawBuffer::onDrawBatch(GrBatch* batch,
                                       const PipelineInfo& pipelineInfo) {
-    State* state = this->setupPipelineAndShouldDraw(batch, pipelineInfo);
-    if (!state) {
-        return;
-    }
-
-    GrTargetCommands::Cmd* cmd = fCommands.recordDrawBatch(state, batch);
+    GrTargetCommands::Cmd* cmd = fCommands.recordDrawBatch(this, batch, pipelineInfo);
     this->recordTraceMarkersIfNecessary(cmd);
 }
 
@@ -315,7 +309,7 @@
                                         const GrPath* path,
                                         const GrScissorState& scissorState,
                                         const GrStencilSettings& stencilSettings) {
-    GrTargetCommands::Cmd* cmd = fCommands.recordStencilPath(pipelineBuilder,
+    GrTargetCommands::Cmd* cmd = fCommands.recordStencilPath(this, pipelineBuilder, 
                                                              pathProc, path, scissorState,
                                                              stencilSettings);
     this->recordTraceMarkersIfNecessary(cmd);
@@ -325,11 +319,9 @@
                                      const GrPath* path,
                                      const GrStencilSettings& stencilSettings,
                                      const PipelineInfo& pipelineInfo) {
-    State* state = this->setupPipelineAndShouldDraw(pathProc, pipelineInfo);
-    if (!state) {
-        return;
-    }
-    GrTargetCommands::Cmd* cmd = fCommands.recordDrawPath(state, pathProc, path, stencilSettings);
+    GrTargetCommands::Cmd* cmd = fCommands.recordDrawPath(this, pathProc,
+                                                          path, stencilSettings,
+                                                          pipelineInfo);
     this->recordTraceMarkersIfNecessary(cmd);
 }
 
@@ -342,11 +334,7 @@
                                       int count,
                                       const GrStencilSettings& stencilSettings,
                                       const PipelineInfo& pipelineInfo) {
-    State* state = this->setupPipelineAndShouldDraw(pathProc, pipelineInfo);
-    if (!state) {
-        return;
-    }
-    GrTargetCommands::Cmd* cmd = fCommands.recordDrawPaths(state, this, pathProc, pathRange,
+    GrTargetCommands::Cmd* cmd = fCommands.recordDrawPaths(this, pathProc, pathRange,
                                                            indices, indexType, transformValues,
                                                            transformType, count,
                                                            stencilSettings, pipelineInfo);
@@ -355,14 +343,16 @@
 
 void GrInOrderDrawBuffer::onClear(const SkIRect* rect, GrColor color,
                                   bool canIgnoreRect, GrRenderTarget* renderTarget) {
-    GrTargetCommands::Cmd* cmd = fCommands.recordClear(rect, color, canIgnoreRect, renderTarget);
+    GrTargetCommands::Cmd* cmd = fCommands.recordClear(this, rect, color,
+                                                       canIgnoreRect, renderTarget);
     this->recordTraceMarkersIfNecessary(cmd);
 }
 
 void GrInOrderDrawBuffer::clearStencilClip(const SkIRect& rect,
                                            bool insideClip,
                                            GrRenderTarget* renderTarget) {
-    GrTargetCommands::Cmd* cmd = fCommands.recordClearStencilClip(rect, insideClip, renderTarget);
+    GrTargetCommands::Cmd* cmd = fCommands.recordClearStencilClip(this, rect,
+                                                                  insideClip, renderTarget);
     this->recordTraceMarkersIfNecessary(cmd);
 }
 
@@ -371,7 +361,7 @@
         return;
     }
 
-    GrTargetCommands::Cmd* cmd = fCommands.recordDiscard(renderTarget);
+    GrTargetCommands::Cmd* cmd = fCommands.recordDiscard(this, renderTarget);
     this->recordTraceMarkersIfNecessary(cmd);
 }
 
@@ -380,15 +370,6 @@
     fPathIndexBuffer.rewind();
     fPathTransformBuffer.rewind();
     fGpuCmdMarkers.reset();
-
-    fPrevState.reset(NULL);
-    // Note, fPrevState points into fPipelineBuffer's allocation, so we have to reset first.
-    // Furthermore, we have to reset fCommands before fPipelineBuffer too.
-    if (fDrawID % kPipelineBufferHighWaterMark) {
-        fPipelineBuffer.rewind();
-    } else {
-        fPipelineBuffer.reset();
-    }
 }
 
 void GrInOrderDrawBuffer::onFlush() {
@@ -419,55 +400,3 @@
         }
     }
 }
-
-GrTargetCommands::State*
-GrInOrderDrawBuffer::setupPipelineAndShouldDraw(const GrPrimitiveProcessor* primProc,
-                                                const GrDrawTarget::PipelineInfo& pipelineInfo) {
-    State* state = this->allocState();
-    this->setupPipeline(pipelineInfo, state->pipelineLocation());
-
-    if (state->getPipeline()->mustSkip()) {
-        this->unallocState(state);
-        return NULL;
-    }
-
-    state->fPrimitiveProcessor->initBatchTracker(&state->fBatchTracker,
-                                                 state->getPipeline()->getInitBatchTracker());
-
-    if (fPrevState && fPrevState->fPrimitiveProcessor.get() &&
-        fPrevState->fPrimitiveProcessor->canMakeEqual(fPrevState->fBatchTracker,
-                                                      *state->fPrimitiveProcessor,
-                                                      state->fBatchTracker) &&
-        fPrevState->getPipeline()->isEqual(*state->getPipeline())) {
-        this->unallocState(state);
-    } else {
-        fPrevState.reset(state);
-    }
-
-    fCommands.recordXferBarrierIfNecessary(*fPrevState->getPipeline(), this);
-    return fPrevState;
-}
-
-GrTargetCommands::State*
-GrInOrderDrawBuffer::setupPipelineAndShouldDraw(GrBatch* batch,
-                                                const GrDrawTarget::PipelineInfo& pipelineInfo) {
-    State* state = this->allocState();
-    this->setupPipeline(pipelineInfo, state->pipelineLocation());
-
-    if (state->getPipeline()->mustSkip()) {
-        this->unallocState(state);
-        return NULL;
-    }
-
-    batch->initBatchTracker(state->getPipeline()->getInitBatchTracker());
-
-    if (fPrevState && !fPrevState->fPrimitiveProcessor.get() &&
-        fPrevState->getPipeline()->isEqual(*state->getPipeline())) {
-        this->unallocState(state);
-    } else {
-        fPrevState.reset(state);
-    }
-
-    fCommands.recordXferBarrierIfNecessary(*fPrevState->getPipeline(), this);
-    return fPrevState;
-}
diff --git a/src/gpu/GrInOrderDrawBuffer.h b/src/gpu/GrInOrderDrawBuffer.h
index b253768..ce82c88 100644
--- a/src/gpu/GrInOrderDrawBuffer.h
+++ b/src/gpu/GrInOrderDrawBuffer.h
@@ -72,17 +72,6 @@
 
 private:
     friend class GrTargetCommands;
-    typedef GrTargetCommands::State State;
-
-    State* allocState(const GrPrimitiveProcessor* primProc = NULL) {
-        void* allocation = fPipelineBuffer.alloc(sizeof(State), SkChunkAlloc::kThrow_AllocFailType);
-        return SkNEW_PLACEMENT_ARGS(allocation, State, (primProc));
-    }
-
-    void unallocState(State* state) {
-        state->unref();
-        fPipelineBuffer.unalloc(state);
-    }
 
     void onReset() override;
     void onFlush() override;
@@ -95,6 +84,7 @@
                     const SkRect& rect,
                     const SkRect* localRect,
                     const SkMatrix* localMatrix) override;
+
     void onStencilPath(const GrPipelineBuilder&,
                        const GrPathProcessor*,
                        const GrPath*,
@@ -132,29 +122,17 @@
     }
     bool isIssued(uint32_t drawID) override { return drawID != fDrawID; }
 
-    State* SK_WARN_UNUSED_RESULT setupPipelineAndShouldDraw(const GrPrimitiveProcessor*,
-                                                            const GrDrawTarget::PipelineInfo&);
-    State* SK_WARN_UNUSED_RESULT setupPipelineAndShouldDraw(GrBatch*,
-                                                            const GrDrawTarget::PipelineInfo&);
-
     // TODO: Use a single allocator for commands and records
     enum {
         kPathIdxBufferMinReserve     = 2 * 64,  // 64 uint16_t's
         kPathXformBufferMinReserve   = 2 * 64,  // 64 two-float transforms
-        kPipelineBufferMinReserve    = 32 * sizeof(State),
     };
 
-    // every 100 flushes we should reset our fPipelineBuffer to prevent us from holding at a
-    // highwater mark
-    static const int kPipelineBufferHighWaterMark = 100;
-
     GrTargetCommands                    fCommands;
     SkTArray<GrTraceMarkerSet, false>   fGpuCmdMarkers;
     SkChunkAlloc                        fPathIndexBuffer;
     SkChunkAlloc                        fPathTransformBuffer;
-    SkChunkAlloc                        fPipelineBuffer;
     uint32_t                            fDrawID;
-    SkAutoTUnref<State>                 fPrevState;
 
     typedef GrClipTarget INHERITED;
 };
diff --git a/src/gpu/GrTargetCommands.cpp b/src/gpu/GrTargetCommands.cpp
index d68a242..a6d3148 100644
--- a/src/gpu/GrTargetCommands.cpp
+++ b/src/gpu/GrTargetCommands.cpp
@@ -26,19 +26,27 @@
     return isWinding;
 }
 
-GrTargetCommands::Cmd* GrTargetCommands::recordDrawBatch(State* state, GrBatch* batch) {
+GrTargetCommands::Cmd* GrTargetCommands::recordDrawBatch(
+                                                  GrInOrderDrawBuffer* iodb,
+                                                  GrBatch* batch,
+                                                  const GrDrawTarget::PipelineInfo& pipelineInfo) {
+    if (!this->setupPipelineAndShouldDraw(iodb, batch, pipelineInfo)) {
+        return NULL;
+    }
+
     // Check if there is a Batch Draw we can batch with
-    if (!fCmdBuffer.empty() && Cmd::kDrawBatch_CmdType == fCmdBuffer.back().type()) {
+    if (Cmd::kDrawBatch_CmdType == fCmdBuffer.back().type()) {
         DrawBatch* previous = static_cast<DrawBatch*>(&fCmdBuffer.back());
-        if (previous->fState == state && previous->fBatch->combineIfPossible(batch)) {
+        if (previous->fBatch->combineIfPossible(batch)) {
             return NULL;
         }
     }
 
-    return GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawBatch, (state, batch, &fBatchTarget));
+    return GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawBatch, (batch, &fBatchTarget));
 }
 
 GrTargetCommands::Cmd* GrTargetCommands::recordStencilPath(
+                                                        GrInOrderDrawBuffer* iodb,
                                                         const GrPipelineBuilder& pipelineBuilder,
                                                         const GrPathProcessor* pathProc,
                                                         const GrPath* path,
@@ -55,17 +63,21 @@
 }
 
 GrTargetCommands::Cmd* GrTargetCommands::recordDrawPath(
-                                                  State* state,
+                                                  GrInOrderDrawBuffer* iodb,
                                                   const GrPathProcessor* pathProc,
                                                   const GrPath* path,
-                                                  const GrStencilSettings& stencilSettings) {
-    DrawPath* dp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawPath, (state, path));
+                                                  const GrStencilSettings& stencilSettings,
+                                                  const GrDrawTarget::PipelineInfo& pipelineInfo) {
+    // TODO: Only compare the subset of GrPipelineBuilder relevant to path covering?
+    if (!this->setupPipelineAndShouldDraw(iodb, pathProc, pipelineInfo)) {
+        return NULL;
+    }
+    DrawPath* dp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawPath, (path));
     dp->fStencilSettings = stencilSettings;
     return dp;
 }
 
 GrTargetCommands::Cmd* GrTargetCommands::recordDrawPaths(
-                                                  State* state,
                                                   GrInOrderDrawBuffer* iodb,
                                                   const GrPathProcessor* pathProc,
                                                   const GrPathRange* pathRange,
@@ -80,6 +92,10 @@
     SkASSERT(indexValues);
     SkASSERT(transformValues);
 
+    if (!this->setupPipelineAndShouldDraw(iodb, pathProc, pipelineInfo)) {
+        return NULL;
+    }
+
     char* savedIndices;
     float* savedTransforms;
     
@@ -87,7 +103,7 @@
                                      transformValues, transformType,
                                      count, &savedIndices, &savedTransforms);
 
-    if (!fCmdBuffer.empty() && Cmd::kDrawPaths_CmdType == fCmdBuffer.back().type()) {
+    if (Cmd::kDrawPaths_CmdType == fCmdBuffer.back().type()) {
         // The previous command was also DrawPaths. Try to collapse this call into the one
         // before. Note that stenciling all the paths at once, then covering, may not be
         // equivalent to two separate draw calls if there is overlap. Blending won't work,
@@ -101,8 +117,7 @@
             transformType == previous->fTransformType &&
             stencilSettings == previous->fStencilSettings &&
             path_fill_type_is_winding(stencilSettings) &&
-            !pipelineInfo.willBlendWithDst(pathProc) &&
-            previous->fState == state) {
+            !pipelineInfo.willBlendWithDst(pathProc)) {
                 const int indexBytes = GrPathRange::PathIndexSizeInBytes(indexType);
                 const int xformSize = GrPathRendering::PathTransformSize(transformType);
                 if (&previous->fIndices[previous->fCount*indexBytes] == savedIndices &&
@@ -115,7 +130,7 @@
         }
     }
 
-    DrawPaths* dp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawPaths, (state, pathRange));
+    DrawPaths* dp = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, DrawPaths, (pathRange));
     dp->fIndices = savedIndices;
     dp->fIndexType = indexType;
     dp->fTransforms = savedTransforms;
@@ -125,7 +140,8 @@
     return dp;
 }
 
-GrTargetCommands::Cmd* GrTargetCommands::recordClear(const SkIRect* rect,
+GrTargetCommands::Cmd* GrTargetCommands::recordClear(GrInOrderDrawBuffer* iodb,
+                                                     const SkIRect* rect, 
                                                      GrColor color,
                                                      bool canIgnoreRect,
                                                      GrRenderTarget* renderTarget) {
@@ -147,7 +163,8 @@
     return clr;
 }
 
-GrTargetCommands::Cmd* GrTargetCommands::recordClearStencilClip(const SkIRect& rect,
+GrTargetCommands::Cmd* GrTargetCommands::recordClearStencilClip(GrInOrderDrawBuffer* iodb,
+                                                                const SkIRect& rect,
                                                                 bool insideClip,
                                                                 GrRenderTarget* renderTarget) {
     SkASSERT(renderTarget);
@@ -158,7 +175,8 @@
     return clr;
 }
 
-GrTargetCommands::Cmd* GrTargetCommands::recordDiscard(GrRenderTarget* renderTarget) {
+GrTargetCommands::Cmd* GrTargetCommands::recordDiscard(GrInOrderDrawBuffer* iodb,
+                                                       GrRenderTarget* renderTarget) {
     SkASSERT(renderTarget);
 
     Clear* clr = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, Clear, (renderTarget));
@@ -168,6 +186,7 @@
 
 void GrTargetCommands::reset() {
     fCmdBuffer.reset();
+    fPrevState = NULL;
 }
 
 void GrTargetCommands::flush(GrInOrderDrawBuffer* iodb) {
@@ -175,6 +194,10 @@
         return;
     }
 
+    // Updated every time we find a set state cmd to reflect the current state in the playback
+    // stream.
+    SetState* currentState = NULL;
+
     GrGpu* gpu = iodb->getGpu();
 
     // Loop over all batches and generate geometry
@@ -183,8 +206,13 @@
         if (Cmd::kDrawBatch_CmdType == genIter->type()) {
             DrawBatch* db = reinterpret_cast<DrawBatch*>(genIter.get());
             fBatchTarget.resetNumberOfDraws();
-            db->fBatch->generateGeometry(&fBatchTarget, db->fState->getPipeline());
+            db->execute(NULL, currentState);
             db->fBatch->setNumberOfDraws(fBatchTarget.numberOfDraws());
+        } else if (Cmd::kSetState_CmdType == genIter->type()) {
+            SetState* ss = reinterpret_cast<SetState*>(genIter.get());
+
+            ss->execute(gpu, currentState);
+            currentState = ss;
         }
     }
 
@@ -203,7 +231,29 @@
             gpu->addGpuTraceMarker(&newMarker);
         }
 
-        iter->execute(gpu);
+        if (Cmd::kDrawBatch_CmdType == iter->type()) {
+            DrawBatch* db = reinterpret_cast<DrawBatch*>(iter.get());
+            fBatchTarget.flushNext(db->fBatch->numberOfDraws());
+
+            if (iter->isTraced()) {
+                gpu->removeGpuTraceMarker(&newMarker);
+            }
+            continue;
+        }
+
+        if (Cmd::kSetState_CmdType == iter->type()) {
+            // TODO this is just until NVPR is in batch
+            SetState* ss = reinterpret_cast<SetState*>(iter.get());
+
+            if (ss->fPrimitiveProcessor) {
+                ss->execute(gpu, currentState);
+            }
+            currentState = ss;
+
+        } else {
+            iter->execute(gpu, currentState);
+        }
+
         if (iter->isTraced()) {
             gpu->removeGpuTraceMarker(&newMarker);
         }
@@ -212,7 +262,7 @@
     fBatchTarget.postFlush();
 }
 
-void GrTargetCommands::StencilPath::execute(GrGpu* gpu) {
+void GrTargetCommands::StencilPath::execute(GrGpu* gpu, const SetState*) {
     GrGpu::StencilPathState state;
     state.fRenderTarget = fRenderTarget.get();
     state.fScissor = &fScissor;
@@ -223,36 +273,37 @@
     gpu->stencilPath(this->path(), state);
 }
 
-void GrTargetCommands::DrawPath::execute(GrGpu* gpu) {
-    if (!fState->fCompiled) {
-        gpu->buildProgramDesc(&fState->fDesc, *fState->fPrimitiveProcessor, *fState->getPipeline(),
-                              fState->fBatchTracker);
-        fState->fCompiled = true;
-    }
-    DrawArgs args(fState->fPrimitiveProcessor.get(), fState->getPipeline(),
-                  &fState->fDesc, &fState->fBatchTracker);
+void GrTargetCommands::DrawPath::execute(GrGpu* gpu, const SetState* state) {
+    SkASSERT(state);
+    DrawArgs args(state->fPrimitiveProcessor.get(), state->getPipeline(), &state->fDesc,
+                  &state->fBatchTracker);
     gpu->drawPath(args, this->path(), fStencilSettings);
 }
 
-void GrTargetCommands::DrawPaths::execute(GrGpu* gpu) {
-    if (!fState->fCompiled) {
-        gpu->buildProgramDesc(&fState->fDesc, *fState->fPrimitiveProcessor, *fState->getPipeline(),
-                              fState->fBatchTracker);
-        fState->fCompiled = true;
-    }
-    DrawArgs args(fState->fPrimitiveProcessor.get(), fState->getPipeline(),
-                  &fState->fDesc, &fState->fBatchTracker);
+void GrTargetCommands::DrawPaths::execute(GrGpu* gpu, const SetState* state) {
+    SkASSERT(state);
+    DrawArgs args(state->fPrimitiveProcessor.get(), state->getPipeline(), &state->fDesc,
+                  &state->fBatchTracker);
     gpu->drawPaths(args, this->pathRange(),
                    fIndices, fIndexType,
                    fTransforms, fTransformType,
                    fCount, fStencilSettings);
 }
 
-void GrTargetCommands::DrawBatch::execute(GrGpu*) {
-    fBatchTarget->flushNext(fBatch->numberOfDraws());
+void GrTargetCommands::DrawBatch::execute(GrGpu*, const SetState* state) {
+    SkASSERT(state);
+    fBatch->generateGeometry(fBatchTarget, state->getPipeline());
 }
 
-void GrTargetCommands::Clear::execute(GrGpu* gpu) {
+void GrTargetCommands::SetState::execute(GrGpu* gpu, const SetState*) {
+    // TODO sometimes we have a prim proc, othertimes we have a GrBatch.  Eventually we
+    // will only have GrBatch and we can delete this
+    if (fPrimitiveProcessor) {
+        gpu->buildProgramDesc(&fDesc, *fPrimitiveProcessor, *getPipeline(), fBatchTracker);
+    }
+}
+
+void GrTargetCommands::Clear::execute(GrGpu* gpu, const SetState*) {
     if (GrColor_ILLEGAL == fColor) {
         gpu->discard(this->renderTarget());
     } else {
@@ -260,15 +311,15 @@
     }
 }
 
-void GrTargetCommands::ClearStencilClip::execute(GrGpu* gpu) {
+void GrTargetCommands::ClearStencilClip::execute(GrGpu* gpu, const SetState*) {
     gpu->clearStencilClip(fRect, fInsideClip, this->renderTarget());
 }
 
-void GrTargetCommands::CopySurface::execute(GrGpu* gpu) {
+void GrTargetCommands::CopySurface::execute(GrGpu* gpu, const SetState*) {
     gpu->copySurface(this->dst(), this->src(), fSrcRect, fDstPoint);
 }
 
-void GrTargetCommands::XferBarrier::execute(GrGpu* gpu) {
+void GrTargetCommands::XferBarrier::execute(GrGpu* gpu, const SetState* state) {
     gpu->xferBarrier(fBarrierType);
 }
 
@@ -282,10 +333,65 @@
     return cs;
 }
 
-void GrTargetCommands::recordXferBarrierIfNecessary(const GrPipeline& pipeline,
-                                                    GrInOrderDrawBuffer* iodb) {
-    const GrXferProcessor& xp = *pipeline.getXferProcessor();
-    GrRenderTarget* rt = pipeline.getRenderTarget();
+bool GrTargetCommands::setupPipelineAndShouldDraw(GrInOrderDrawBuffer* iodb,
+                                                  const GrPrimitiveProcessor* primProc,
+                                                  const GrDrawTarget::PipelineInfo& pipelineInfo) {
+    SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState, (primProc));
+    iodb->setupPipeline(pipelineInfo, ss->pipelineLocation()); 
+
+    if (ss->getPipeline()->mustSkip()) {
+        fCmdBuffer.pop_back();
+        return false;
+    }
+
+    ss->fPrimitiveProcessor->initBatchTracker(&ss->fBatchTracker,
+                                              ss->getPipeline()->getInitBatchTracker());
+
+    if (fPrevState && fPrevState->fPrimitiveProcessor.get() &&
+        fPrevState->fPrimitiveProcessor->canMakeEqual(fPrevState->fBatchTracker,
+                                                      *ss->fPrimitiveProcessor,
+                                                      ss->fBatchTracker) &&
+        fPrevState->getPipeline()->isEqual(*ss->getPipeline())) {
+        fCmdBuffer.pop_back();
+    } else {
+        fPrevState = ss;
+        iodb->recordTraceMarkersIfNecessary(ss);
+    }
+
+    this->recordXferBarrierIfNecessary(iodb, pipelineInfo);
+    return true;
+}
+
+bool GrTargetCommands::setupPipelineAndShouldDraw(GrInOrderDrawBuffer* iodb,
+                                                  GrBatch* batch,
+                                                  const GrDrawTarget::PipelineInfo& pipelineInfo) {
+    SetState* ss = GrNEW_APPEND_TO_RECORDER(fCmdBuffer, SetState, ());
+    iodb->setupPipeline(pipelineInfo, ss->pipelineLocation()); 
+
+    if (ss->getPipeline()->mustSkip()) {
+        fCmdBuffer.pop_back();
+        return false;
+    }
+
+    batch->initBatchTracker(ss->getPipeline()->getInitBatchTracker());
+
+    if (fPrevState && !fPrevState->fPrimitiveProcessor.get() &&
+        fPrevState->getPipeline()->isEqual(*ss->getPipeline())) {
+        fCmdBuffer.pop_back();
+    } else {
+        fPrevState = ss;
+        iodb->recordTraceMarkersIfNecessary(ss);
+    }
+
+    this->recordXferBarrierIfNecessary(iodb, pipelineInfo);
+    return true;
+}
+
+void GrTargetCommands::recordXferBarrierIfNecessary(GrInOrderDrawBuffer* iodb,
+                                                    const GrDrawTarget::PipelineInfo& info) {
+    SkASSERT(fPrevState);
+    const GrXferProcessor& xp = *fPrevState->getXferProcessor();
+    GrRenderTarget* rt = fPrevState->getRenderTarget();
 
     GrXferBarrierType barrierType;
     if (!xp.willNeedXferBarrier(rt, *iodb->caps(), &barrierType)) {
diff --git a/src/gpu/GrTargetCommands.h b/src/gpu/GrTargetCommands.h
index 8c0a3fd..f9259c7 100644
--- a/src/gpu/GrTargetCommands.h
+++ b/src/gpu/GrTargetCommands.h
@@ -24,7 +24,6 @@
 class GrIndexBufferAllocPool;
 
 class GrTargetCommands : ::SkNoncopyable {
-    struct State;
     struct SetState;
 
 public:
@@ -32,6 +31,7 @@
                      GrVertexBufferAllocPool* vertexPool,
                      GrIndexBufferAllocPool* indexPool)
         : fCmdBuffer(kCmdBufferInitialSizeInBytes)
+        , fPrevState(NULL)
         , fBatchTarget(gpu, vertexPool, indexPool) {
     }
 
@@ -51,7 +51,7 @@
         Cmd(CmdType type) : fMarkerID(-1), fType(type) {}
         virtual ~Cmd() {}
 
-        virtual void execute(GrGpu*) = 0;
+        virtual void execute(GrGpu*, const SetState*) = 0;
 
         CmdType type() const { return fType; }
 
@@ -68,23 +68,32 @@
     void reset();
     void flush(GrInOrderDrawBuffer*);
 
-    Cmd* recordClearStencilClip(const SkIRect& rect,
+    Cmd* recordClearStencilClip(GrInOrderDrawBuffer*,
+                                const SkIRect& rect,
                                 bool insideClip,
                                 GrRenderTarget* renderTarget);
 
-    Cmd* recordDiscard(GrRenderTarget*);
-    Cmd* recordDrawBatch(State*, GrBatch*);
-    Cmd* recordStencilPath(const GrPipelineBuilder&,
+    Cmd* recordDiscard(GrInOrderDrawBuffer*, GrRenderTarget*);
+
+    Cmd* recordDraw(GrInOrderDrawBuffer*,
+                    const GrGeometryProcessor*,
+                    const GrDrawTarget::DrawInfo&,
+                    const GrDrawTarget::PipelineInfo&);
+    Cmd* recordDrawBatch(GrInOrderDrawBuffer*,
+                         GrBatch*,
+                         const GrDrawTarget::PipelineInfo&);
+    Cmd* recordStencilPath(GrInOrderDrawBuffer*,
+                           const GrPipelineBuilder&,
                            const GrPathProcessor*,
                            const GrPath*,
                            const GrScissorState&,
                            const GrStencilSettings&);
-    Cmd* recordDrawPath(State*,
+    Cmd* recordDrawPath(GrInOrderDrawBuffer*,
                         const GrPathProcessor*,
                         const GrPath*,
-                        const GrStencilSettings&);
-    Cmd* recordDrawPaths(State*,
-                         GrInOrderDrawBuffer*,
+                        const GrStencilSettings&,
+                        const GrDrawTarget::PipelineInfo&);
+    Cmd* recordDrawPaths(GrInOrderDrawBuffer*,
                          const GrPathProcessor*,
                          const GrPathRange*,
                          const void*,
@@ -94,7 +103,8 @@
                          int,
                          const GrStencilSettings&,
                          const GrDrawTarget::PipelineInfo&);
-    Cmd* recordClear(const SkIRect* rect,
+    Cmd* recordClear(GrInOrderDrawBuffer*,
+                     const SkIRect* rect,
                      GrColor,
                      bool canIgnoreRect,
                      GrRenderTarget*);
@@ -108,51 +118,18 @@
 
     typedef GrGpu::DrawArgs DrawArgs;
 
-    void recordXferBarrierIfNecessary(const GrPipeline&, GrInOrderDrawBuffer*);
+    // Attempts to concat instances from info onto the previous draw. info must represent an
+    // instanced draw. The caller must have already recorded a new draw state and clip if necessary.
+    int concatInstancedDraw(GrInOrderDrawBuffer*, const GrDrawTarget::DrawInfo&);
 
-    // TODO: This can be just a pipeline once paths are in batch, and it should live elsewhere
-    struct State : public SkRefCnt {
-        // TODO get rid of the prim proc parameter when we use batch everywhere
-        State(const GrPrimitiveProcessor* primProc = NULL)
-            : fPrimitiveProcessor(primProc)
-            , fCompiled(false) {}
+    bool SK_WARN_UNUSED_RESULT setupPipelineAndShouldDraw(GrInOrderDrawBuffer*,
+                                                          const GrPrimitiveProcessor*,
+                                                          const GrDrawTarget::PipelineInfo&);
+    bool SK_WARN_UNUSED_RESULT setupPipelineAndShouldDraw(GrInOrderDrawBuffer*,
+                                                          GrBatch*,
+                                                          const GrDrawTarget::PipelineInfo&);
 
-        ~State() { reinterpret_cast<GrPipeline*>(fPipeline.get())->~GrPipeline(); }
-
-        // This function is only for getting the location in memory where we will create our
-        // pipeline object.
-        GrPipeline* pipelineLocation() { return reinterpret_cast<GrPipeline*>(fPipeline.get()); }
-
-        const GrPipeline* getPipeline() const {
-            return reinterpret_cast<const GrPipeline*>(fPipeline.get());
-        }
-        GrRenderTarget* getRenderTarget() const {
-            return this->getPipeline()->getRenderTarget();
-        }
-        const GrXferProcessor* getXferProcessor() const {
-            return this->getPipeline()->getXferProcessor();
-        }
-
-        void operator delete(void* p) {
-            //SkDebugf("destruction\n");
-        }
-        void* operator new(size_t) {
-            SkFAIL("All States are created by placement new.");
-            return sk_malloc_throw(0);
-        }
-
-        void* operator new(size_t, void* p) { return p; }
-        void operator delete(void* target, void* placement) {
-            ::operator delete(target, placement);
-        }
-
-        typedef GrPendingProgramElement<const GrPrimitiveProcessor> ProgramPrimitiveProcessor;
-        ProgramPrimitiveProcessor               fPrimitiveProcessor;
-        SkAlignedSStorage<sizeof(GrPipeline)>   fPipeline;
-        GrProgramDesc                           fDesc;
-        GrBatchTracker                          fBatchTracker;
-        bool                                    fCompiled;
-    };
+    void recordXferBarrierIfNecessary(GrInOrderDrawBuffer*, const GrDrawTarget::PipelineInfo&);
 
     struct StencilPath : public Cmd {
         StencilPath(const GrPath* path, GrRenderTarget* rt)
@@ -162,7 +139,7 @@
 
         const GrPath* path() const { return fPath.get(); }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
         SkMatrix                                                fViewMatrix;
         bool                                                    fUseHWAA;
@@ -174,32 +151,25 @@
     };
 
     struct DrawPath : public Cmd {
-        DrawPath(State* state, const GrPath* path)
-            : Cmd(kDrawPath_CmdType)
-            , fState(SkRef(state))
-            , fPath(path) {}
+        DrawPath(const GrPath* path) : Cmd(kDrawPath_CmdType), fPath(path) {}
 
         const GrPath* path() const { return fPath.get(); }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
-        SkAutoTUnref<State>     fState;
         GrStencilSettings       fStencilSettings;
+
     private:
         GrPendingIOResource<const GrPath, kRead_GrIOType> fPath;
     };
 
     struct DrawPaths : public Cmd {
-        DrawPaths(State* state, const GrPathRange* pathRange)
-            : Cmd(kDrawPaths_CmdType)
-            , fState(SkRef(state))
-            , fPathRange(pathRange) {}
+        DrawPaths(const GrPathRange* pathRange) : Cmd(kDrawPaths_CmdType), fPathRange(pathRange) {}
 
         const GrPathRange* pathRange() const { return fPathRange.get();  }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
-        SkAutoTUnref<State>             fState;
         char*                           fIndices;
         GrDrawTarget::PathIndexType     fIndexType;
         float*                          fTransforms;
@@ -217,7 +187,7 @@
 
         GrRenderTarget* renderTarget() const { return fRenderTarget.get(); }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
         SkIRect fRect;
         GrColor fColor;
@@ -233,7 +203,7 @@
 
         GrRenderTarget* renderTarget() const { return fRenderTarget.get(); }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
         SkIRect fRect;
         bool    fInsideClip;
@@ -252,7 +222,7 @@
         GrSurface* dst() const { return fDst.get(); }
         GrSurface* src() const { return fSrc.get(); }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
         SkIPoint    fDstPoint;
         SkIRect     fSrcRect;
@@ -262,18 +232,49 @@
         GrPendingIOResource<GrSurface, kRead_GrIOType> fSrc;
     };
 
+    // TODO: rename to SetPipeline once the primitive processor, batch tracker, and desc are removed
+    struct SetState : public Cmd {
+        // TODO: get rid of the primProc parameter once batches are used everywhere
+        SetState(const GrPrimitiveProcessor* primProc = NULL)
+        : Cmd(kSetState_CmdType)
+        , fPrimitiveProcessor(primProc) {}
+
+        ~SetState() { reinterpret_cast<GrPipeline*>(fPipeline.get())->~GrPipeline(); }
+
+        // Only for obtaining the address of the fPipeline storage where the GrPipeline object is
+        // created; ~SetState() explicitly runs that object's destructor.
+        GrPipeline* pipelineLocation() { return reinterpret_cast<GrPipeline*>(fPipeline.get()); }
+
+        const GrPipeline* getPipeline() const {
+            return reinterpret_cast<const GrPipeline*>(fPipeline.get());
+        }
+        GrRenderTarget* getRenderTarget() const {
+            return this->getPipeline()->getRenderTarget();
+        }
+        const GrXferProcessor* getXferProcessor() const {
+            return this->getPipeline()->getXferProcessor();
+        }
+
+        void execute(GrGpu*, const SetState*) override;
+
+        typedef GrPendingProgramElement<const GrPrimitiveProcessor> ProgramPrimitiveProcessor;
+        ProgramPrimitiveProcessor               fPrimitiveProcessor;
+        SkAlignedSStorage<sizeof(GrPipeline)>   fPipeline;  // raw storage for the GrPipeline
+        GrProgramDesc                           fDesc;
+        GrBatchTracker                          fBatchTracker;
+    };
+
     struct DrawBatch : public Cmd {
-        DrawBatch(State* state, GrBatch* batch, GrBatchTarget* batchTarget)
+        DrawBatch(GrBatch* batch, GrBatchTarget* batchTarget) 
             : Cmd(kDrawBatch_CmdType)
-            , fState(SkRef(state))
             , fBatch(SkRef(batch))
             , fBatchTarget(batchTarget) {
             SkASSERT(!batch->isUsed());
         }
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
-        SkAutoTUnref<State>    fState;
+        // TODO it wouldn't be too hard to let batches allocate in the cmd buffer
         SkAutoTUnref<GrBatch>  fBatch;
 
     private:
@@ -283,18 +284,19 @@
     struct XferBarrier : public Cmd {
         XferBarrier() : Cmd(kXferBarrier_CmdType) {}
 
-        void execute(GrGpu*) override;
+        void execute(GrGpu*, const SetState*) override;
 
         GrXferBarrierType   fBarrierType;
     };
 
-    static const int kCmdBufferInitialSizeInBytes = 8 * 1024;
+     static const int kCmdBufferInitialSizeInBytes = 8 * 1024;
 
-    typedef void* TCmdAlign; // This wouldn't be enough align if a command used long double.
-    typedef GrTRecorder<Cmd, TCmdAlign> CmdBuffer;
+     typedef void* TCmdAlign; // This wouldn't be enough align if a command used long double.
+     typedef GrTRecorder<Cmd, TCmdAlign> CmdBuffer;
 
-    CmdBuffer                           fCmdBuffer;
-    GrBatchTarget                       fBatchTarget;
+     CmdBuffer                           fCmdBuffer;
+     SetState*                           fPrevState;
+     GrBatchTarget                       fBatchTarget;
 };
 
 #endif