Make drawIndexedInstances non-virtual and rewrite GrInOrderDrawBuffer's drawRect on top of drawIndexedInstances.

R=robertphillips@google.com
Review URL: https://codereview.appspot.com/7221078

git-svn-id: http://skia.googlecode.com/svn/trunk@7508 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gyp/SampleApp.gyp b/gyp/SampleApp.gyp
index d635535..8d833dd 100644
--- a/gyp/SampleApp.gyp
+++ b/gyp/SampleApp.gyp
@@ -67,6 +67,7 @@
         '../samplecode/SampleLayers.cpp',
         '../samplecode/SampleLCD.cpp',
         '../samplecode/SampleLines.cpp',
+        '../samplecode/SampleManyRects.cpp',
         '../samplecode/SampleMeasure.cpp',
         '../samplecode/SampleMipMap.cpp',
         '../samplecode/SampleMovie.cpp',
diff --git a/samplecode/SampleManyRects.cpp b/samplecode/SampleManyRects.cpp
new file mode 100644
index 0000000..04d8a41
--- /dev/null
+++ b/samplecode/SampleManyRects.cpp
@@ -0,0 +1,81 @@
+
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SampleCode.h"
+#include "SkCanvas.h"
+#include "SkDevice.h"
+#include "SkPaint.h"
+#include "SkRandom.h"
+#include "SkShader.h"
+#include "SkView.h"
+
+/**
+ * Animated sample used to develop batched rect implementation in GrInOrderDrawBuffer.
+ */
+class ManyRectsView : public SampleView {
+private:
+    enum {
+        N = 1000,   // number of rects drawn on every frame
+    };
+
+public:
+    ManyRectsView() {}
+
+protected:
+    virtual bool onQuery(SkEvent* evt) SK_OVERRIDE {
+        if (SampleCode::TitleQ(*evt)) {
+            SampleCode::TitleR(evt, "ManyRects");
+            return true;
+        }
+        return this->INHERITED::onQuery(evt);
+    }
+
+    // Clears the canvas, draws N randomly sized, placed, and colored rects (each under its
+    // own save/clip/restore), then calls inval(NULL) so the sample keeps redrawing.
+    virtual void onDrawContent(SkCanvas* canvas) SK_OVERRIDE {
+        SkISize dsize = canvas->getDeviceSize();
+        canvas->clear(0xFFF0E0F0);
+
+        for (int i = 0; i < N; ++i) {
+            SkRect rect = SkRect::MakeWH(SkIntToScalar(fRandom.nextRangeU(10, 100)),
+                                         SkIntToScalar(fRandom.nextRangeU(10, 100)));
+            int x = fRandom.nextRangeU(0, dsize.fWidth);
+            int y = fRandom.nextRangeU(0, dsize.fHeight);
+            canvas->save();
+
+            canvas->translate(SkIntToScalar(x), SkIntToScalar(y));
+            // Rotation messes up the GPU batching because of the clip below. We don't notice
+            // that the rect is inside the clip so the clip changes interrupt batching.
+            if (false) {
+                SkMatrix rotate;
+                rotate.setRotate(fRandom.nextUScalar1() * 360,
+                                 SkIntToScalar(x) + SkScalarHalf(rect.fRight),
+                                 SkIntToScalar(y) + SkScalarHalf(rect.fBottom));
+                canvas->concat(rotate);
+            }
+            SkRect clipRect = rect;
+            // This clip will always contain the entire rect. It's here to give the GPU batching
+            // code a little more challenge.
+            clipRect.outset(10, 10);
+            canvas->clipRect(clipRect);
+            SkPaint paint;
+            paint.setColor(fRandom.nextU());
+            canvas->drawRect(rect, paint);
+            canvas->restore();
+        }
+        this->inval(NULL);
+    }
+
+private:
+    SkMWCRandom fRandom;    // source of every random size, position, and color above
+    typedef SampleView INHERITED;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+static SkView* MyFactory() { return new ManyRectsView; }
+static SkViewRegister reg(MyFactory);
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index 50d4741..b729d69 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1791,7 +1791,6 @@
                                           fDrawBufferVBAllocPool,
                                           fDrawBufferIBAllocPool));
 
-    fDrawBuffer->setQuadIndexBuffer(this->getQuadIndexBuffer());
     if (fDrawBuffer) {
         fDrawBuffer->setAutoFlushTarget(fGpu);
         fDrawBuffer->setDrawState(fDrawState);
diff --git a/src/gpu/GrDrawState.cpp b/src/gpu/GrDrawState.cpp
index 6079272..b6c521f 100644
--- a/src/gpu/GrDrawState.cpp
+++ b/src/gpu/GrDrawState.cpp
@@ -530,6 +530,32 @@
     return (GrEffect::kA_ValidComponentFlag & validComponentFlags) && 0xff == GrColorUnpackA(color);
 }
 
+// Conservatively reports whether a draw with 'layout' is known to produce full coverage.
+bool GrDrawState::hasSolidCoverage(GrVertexLayout layout) const {
+    // If we're drawing coverage directly then coverage is effectively treated as color.
+    if (this->isCoverageDrawing()) {
+        return true;
+    }
+
+    // Nothing is known up front; coverage is zeroed so no effect sees an indeterminate value.
+    GrColor coverage = 0;
+    uint32_t validComponentFlags = 0;
+    if (!(layout & kCoverage_VertexLayoutBit)) {
+        // Without per-vertex coverage the constant coverage is the exact starting value.
+        coverage = fCommon.fCoverage;
+        validComponentFlags = GrEffect::kAll_ValidComponentFlags;
+    }
+
+    // Run through the coverage stages and see if the coverage will be all ones at the end.
+    for (int s = this->getFirstCoverageStage(); s < GrDrawState::kNumStages; ++s) {
+        const GrEffectRef* effect = this->getStage(s).getEffect();
+        if (NULL != effect) {
+            (*effect)->getConstantColorComponents(&coverage, &validComponentFlags);
+        }
+    }
+    return (GrEffect::kAll_ValidComponentFlags == validComponentFlags) && (0xffffffff == coverage);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 void GrDrawState::AutoViewMatrixRestore::restore() {
diff --git a/src/gpu/GrDrawState.h b/src/gpu/GrDrawState.h
index dc20239..0a0b0da 100644
--- a/src/gpu/GrDrawState.h
+++ b/src/gpu/GrDrawState.h
@@ -304,8 +304,15 @@
                    int* coverageOffset,
                    int* edgeOffset);
 
-    // determine if src alpha is guaranteed to be one for all src pixels
-    bool srcAlphaWillBeOne(GrVertexLayout vertexLayout) const;
+    /**
+     * Determines whether src alpha is guaranteed to be one for all src pixels
+     */
+    bool srcAlphaWillBeOne(GrVertexLayout) const;
+
+    /**
+     * Determines whether the output coverage is guaranteed to be one for all pixels hit by a draw.
+     */
+    bool hasSolidCoverage(GrVertexLayout) const;
 
     /**
      * Accessing positions, texture coords, or colors, of a vertex within an
@@ -409,14 +416,28 @@
      */
     class AutoColorRestore : public ::GrNoncopyable {
     public:
+        AutoColorRestore() : fDrawState(NULL) {}
+
         AutoColorRestore(GrDrawState* drawState, GrColor color) {
+            fDrawState = NULL;
+            this->set(drawState, color);
+        }
+
+        void reset() {
+            if (NULL != fDrawState) {
+                fDrawState->setColor(fOldColor);
+                fDrawState = NULL;
+            }
+        }
+
+        void set(GrDrawState* drawState, GrColor color) {
+            this->reset();
             fDrawState = drawState;
             fOldColor = fDrawState->getColor();
             fDrawState->setColor(color);
         }
-        ~AutoColorRestore() {
-            fDrawState->setColor(fOldColor);
-        }
+
+        ~AutoColorRestore() { this->reset(); }
     private:
         GrDrawState*    fDrawState;
         GrColor         fOldColor;
diff --git a/src/gpu/GrDrawTarget.cpp b/src/gpu/GrDrawTarget.cpp
index 04ffaa4..bb8b9ec 100644
--- a/src/gpu/GrDrawTarget.cpp
+++ b/src/gpu/GrDrawTarget.cpp
@@ -19,6 +19,66 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 
+GrDrawTarget::DrawInfo& GrDrawTarget::DrawInfo::operator =(const DrawInfo& di) {
+    fPrimitiveType  = di.fPrimitiveType;
+    fStartVertex    = di.fStartVertex;
+    fStartIndex     = di.fStartIndex;
+    fVertexCount    = di.fVertexCount;
+    fIndexCount     = di.fIndexCount;
+
+    fInstanceCount          = di.fInstanceCount;
+    fVerticesPerInstance    = di.fVerticesPerInstance;
+    fIndicesPerInstance     = di.fIndicesPerInstance;
+
+    if (NULL != di.fDevBounds) {
+        GrAssert(di.fDevBounds == &di.fDevBoundsStorage);
+        fDevBoundsStorage = di.fDevBoundsStorage;
+        fDevBounds = &fDevBoundsStorage;
+    } else {
+        fDevBounds = NULL;
+    }
+    return *this;
+}
+
+#if GR_DEBUG
+bool GrDrawTarget::DrawInfo::isInstanced() const {
+    if (fInstanceCount > 0) {
+        GrAssert(0 == fIndexCount % fIndicesPerInstance);
+        GrAssert(0 == fVertexCount % fVerticesPerInstance);
+        GrAssert(fIndexCount / fIndicesPerInstance == fInstanceCount);
+        GrAssert(fVertexCount / fVerticesPerInstance == fInstanceCount);
+        // there is no way to specify a non-zero start index to drawIndexedInstances().
+        GrAssert(0 == fStartIndex);
+        return true;
+    } else {
+        GrAssert(!fVerticesPerInstance);
+        GrAssert(!fIndicesPerInstance);
+        return false;
+    }
+}
+#endif
+
+void GrDrawTarget::DrawInfo::adjustInstanceCount(int instanceOffset) {
+    GrAssert(this->isInstanced());
+    GrAssert(instanceOffset + fInstanceCount >= 0);
+    fInstanceCount += instanceOffset;
+    fVertexCount = fVerticesPerInstance * fInstanceCount;
+    fIndexCount = fIndicesPerInstance * fInstanceCount;
+}
+
+void GrDrawTarget::DrawInfo::adjustStartVertex(int vertexOffset) {
+    fStartVertex += vertexOffset;
+    GrAssert(fStartVertex >= 0);
+}
+
+void GrDrawTarget::DrawInfo::adjustStartIndex(int indexOffset) {
+    GrAssert(this->isIndexed());
+    fStartIndex += indexOffset;
+    GrAssert(fStartIndex >= 0);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
 #define DEBUG_INVAL_BUFFER 0xdeadcafe
 #define DEBUG_INVAL_START_IDX -1
 
@@ -338,9 +398,12 @@
     return true;
 }
 
-void GrDrawTarget::drawIndexed(GrPrimitiveType type, int startVertex,
-                               int startIndex, int vertexCount,
-                               int indexCount) {
+void GrDrawTarget::drawIndexed(GrPrimitiveType type,
+                               int startVertex,
+                               int startIndex,
+                               int vertexCount,
+                               int indexCount,
+                               const SkRect* devBounds) {
     if (indexCount > 0 && this->checkDraw(type, startVertex, startIndex, vertexCount, indexCount)) {
         DrawInfo info;
         info.fPrimitiveType = type;
@@ -348,13 +411,22 @@
         info.fStartIndex    = startIndex;
         info.fVertexCount   = vertexCount;
         info.fIndexCount    = indexCount;
+
+        info.fInstanceCount         = 0;
+        info.fVerticesPerInstance   = 0;
+        info.fIndicesPerInstance    = 0;
+
+        if (NULL != devBounds) {
+            info.setDevBounds(*devBounds);
+        }
         this->onDraw(info);
     }
 }
 
 void GrDrawTarget::drawNonIndexed(GrPrimitiveType type,
                                   int startVertex,
-                                  int vertexCount) {
+                                  int vertexCount,
+                                  const SkRect* devBounds) {
     if (vertexCount > 0 && this->checkDraw(type, startVertex, -1, vertexCount, -1)) {
         DrawInfo info;
         info.fPrimitiveType = type;
@@ -362,6 +434,14 @@
         info.fStartIndex    = 0;
         info.fVertexCount   = vertexCount;
         info.fIndexCount    = 0;
+
+        info.fInstanceCount         = 0;
+        info.fVerticesPerInstance   = 0;
+        info.fIndicesPerInstance    = 0;
+
+        if (NULL != devBounds) {
+            info.setDevBounds(*devBounds);
+        }
         this->onDraw(info);
     }
 }
@@ -546,27 +626,43 @@
 void GrDrawTarget::drawIndexedInstances(GrPrimitiveType type,
                                         int instanceCount,
                                         int verticesPerInstance,
-                                        int indicesPerInstance) {
+                                        int indicesPerInstance,
+                                        const SkRect* devBounds) {
     if (!verticesPerInstance || !indicesPerInstance) {
         return;
     }
 
-    int instancesPerDraw = this->indexCountInCurrentSource() /
-                           indicesPerInstance;
-    if (!instancesPerDraw) {
+    int maxInstancesPerDraw = this->indexCountInCurrentSource() / indicesPerInstance;
+    if (!maxInstancesPerDraw) {
         return;
     }
 
-    instancesPerDraw = GrMin(instanceCount, instancesPerDraw);
-    int startVertex = 0;
+    DrawInfo info;
+    info.fPrimitiveType = type;
+    info.fStartIndex = 0;
+    info.fStartVertex = 0;
+    info.fIndicesPerInstance = indicesPerInstance;
+    info.fVerticesPerInstance = verticesPerInstance;
+
+    // Set the same bounds for all the draws.
+    if (NULL != devBounds) {
+        info.setDevBounds(*devBounds);
+    }
+
     while (instanceCount) {
-        this->drawIndexed(type,
-                          startVertex,
-                          0,
-                          verticesPerInstance * instancesPerDraw,
-                          indicesPerInstance * instancesPerDraw);
-        startVertex += verticesPerInstance;
-        instanceCount -= instancesPerDraw;
+        info.fInstanceCount = GrMin(instanceCount, maxInstancesPerDraw);
+        info.fVertexCount = info.fInstanceCount * verticesPerInstance;
+        info.fIndexCount = info.fInstanceCount * indicesPerInstance;
+
+        if (this->checkDraw(type,
+                            info.fStartVertex,
+                            info.fStartIndex,
+                            info.fVertexCount,
+                            info.fIndexCount)) {
+            this->onDraw(info);
+        }
+        info.fStartVertex += info.fVertexCount;
+        instanceCount -= info.fInstanceCount;
     }
 }
 
@@ -576,7 +672,24 @@
                             const SkMatrix* matrix,
                             const GrRect* srcRects[],
                             const SkMatrix* srcMatrices[]) {
-    GrVertexLayout layout = GetRectVertexLayout(srcRects);
+    GrVertexLayout layout = 0;
+    uint32_t explicitCoordMask = 0;
+
+    if (NULL != srcRects) {
+        // numTC outlives the loop so each stage with a src rect gets its own tex coord set.
+        int numTC = 0;
+        for (int s = 0; s < GrDrawState::kNumStages; ++s) {
+            if (NULL != srcRects[s]) {
+                layout |= GrDrawState::StageTexCoordVertexLayoutBit(s, numTC++);
+                explicitCoordMask |= (1 << s);
+            }
+        }
+    }
+
+    GrDrawState::AutoViewMatrixRestore avmr;
+    if (NULL != matrix) {
+        avmr.set(this->drawState(), *matrix, explicitCoordMask);
+    }
 
     AutoReleaseGeometry geo(this, layout, 4, 0);
     if (!geo.succeeded()) {
@@ -584,69 +697,14 @@
         return;
     }
 
-    SetRectVertices(rect, matrix, srcRects,
-                    srcMatrices, SK_ColorBLACK, layout, geo.vertices());
-
-    drawNonIndexed(kTriangleFan_GrPrimitiveType, 0, 4);
-}
-
-GrVertexLayout GrDrawTarget::GetRectVertexLayout(const GrRect* srcRects[]) {
-    if (NULL == srcRects) {
-        return 0;
-    }
-
-    GrVertexLayout layout = 0;
-    for (int i = 0; i < GrDrawState::kNumStages; ++i) {
-        int numTC = 0;
-        if (NULL != srcRects[i]) {
-            layout |= GrDrawState::StageTexCoordVertexLayoutBit(i, numTC);
-            ++numTC;
-        }
-    }
-    return layout;
-}
-
-// This method fills int the four vertices for drawing 'rect'.
-//      matrix - is applied to each vertex
-//      srcRects - provide the uvs for each vertex
-//      srcMatrices - are applied to the corresponding 'srcRect'
-//      color - vertex color (replicated in each vertex)
-//      layout - specifies which uvs and/or color are present
-//      vertices - storage for the resulting vertices
-// Note: the color parameter will only be used when kColor_VertexLayoutBit
-// is present in 'layout'
-void GrDrawTarget::SetRectVertices(const GrRect& rect,
-                                   const SkMatrix* matrix,
-                                   const GrRect* srcRects[],
-                                   const SkMatrix* srcMatrices[],
-                                   GrColor color,
-                                   GrVertexLayout layout,
-                                   void* vertices) {
-#if GR_DEBUG
-    // check that the layout and srcRects agree
-    for (int i = 0; i < GrDrawState::kNumStages; ++i) {
-        if (GrDrawState::VertexTexCoordsForStage(i, layout) >= 0) {
-            GR_DEBUGASSERT(NULL != srcRects && NULL != srcRects[i]);
-        } else {
-            GR_DEBUGASSERT(NULL == srcRects || NULL == srcRects[i]);
-        }
-    }
-#endif
-
-    int stageOffsets[GrDrawState::kNumStages], colorOffset;
-    int vsize = GrDrawState::VertexSizeAndOffsetsByStage(layout, stageOffsets,
-                                                         &colorOffset, NULL, NULL);
-
-    GrTCast<GrPoint*>(vertices)->setRectFan(rect.fLeft, rect.fTop,
-                                            rect.fRight, rect.fBottom,
-                                            vsize);
-    if (NULL != matrix) {
-        matrix->mapPointsWithStride(GrTCast<GrPoint*>(vertices), vsize, 4);
-    }
+    int stageOffsets[GrDrawState::kNumStages];
+    int vsize = GrDrawState::VertexSizeAndOffsetsByStage(layout, stageOffsets,  NULL, NULL, NULL);
+    geo.positions()->setRectFan(rect.fLeft, rect.fTop, rect.fRight, rect.fBottom, vsize);
 
     for (int i = 0; i < GrDrawState::kNumStages; ++i) {
-        if (stageOffsets[i] > 0) {
-            GrPoint* coords = GrTCast<GrPoint*>(GrTCast<intptr_t>(vertices) +
+        if (explicitCoordMask & (1 << i)) {
+            GrAssert(NULL != stageOffsets[i]);
+            GrPoint* coords = GrTCast<GrPoint*>(GrTCast<intptr_t>(geo.vertices()) +
                                                 stageOffsets[i]);
             coords->setRectFan(srcRects[i]->fLeft, srcRects[i]->fTop,
                                srcRects[i]->fRight, srcRects[i]->fBottom,
@@ -654,18 +712,12 @@
             if (NULL != srcMatrices && NULL != srcMatrices[i]) {
                 srcMatrices[i]->mapPointsWithStride(coords, vsize, 4);
             }
+        } else {
+            GrAssert(NULL == stageOffsets[i]);
         }
     }
 
-    if (colorOffset >= 0) {
-
-        GrColor* vertCol = GrTCast<GrColor*>(GrTCast<intptr_t>(vertices) + colorOffset);
-
-        for (int i = 0; i < 4; ++i) {
-            *vertCol = color;
-            vertCol = (GrColor*) ((intptr_t) vertCol + vsize);
-        }
-    }
+    this->drawNonIndexed(kTriangleFan_GrPrimitiveType, 0, 4);
 }
 
 void GrDrawTarget::clipWillBeSet(const GrClipData* clipData) {
diff --git a/src/gpu/GrDrawTarget.h b/src/gpu/GrDrawTarget.h
index 1c8ada1..c1444b4 100644
--- a/src/gpu/GrDrawTarget.h
+++ b/src/gpu/GrDrawTarget.h
@@ -26,7 +26,6 @@
 class GrClipData;
 class GrPath;
 class GrVertexBuffer;
-
 class SkStrokeRec;
 
 class GrDrawTarget : public GrRefCnt {
@@ -50,6 +49,8 @@
         int fMaxTextureSize;
     };
 
+    class DrawInfo;
+
 public:
     SK_DECLARE_INST_COUNT(GrDrawTarget)
 
@@ -361,12 +362,15 @@
      * @param indexCount   the number of index elements to read. The index count
      *                     is effectively trimmed to the last completely
      *                     specified primitive.
+     * @param devBounds    optional bounds hint. This is a promise from the caller,
+     *                     not a request for clipping.
      */
     void drawIndexed(GrPrimitiveType type,
                      int startVertex,
                      int startIndex,
                      int vertexCount,
-                     int indexCount);
+                     int indexCount,
+                     const SkRect* devBounds = NULL);
 
     /**
      * Draws non-indexed geometry using the current state and current vertex
@@ -376,10 +380,13 @@
      * @param startVertex  the vertex in the vertex array/buffer corresponding
      *                     to index 0
      * @param vertexCount  one greater than the max index.
+     * @param devBounds    optional bounds hint. This is a promise from the caller,
+     *                     not a request for clipping.
      */
     void drawNonIndexed(GrPrimitiveType type,
                         int startVertex,
-                        int vertexCount);
+                        int vertexCount,
+                        const SkRect* devBounds = NULL);
 
     /**
      * Draws path into the stencil buffer. The fill must be either even/odd or
@@ -393,9 +400,13 @@
      * and vertex sources. After returning, the vertex and index sources may
      * have changed. They should be reestablished before the next drawIndexed
      * or drawNonIndexed. This cannot be called between reserving and releasing
-     * geometry. The GrDrawTarget subclass may be able to perform additional
-     * optimizations if drawRect is used rather than drawIndexed or
-     * drawNonIndexed.
+     * geometry.
+     *
+     * A subclass may override this to perform more optimal rect rendering. Its
+     * draws should be funneled through one of the public GrDrawTarget draw methods
+     * (e.g. drawNonIndexed, drawIndexedInstances, ...). The base class draws a two
+     * triangle fan using drawNonIndexed from reserved vertex space.
+     *
      * @param rect      the rect to draw
      * @param matrix    optional matrix applied to rect (before viewMatrix)
      * @param srcRects  specifies rects for stages enabled by stageEnableMask.
@@ -425,7 +436,6 @@
         this->drawRect(rect, matrix, NULL, NULL);
     }
 
-
     /**
      * This call is used to draw multiple instances of some geometry with a
      * given number of vertices (V) and indices (I) per-instance. The indices in
@@ -440,7 +450,7 @@
      * source. The size of the index buffer limits the number of instances that
      * can be drawn by the GPU in a single draw. However, the caller may specify
      * any (positive) number for instanceCount and if necessary multiple GPU
-     * draws will be issued. Morever, when drawIndexedInstances is called
+     * draws will be issued. Moreover, when drawIndexedInstances is called
      * multiple times it may be possible for GrDrawTarget to group them into a
      * single GPU draw.
      *
@@ -453,11 +463,14 @@
      *                              in the above description).
      * @param indicesPerInstance    The number of indices in each instance (I
      *                              in the above description).
+     * @param devBounds    optional bounds hint. This is a promise from the caller,
+     *                     not a request for clipping.
      */
-    virtual void drawIndexedInstances(GrPrimitiveType type,
-                                      int instanceCount,
-                                      int verticesPerInstance,
-                                      int indicesPerInstance);
+    void drawIndexedInstances(GrPrimitiveType type,
+                              int instanceCount,
+                              int verticesPerInstance,
+                              int indicesPerInstance,
+                              const SkRect* devBounds = NULL);
 
     /**
      * Clear the current render target if one isn't passed in. Ignores the
@@ -474,6 +487,11 @@
      */
     virtual void purgeResources() {};
 
+    /**
+     * For subclass internal use to invoke a call to onDraw(). See DrawInfo below.
+     */
+    void executeDraw(const DrawInfo& info) { this->onDraw(info); }
+
     ////////////////////////////////////////////////////////////////////////////
 
     /**
@@ -714,48 +732,63 @@
         return this->getGeomSrc().fVertexLayout;
     }
 
-    // Helpers for drawRect, protected so subclasses that override drawRect can use them.
-    static GrVertexLayout GetRectVertexLayout(const GrRect* srcRects[]);
-
-    static void SetRectVertices(const GrRect& rect,
-                                const SkMatrix* matrix,
-                                const GrRect* srcRects[],
-                                const SkMatrix* srcMatrices[],
-                                GrColor color,
-                                GrVertexLayout layout,
-                                void* vertices);
-
     Caps fCaps;
 
+    /**
+     * Used to communicate draws to subclass's onDraw function.
+     */
     class DrawInfo {
     public:
         DrawInfo(const DrawInfo& di) { (*this) = di; }
-        DrawInfo& operator =(const DrawInfo& di) {
-            fPrimitiveType  = di.fPrimitiveType;
-            fStartVertex    = di.fStartVertex;
-            fStartIndex     = di.fStartIndex;
-            fVertexCount    = di.fVertexCount;
-            fIndexCount     = di.fIndexCount;
-            return *this;
-        }
+        DrawInfo& operator =(const DrawInfo& di);
 
         GrPrimitiveType primitiveType() const { return fPrimitiveType; }
         int startVertex() const { return fStartVertex; }
         int startIndex() const { return fStartIndex; }
         int vertexCount() const { return fVertexCount; }
         int indexCount() const { return fIndexCount; }
+        int verticesPerInstance() const { return fVerticesPerInstance; }
+        int indicesPerInstance() const { return fIndicesPerInstance; }
+        int instanceCount() const { return fInstanceCount; }
 
         bool isIndexed() const { return fIndexCount > 0; }
+#if GR_DEBUG
+        bool isInstanced() const; // this version is longer because of asserts
+#else
+        bool isInstanced() const { return fInstanceCount > 0; }
+#endif
+
+        // adds or removes instances
+        void adjustInstanceCount(int instanceOffset);
+        // shifts the start vertex
+        void adjustStartVertex(int vertexOffset);
+        // shifts the start index
+        void adjustStartIndex(int indexOffset);
+
+        void setDevBounds(const SkRect& bounds) {
+            fDevBoundsStorage = bounds;
+            fDevBounds = &fDevBoundsStorage;
+        }
+        const SkRect* getDevBounds() const { return fDevBounds; }
 
     private:
-        DrawInfo() {}
+        DrawInfo() { fDevBounds = NULL; }
+
         friend class GrDrawTarget;
+
         GrPrimitiveType fPrimitiveType;
 
         int             fStartVertex;
         int             fStartIndex;
         int             fVertexCount;
         int             fIndexCount;
+
+        int             fInstanceCount;
+        int             fVerticesPerInstance;
+        int             fIndicesPerInstance;
+
+        SkRect          fDevBoundsStorage;
+        SkRect*         fDevBounds;
     };
 
 private:
diff --git a/src/gpu/GrInOrderDrawBuffer.cpp b/src/gpu/GrInOrderDrawBuffer.cpp
index a0e3e99..bc605fd 100644
--- a/src/gpu/GrInOrderDrawBuffer.cpp
+++ b/src/gpu/GrInOrderDrawBuffer.cpp
@@ -21,13 +21,12 @@
                                          GrIndexBufferAllocPool* indexPool)
     : fAutoFlushTarget(NULL)
     , fClipSet(true)
+    , fClipProxyState(kUnknown_ClipProxyState)
     , fVertexPool(*vertexPool)
     , fIndexPool(*indexPool)
-    , fLastRectVertexLayout(0)
-    , fQuadIndexBuffer(NULL)
-    , fMaxQuads(0)
     , fFlushing(false) {
 
+    fGpu.reset(SkRef(gpu));
     fCaps = gpu->getCaps();
 
     GrAssert(NULL != vertexPool);
@@ -49,29 +48,26 @@
     this->reset();
     // This must be called by before the GrDrawTarget destructor
     this->releaseGeometry();
-    GrSafeUnref(fQuadIndexBuffer);
     GrSafeUnref(fAutoFlushTarget);
 }
 
-void GrInOrderDrawBuffer::setQuadIndexBuffer(const GrIndexBuffer* indexBuffer) {
-    bool newIdxBuffer = fQuadIndexBuffer != indexBuffer;
-    if (newIdxBuffer) {
-        GrSafeUnref(fQuadIndexBuffer);
-        fQuadIndexBuffer = indexBuffer;
-        GrSafeRef(fQuadIndexBuffer);
-        fCurrQuad = 0;
-        fMaxQuads = (NULL == indexBuffer) ? 0 : indexBuffer->maxQuads();
-    } else {
-        GrAssert((NULL == indexBuffer && 0 == fMaxQuads) ||
-                 (indexBuffer->maxQuads() == fMaxQuads));
-    }
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 
-void GrInOrderDrawBuffer::resetDrawTracking() {
-    fCurrQuad = 0;
-    fInstancedDrawTracker.reset();
+namespace {
+// Sets 'bounds' to enclose 'vertexCount' positions that are 'vertexSize' bytes apart.
+void get_vertex_bounds(const void* vertices, size_t vertexSize, int vertexCount,
+                       SkRect* bounds) {
+    GrAssert(vertexSize >= sizeof(GrPoint));
+    GrAssert(vertexCount > 0);
+    const GrPoint* point = static_cast<const GrPoint*>(vertices);
+    bounds->fLeft = bounds->fRight = point->fX;
+    bounds->fTop = bounds->fBottom = point->fY;
+    for (int i = 1; i < vertexCount; ++i) {
+        const char* next = reinterpret_cast<const char*>(point) + vertexSize;
+        point = reinterpret_cast<const GrPoint*>(next);
+        bounds->growToInclude(point->fX, point->fY);
+    }
+}
 }
 
 void GrInOrderDrawBuffer::drawRect(const GrRect& rect,
@@ -79,305 +75,223 @@
                                    const GrRect* srcRects[],
                                    const SkMatrix* srcMatrices[]) {
 
-    GrAssert(!(NULL == fQuadIndexBuffer && fCurrQuad));
-    GrAssert(!(fDraws.empty() && fCurrQuad));
-    GrAssert(!(0 != fMaxQuads && NULL == fQuadIndexBuffer));
+    GrVertexLayout layout = 0;
+    GrDrawState::AutoColorRestore acr;
+    GrColor color = this->drawState()->getColor();
 
-    GrDrawState* drawState = this->drawState();
-
-    // if we have a quad IB then either append to the previous run of
-    // rects or start a new run
-    if (fMaxQuads) {
-
-        bool appendToPreviousDraw = false;
-        GrVertexLayout layout = GetRectVertexLayout(srcRects);
-
-        // Batching across colors means we move the draw color into the
-        // rect's vertex colors to allow greater batching (a lot of rects
-        // in a row differing only in color is a common occurence in tables).
-        bool batchAcrossColors = true;
-        if (!this->getCaps().dualSourceBlendingSupport()) {
-            for (int s = 0; s < GrDrawState::kNumStages; ++s) {
-                if (this->getDrawState().isStageEnabled(s)) {
-                    // We disable batching across colors when there is a texture
-                    // present because (by pushing the the color to the vertices)
-                    // Ganesh loses track of the rect's opacity. This, in turn, can
-                    // cause some of the blending optimizations to be disabled. This
-                    // becomes a huge problem on some of the smaller devices where
-                    // shader derivatives and dual source blending aren't supported.
-                    // In those cases paths are often drawn to a texture and then
-                    // drawn as a texture (using this method). Because dual source
-                    // blending is disabled (and the blend optimizations are short
-                    // circuited) some of the more esoteric blend modes can no longer
-                    // be supported.
-                    // TODO: add tracking of batchAcrossColors's opacity
-                    batchAcrossColors = false;
-                    break;
-                }
-            }
-        }
-
-        if (batchAcrossColors) {
-            layout |= GrDrawState::kColor_VertexLayoutBit;
-        }
-
-        AutoReleaseGeometry geo(this, layout, 4, 0);
-        if (!geo.succeeded()) {
-            GrPrintf("Failed to get space for vertices!\n");
-            return;
-        }
-        SkMatrix combinedMatrix = drawState->getViewMatrix();
-        // We go to device space so that matrix changes allow us to concat
-        // rect draws. When the caller has provided explicit source rects
-        // then we don't want to modify the stages' matrices. Otherwise
-        // we have to account for the view matrix change in the stage
-        // matrices.
-        uint32_t explicitCoordMask = 0;
-        if (srcRects) {
-            for (int s = 0; s < GrDrawState::kNumStages; ++s) {
-                if (srcRects[s]) {
-                    explicitCoordMask |= (1 << s);
-                }
-            }
-        }
-        GrDrawState::AutoDeviceCoordDraw adcd(this->drawState(), explicitCoordMask);
-        if (!adcd.succeeded()) {
-            return;
-        }
-        if (NULL != matrix) {
-            combinedMatrix.preConcat(*matrix);
-        }
-
-        SetRectVertices(rect, &combinedMatrix, srcRects, srcMatrices,
-                        this->getDrawState().getColor(), layout, geo.vertices());
-
-        // Now that the paint's color is stored in the vertices set it to
-        // white so that the following code can batch all the rects regardless
-        // of paint color
-        GrDrawState::AutoColorRestore acr(this->drawState(),
-                                          batchAcrossColors ? SK_ColorWHITE
-                                                            : this->getDrawState().getColor());
-
-        // we don't want to miss an opportunity to batch rects together
-        // simply because the clip has changed if the clip doesn't affect
-        // the rect.
-        bool disabledClip = false;
-
-        if (drawState->isClipState()) {
-
-            GrRect devClipRect;
-            bool isIntersectionOfRects = false;
-            const GrClipData* clip = this->getClip();
-            clip->fClipStack->getConservativeBounds(-clip->fOrigin.fX,
-                                                    -clip->fOrigin.fY,
-                                                    drawState->getRenderTarget()->width(),
-                                                    drawState->getRenderTarget()->height(),
-                                                    &devClipRect,
-                                                    &isIntersectionOfRects);
-
-            if (isIntersectionOfRects) {
-                // If the clip rect touches the edge of the viewport, extended it
-                // out (close) to infinity to avoid bogus intersections.
-                // We might consider a more exact clip to viewport if this
-                // conservative test fails.
-                const GrRenderTarget* target = drawState->getRenderTarget();
-                if (0 >= devClipRect.fLeft) {
-                    devClipRect.fLeft = SK_ScalarMin;
-                }
-                if (target->width() <= devClipRect.fRight) {
-                    devClipRect.fRight = SK_ScalarMax;
-                }
-                if (0 >= devClipRect.top()) {
-                    devClipRect.fTop = SK_ScalarMin;
-                }
-                if (target->height() <= devClipRect.fBottom) {
-                    devClipRect.fBottom = SK_ScalarMax;
-                }
-                int stride = GrDrawState::VertexSize(layout);
-                bool insideClip = true;
-                for (int v = 0; v < 4; ++v) {
-                    const GrPoint& p = *GrDrawState::GetVertexPoint(geo.vertices(), v, stride);
-                    if (!devClipRect.contains(p)) {
-                        insideClip = false;
-                        break;
-                    }
-                }
-                if (insideClip) {
-                    drawState->disableState(GrDrawState::kClip_StateBit);
-                    disabledClip = true;
-                }
-            }
-        }
-
-        if (!this->needsNewClip() &&
-            !this->needsNewState() &&
-            fCurrQuad > 0 &&
-            fCurrQuad < fMaxQuads &&
-            layout == fLastRectVertexLayout) {
-
-            int vsize = GrDrawState::VertexSize(layout);
-
-            DrawRecord& lastDraw = fDraws.back();
-
-            GrAssert(lastDraw.fIndexBuffer == fQuadIndexBuffer);
-            GrAssert(kTriangles_GrPrimitiveType == lastDraw.fPrimitiveType);
-            GrAssert(0 == lastDraw.fVertexCount % 4);
-            GrAssert(0 == lastDraw.fIndexCount % 6);
-            GrAssert(0 == lastDraw.fStartIndex);
-
-            GeometryPoolState& poolState = fGeoPoolStateStack.back();
-
-            appendToPreviousDraw = kDraw_Cmd == fCmds.back() &&
-                                   lastDraw.fVertexBuffer == poolState.fPoolVertexBuffer &&
-                                   (fCurrQuad * 4 + lastDraw.fStartVertex) ==
-                                   poolState.fPoolStartVertex;
-
-            if (appendToPreviousDraw) {
-                lastDraw.fVertexCount += 4;
-                lastDraw.fIndexCount += 6;
-                fCurrQuad += 1;
-                // we reserved above, so we should be the first
-                // use of this vertex reservation.
-                GrAssert(0 == poolState.fUsedPoolVertexBytes);
-                poolState.fUsedPoolVertexBytes = 4 * vsize;
-            }
-        }
-        if (!appendToPreviousDraw) {
-            this->setIndexSourceToBuffer(fQuadIndexBuffer);
-            this->drawIndexed(kTriangles_GrPrimitiveType, 0, 0, 4, 6);
-            fCurrQuad = 1;
-            fLastRectVertexLayout = layout;
-        }
-        if (disabledClip) {
-            drawState->enableState(GrDrawState::kClip_StateBit);
-        }
-        fInstancedDrawTracker.reset();
-    } else {
-        INHERITED::drawRect(rect, matrix, srcRects, srcMatrices);
+    // Using per-vertex colors allows batching across colors. (A lot of rects in a row differing
+    // only in color is a common occurrence in tables). However, having per-vertex colors disables
+    // blending optimizations because we don't know if the color will be solid or not. These
+    // optimizations help determine whether coverage and color can be blended correctly when
+    // dual-source blending isn't available. This comes into play when there is coverage. If colors
+    // were a stage it could take a hint that every vertex's color will be opaque.
+    if (this->getCaps().dualSourceBlendingSupport() ||
+        this->getDrawState().hasSolidCoverage(this->getGeomSrc().fVertexLayout)) {
+        layout |= GrDrawState::kColor_VertexLayoutBit;
+        // We set the draw state's color to white here. This is done so that any batching performed
+        // in onDraw() won't get a false from GrDrawState::op== due to a color mismatch between
+        // rects. TODO: Once vertex layout is owned by GrDrawState it should skip comparing the
+        // constant color in its op== when the kColor layout bit is set and then we can remove this.
+        acr.set(this->drawState(), 0xFFFFFFFF);
     }
-}
 
-void GrInOrderDrawBuffer::drawIndexedInstances(GrPrimitiveType type,
-                                               int instanceCount,
-                                               int verticesPerInstance,
-                                               int indicesPerInstance) {
-    if (!verticesPerInstance || !indicesPerInstance) {
+    uint32_t explicitCoordMask = 0;  // bit s is set when stage s has caller-supplied coords
+    if (NULL != srcRects) {
+        int numTC = 0;  // next free tex coord set; must persist across loop iterations
+        for (int s = 0; s < GrDrawState::kNumStages; ++s) {
+            if (NULL != srcRects[s]) {
+                layout |= GrDrawState::StageTexCoordVertexLayoutBit(s, numTC);
+                ++numTC;
+                explicitCoordMask |= (1 << s);
+            }
+        }
+    }
+
+    AutoReleaseGeometry geo(this, layout, 4, 0);
+    if (!geo.succeeded()) {
+        GrPrintf("Failed to get space for vertices!\n");
         return;
     }
 
+    // Go to device coords to allow batching across matrix changes
+    SkMatrix combinedMatrix;
+    if (NULL != matrix) {
+        combinedMatrix = *matrix;
+    } else {
+        combinedMatrix.reset();
+    }
+    combinedMatrix.postConcat(this->drawState()->getViewMatrix());
+    // When the caller has provided an explicit source rect for a stage then we don't want to
+    // modify that stage's matrix. Otherwise if the effect is generating its source rect from
+    // the vertex positions then we have to account for the view matrix change.
+    GrDrawState::AutoDeviceCoordDraw adcd(this->drawState(), explicitCoordMask);
+    if (!adcd.succeeded()) {
+        return;
+    }
+
+    int stageOffsets[GrDrawState::kNumStages], colorOffset;
+    int vsize = GrDrawState::VertexSizeAndOffsetsByStage(layout, stageOffsets,
+                                                         &colorOffset, NULL, NULL);
+
+    geo.positions()->setRectFan(rect.fLeft, rect.fTop, rect.fRight, rect.fBottom, vsize);
+    combinedMatrix.mapPointsWithStride(geo.positions(), vsize, 4);
+
+    SkRect devBounds;
+    // since we already computed the dev verts, set the bounds hint. This will help us avoid
+    // unnecessary clipping in our onDraw().
+    get_vertex_bounds(geo.vertices(), vsize, 4, &devBounds);
+
+    for (int i = 0; i < GrDrawState::kNumStages; ++i) {
+        if (explicitCoordMask & (1 << i)) {
+            GrAssert(0 != stageOffsets[i]);  // a byte offset, not a pointer: compare against 0
+            GrPoint* coords = GrTCast<GrPoint*>(GrTCast<intptr_t>(geo.vertices()) +
+                                                stageOffsets[i]);
+            coords->setRectFan(srcRects[i]->fLeft, srcRects[i]->fTop,
+                               srcRects[i]->fRight, srcRects[i]->fBottom,
+                               vsize);
+            if (NULL != srcMatrices && NULL != srcMatrices[i]) {
+                srcMatrices[i]->mapPointsWithStride(coords, vsize, 4);
+            }
+        } else {
+            GrAssert(0 == stageOffsets[i]);  // no explicit coords were requested for this stage
+        }
+    }
+
+    if (colorOffset >= 0) {
+        GrColor* vertColor = GrTCast<GrColor*>(GrTCast<intptr_t>(geo.vertices()) + colorOffset);
+        for (int i = 0; i < 4; ++i) {
+            *vertColor = color;
+            vertColor = (GrColor*) ((intptr_t) vertColor + vsize);
+        }
+    }
+
+    this->setIndexSourceToBuffer(fGpu->getQuadIndexBuffer());
+    this->drawIndexedInstances(kTriangles_GrPrimitiveType, 1, 4, 6, &devBounds);
+}
+
+bool GrInOrderDrawBuffer::quickInsideClip(const SkRect& devBounds) {
+    if (!this->getDrawState().isClipState()) {
+        return true;  // clipping is disabled, so nothing can be clipped
+    }
+    if (kUnknown_ClipProxyState == fClipProxyState) {  // proxy not yet computed for this clip
+        SkIRect rect;
+        bool iior;  // set by getConservativeBounds; treated below as "the clip is a rect"
+        this->getClip()->getConservativeBounds(this->getDrawState().getRenderTarget(), &rect, &iior);
+        if (iior) {
+            // The clip is a rect. We will remember that in fClipProxy. It is common for an edge (or
+            // all edges) of the clip to be at the edge of the RT. However, we get that clipping for
+            // free via the viewport. We don't want to think that clipping must be enabled in this
+            // case. So we extend the clip outward from the edge to avoid these false negatives.
+            fClipProxyState = kValid_ClipProxyState;
+            fClipProxy = SkRect::MakeFromIRect(rect);
+
+            if (fClipProxy.fLeft <= 0) {
+                fClipProxy.fLeft = SK_ScalarMin;
+            }
+            if (fClipProxy.fTop <= 0) {
+                fClipProxy.fTop = SK_ScalarMin;
+            }
+            if (fClipProxy.fRight >= this->getDrawState().getRenderTarget()->width()) {
+                fClipProxy.fRight = SK_ScalarMax;
+            }
+            if (fClipProxy.fBottom >= this->getDrawState().getRenderTarget()->height()) {
+                fClipProxy.fBottom = SK_ScalarMax;
+            }
+        } else {
+            fClipProxyState = kInvalid_ClipProxyState;  // fall back to the clip stack query below
+        }
+    }
+    if (kValid_ClipProxyState == fClipProxyState) {
+        return fClipProxy.contains(devBounds);
+    }
+    SkPoint originOffset = {SkIntToScalar(this->getClip()->fOrigin.fX),
+                            SkIntToScalar(this->getClip()->fOrigin.fY)};
+    SkRect clipSpaceBounds = devBounds;
+    clipSpaceBounds.offset(originOffset);  // shift by the clip origin before asking the stack
+    return this->getClip()->fClipStack->quickContains(clipSpaceBounds);
+}
+
+int GrInOrderDrawBuffer::concatInstancedDraw(const DrawInfo& info) {
+    GrAssert(info.isInstanced());
+
     const GeometrySrcState& geomSrc = this->getGeomSrc();
 
-    // we only attempt to concat the case when reserved verts are used with
-    // an index buffer.
-    if (kReserved_GeometrySrcType == geomSrc.fVertexSrc &&
-        kBuffer_GeometrySrcType == geomSrc.fIndexSrc) {
-
-        if (this->needsNewClip()) {
-            this->recordClip();
-        }
-        if (this->needsNewState()) {
-            this->recordState();
-        }
-
-        DrawRecord* draw = NULL;
-        // if the last draw used the same indices/vertices per shape then we
-        // may be able to append to it.
-        if (kDraw_Cmd == fCmds.back() &&
-            verticesPerInstance == fInstancedDrawTracker.fVerticesPerInstance &&
-            indicesPerInstance == fInstancedDrawTracker.fIndicesPerInstance) {
-            GrAssert(fDraws.count());
-            draw = &fDraws.back();
-        }
-
-        GeometryPoolState& poolState = fGeoPoolStateStack.back();
-        const GrVertexBuffer* vertexBuffer = poolState.fPoolVertexBuffer;
-
-        // Check whether the draw is compatible with this draw in order to
-        // append
-        if (NULL == draw ||
-            draw->fIndexBuffer != geomSrc.fIndexBuffer ||
-            draw->fPrimitiveType != type ||
-            draw->fVertexBuffer != vertexBuffer) {
-
-            draw = this->recordDraw();
-            draw->fPrimitiveType = type;
-            draw->fStartVertex = poolState.fPoolStartVertex;
-            draw->fStartIndex = 0;
-            draw->fVertexCount = 0;
-            draw->fIndexCount = 0;
-            draw->fVertexLayout = geomSrc.fVertexLayout;
-            draw->fVertexBuffer = vertexBuffer;
-            vertexBuffer->ref();
-            draw->fIndexBuffer = geomSrc.fIndexBuffer;
-            geomSrc.fIndexBuffer->ref();
-        } else {
-            GrAssert(!(draw->fIndexCount % indicesPerInstance));
-            GrAssert(!(draw->fVertexCount % verticesPerInstance));
-            GrAssert(poolState.fPoolStartVertex == draw->fStartVertex +
-                                                   draw->fVertexCount);
-        }
-
-        // how many instances can be in a single draw
-        int maxInstancesPerDraw = this->indexCountInCurrentSource() /
-                                  indicesPerInstance;
-        if (!maxInstancesPerDraw) {
-            return;
-        }
-        // how many instances should be concat'ed onto draw
-        int instancesToConcat = maxInstancesPerDraw - draw->fVertexCount /
-                                                      verticesPerInstance;
-        if (maxInstancesPerDraw > instanceCount) {
-            maxInstancesPerDraw = instanceCount;
-            if (instancesToConcat > instanceCount) {
-                instancesToConcat = instanceCount;
-            }
-        }
-
-        // update the amount of reserved data actually referenced in draws
-        size_t vertexBytes = instanceCount * verticesPerInstance *
-                             GrDrawState::VertexSize(draw->fVertexLayout);
-        poolState.fUsedPoolVertexBytes =
-                            GrMax(poolState.fUsedPoolVertexBytes, vertexBytes);
-
-        while (instanceCount) {
-            if (!instancesToConcat) {
-                int startVertex = draw->fStartVertex + draw->fVertexCount;
-                draw = this->recordDraw();
-                draw->fPrimitiveType = type;
-                draw->fStartVertex = startVertex;
-                draw->fStartIndex = 0;
-                draw->fVertexCount = 0;
-                draw->fIndexCount = 0;
-                draw->fVertexLayout = geomSrc.fVertexLayout;
-                draw->fVertexBuffer = vertexBuffer;
-                vertexBuffer->ref();
-                draw->fIndexBuffer = geomSrc.fIndexBuffer;
-                geomSrc.fIndexBuffer->ref();
-                instancesToConcat = maxInstancesPerDraw;
-            }
-            draw->fVertexCount += instancesToConcat * verticesPerInstance;
-            draw->fIndexCount += instancesToConcat * indicesPerInstance;
-            instanceCount -= instancesToConcat;
-            instancesToConcat = 0;
-        }
-
-        // update draw tracking for next draw
-        fCurrQuad = 0;
-        fInstancedDrawTracker.fVerticesPerInstance = verticesPerInstance;
-        fInstancedDrawTracker.fIndicesPerInstance = indicesPerInstance;
-    } else {
-        this->INHERITED::drawIndexedInstances(type,
-                                              instanceCount,
-                                              verticesPerInstance,
-                                              indicesPerInstance);
+    // we only attempt to concat the case when reserved verts are used with a client-specified index
+    // buffer. To make this work with client-specified VBs we'd need to know if the VB was updated
+    // between draws.
+    if (kReserved_GeometrySrcType != geomSrc.fVertexSrc ||
+        kBuffer_GeometrySrcType != geomSrc.fIndexSrc) {
+        return 0;
     }
+    // Check if there is a draw info that is compatible that uses the same VB from the pool and
+    // the same IB
+    if (kDraw_Cmd != fCmds.back()) {
+        return 0;
+    }
+
+    DrawRecord* draw = &fDraws.back();
+    GeometryPoolState& poolState = fGeoPoolStateStack.back();
+    const GrVertexBuffer* vertexBuffer = poolState.fPoolVertexBuffer;
+
+    if (!draw->isInstanced() ||
+        draw->verticesPerInstance() != info.verticesPerInstance() ||
+        draw->indicesPerInstance() != info.indicesPerInstance() ||
+        draw->fVertexBuffer != vertexBuffer ||
+        draw->fIndexBuffer != geomSrc.fIndexBuffer ||
+        draw->fVertexLayout != geomSrc.fVertexLayout) {
+        return 0;
+    }
+    // info does not yet account for the offset from the start of the pool's VB while the previous
+    // draw record does.
+    int adjustedStartVertex = poolState.fPoolStartVertex + info.startVertex();
+    if (draw->startVertex() + draw->vertexCount() != adjustedStartVertex) {
+        return 0;
+    }
+
+    GrAssert(poolState.fPoolStartVertex == draw->startVertex() + draw->vertexCount());
+
+    // how many instances can be concat'ed onto draw given the size of the index buffer
+    int instancesToConcat = this->indexCountInCurrentSource() / info.indicesPerInstance();
+    instancesToConcat -= draw->instanceCount();
+    instancesToConcat = GrMin(instancesToConcat, info.instanceCount());
+
+    // update the amount of reserved vertex data actually referenced in draws
+    size_t vertexBytes = instancesToConcat * info.verticesPerInstance() *
+                         GrDrawState::VertexSize(draw->fVertexLayout);
+    poolState.fUsedPoolVertexBytes = GrMax(poolState.fUsedPoolVertexBytes, vertexBytes);
+
+    draw->adjustInstanceCount(instancesToConcat);
+    return instancesToConcat;
 }
 
+class AutoClipReenable {  // Scoped helper: turns the clip state bit back on when destroyed.
+public:
+    AutoClipReenable() : fDrawState(NULL) {}  // inert until set() actually disables clipping
+    ~AutoClipReenable() {
+        if (NULL != fDrawState) {
+            fDrawState->enableState(GrDrawState::kClip_StateBit);
+        }
+    }
+    void set(GrDrawState* drawState) {  // disables clipping on drawState if currently enabled
+        if (drawState->isClipState()) {
+            fDrawState = drawState;  // remembered so the destructor can restore the bit
+            drawState->disableState(GrDrawState::kClip_StateBit);
+        }
+    }
+private:
+    GrDrawState*    fDrawState;  // non-NULL only after set() has disabled clipping
+};
+
 void GrInOrderDrawBuffer::onDraw(const DrawInfo& info) {
 
-    this->resetDrawTracking();
-
     GeometryPoolState& poolState = fGeoPoolStateStack.back();
+    AutoClipReenable acr;
+
+    if (this->getDrawState().isClipState() &&
+        NULL != info.getDevBounds() &&
+        this->quickInsideClip(*info.getDevBounds())) {
+        acr.set(this->drawState());
+    }
 
     if (this->needsNewClip()) {
        this->recordClip();
@@ -386,7 +300,18 @@
         this->recordState();
     }
 
-    DrawRecord* draw = this->recordDraw(info);
+    DrawRecord* draw;
+    if (info.isInstanced()) {
+        int instancesConcated = this->concatInstancedDraw(info);
+        if (info.instanceCount() > instancesConcated) {
+            draw = this->recordDraw(info);
+            draw->adjustInstanceCount(-instancesConcated);
+        } else {
+            return;
+        }
+    } else {
+        draw = this->recordDraw(info);
+    }
     draw->fVertexLayout = this->getVertexLayout();
 
     switch (this->getGeomSrc().fVertexSrc) {
@@ -399,7 +324,7 @@
                                  GrDrawState::VertexSize(draw->fVertexLayout);
             poolState.fUsedPoolVertexBytes = GrMax(poolState.fUsedPoolVertexBytes, vertexBytes);
             draw->fVertexBuffer = poolState.fPoolVertexBuffer;
-            draw->fStartVertex += poolState.fPoolStartVertex;
+            draw->adjustStartVertex(poolState.fPoolStartVertex);
             break;
         }
         default:
@@ -417,7 +342,7 @@
                 size_t indexBytes = (info.indexCount() + info.startIndex()) * sizeof(uint16_t);
                 poolState.fUsedPoolIndexBytes = GrMax(poolState.fUsedPoolIndexBytes, indexBytes);
                 draw->fIndexBuffer = poolState.fPoolIndexBuffer;
-                draw->fStartIndex += poolState.fPoolStartIndex;
+                draw->adjustStartIndex(poolState.fPoolStartIndex);
                 break;
             }
             default:
@@ -429,7 +354,6 @@
     }
 }
 
-
 GrInOrderDrawBuffer::StencilPath::StencilPath() : fStroke(SkStrokeRec::kFill_InitStyle) {}
 
 void GrInOrderDrawBuffer::onStencilPath(const GrPath* path, const SkStrokeRec& stroke,
@@ -448,9 +372,7 @@
     sp->fStroke = stroke;
 }
 
-void GrInOrderDrawBuffer::clear(const GrIRect* rect,
-                                GrColor color,
-                                GrRenderTarget* renderTarget) {
+void GrInOrderDrawBuffer::clear(const GrIRect* rect, GrColor color, GrRenderTarget* renderTarget) {
     GrIRect r;
     if (NULL == renderTarget) {
         renderTarget = this->drawState()->getRenderTarget();
@@ -491,8 +413,6 @@
     fClips.reset();
     fClipOrigins.reset();
     fClipSet = true;
-
-    this->resetDrawTracking();
 }
 
 bool GrInOrderDrawBuffer::flushTo(GrDrawTarget* target) {
@@ -532,21 +452,11 @@
             case kDraw_Cmd: {
                 const DrawRecord& draw = fDraws[currDraw];
                 target->setVertexSourceToBuffer(draw.fVertexLayout, draw.fVertexBuffer);
-                if (draw.fIndexCount) {
+                if (draw.isIndexed()) {
                     target->setIndexSourceToBuffer(draw.fIndexBuffer);
                 }
+                target->executeDraw(draw);
 
-                if (draw.fIndexCount) {
-                    target->drawIndexed(draw.fPrimitiveType,
-                                        draw.fStartVertex,
-                                        draw.fStartIndex,
-                                        draw.fVertexCount,
-                                        draw.fIndexCount);
-                } else {
-                    target->drawNonIndexed(draw.fPrimitiveType,
-                                           draw.fStartVertex,
-                                           draw.fVertexCount);
-                }
                 ++currDraw;
                 break;
             }
@@ -774,7 +684,6 @@
     GeometryPoolState& poolState = fGeoPoolStateStack.push_back();
     poolState.fUsedPoolVertexBytes = 0;
     poolState.fUsedPoolIndexBytes = 0;
-    this->resetDrawTracking();
 #if GR_DEBUG
     poolState.fPoolVertexBuffer = (GrVertexBuffer*)~0;
     poolState.fPoolStartVertex = ~0;
@@ -802,7 +711,6 @@
         poolState.fUsedPoolIndexBytes = sizeof(uint16_t) *
                                          restoredState.fIndexCount;
     }
-    this->resetDrawTracking();
 }
 
 bool GrInOrderDrawBuffer::needsNewState() const {
@@ -834,19 +742,9 @@
     fCmds.push_back(kSetState_Cmd);
 }
 
-GrInOrderDrawBuffer::DrawRecord* GrInOrderDrawBuffer::recordDraw() {
-    fCmds.push_back(kDraw_Cmd);
-    return &fDraws.push_back();
-}
-
 GrInOrderDrawBuffer::DrawRecord* GrInOrderDrawBuffer::recordDraw(const DrawInfo& info) {
-    DrawRecord* record = this->recordDraw();
-    record->fPrimitiveType  = info.primitiveType();
-    record->fStartVertex    = info.startVertex();
-    record->fVertexCount    = info.vertexCount();
-    record->fStartIndex     = info.startIndex();
-    record->fIndexCount     = info.indexCount();
-    return record;
+    fCmds.push_back(kDraw_Cmd);
+    return &fDraws.push_back(info);
 }
 
 GrInOrderDrawBuffer::StencilPath* GrInOrderDrawBuffer::recordStencilPath() {
@@ -862,4 +760,5 @@
 void GrInOrderDrawBuffer::clipWillBeSet(const GrClipData* newClipData) {
     INHERITED::clipWillBeSet(newClipData);
     fClipSet = true;
+    fClipProxyState = kUnknown_ClipProxyState;
 }
diff --git a/src/gpu/GrInOrderDrawBuffer.h b/src/gpu/GrInOrderDrawBuffer.h
index 21011c9..daa5d06 100644
--- a/src/gpu/GrInOrderDrawBuffer.h
+++ b/src/gpu/GrInOrderDrawBuffer.h
@@ -55,13 +55,6 @@
     virtual ~GrInOrderDrawBuffer();
 
     /**
-     * Provides the buffer with an index buffer that can be used for quad rendering.
-     * The buffer may be able to batch consecutive drawRects if this is provided.
-     * @param indexBuffer   index buffer with quad indices.
-     */
-    void setQuadIndexBuffer(const GrIndexBuffer* indexBuffer);
-
-    /**
      * Empties the draw buffer of any queued up draws. This must not be called while inside an
      * unbalanced pushGeometrySource(). The current draw state and clip are preserved.
      */
@@ -89,24 +82,16 @@
     void setAutoFlushTarget(GrDrawTarget* target);
 
     // overrides from GrDrawTarget
-    virtual void drawRect(const GrRect& rect,
-                          const SkMatrix* matrix = NULL,
-                          const GrRect* srcRects[] = NULL,
-                          const SkMatrix* srcMatrices[] = NULL) SK_OVERRIDE;
-
-    virtual void drawIndexedInstances(GrPrimitiveType type,
-                                      int instanceCount,
-                                      int verticesPerInstance,
-                                      int indicesPerInstance)
-                                      SK_OVERRIDE;
-
     virtual bool geometryHints(size_t vertexSize,
                                int* vertexCount,
                                int* indexCount) const SK_OVERRIDE;
-
     virtual void clear(const GrIRect* rect,
                        GrColor color,
                        GrRenderTarget* renderTarget = NULL) SK_OVERRIDE;
+    virtual void drawRect(const GrRect& rect,
+                          const SkMatrix* matrix,
+                          const GrRect* srcRects[],
+                          const SkMatrix* srcMatrices[]) SK_OVERRIDE;
 
 protected:
     virtual void clipWillBeSet(const GrClipData* newClip) SK_OVERRIDE;
@@ -120,13 +105,9 @@
         kClear_Cmd          = 5,
     };
 
-    // TODO: Make this derive from DrawInfo
-    struct DrawRecord {
-        GrPrimitiveType         fPrimitiveType;
-        int                     fStartVertex;
-        int                     fStartIndex;
-        int                     fVertexCount;
-        int                     fIndexCount;
+    class DrawRecord : public DrawInfo {
+    public:
+        DrawRecord(const DrawInfo& info) : DrawInfo(info) {}
         GrVertexLayout          fVertexLayout;
         const GrVertexBuffer*   fVertexBuffer;
         const GrIndexBuffer*    fIndexBuffer;
@@ -170,25 +151,24 @@
     virtual void willReserveVertexAndIndexSpace(size_t vertexSize,
                                                 int vertexCount,
                                                 int indexCount) SK_OVERRIDE;
+    bool quickInsideClip(const SkRect& devBounds);
 
+    // Attempts to concat instances from info onto the previous draw. info must represent an
+    // instanced draw. The caller must have already recorded a new draw state and clip if necessary.
+    int concatInstancedDraw(const DrawInfo& info);
 
-    // we lazily record state and clip changes in order to skip clips and states
-    // that have no effect.
+    // we lazily record state and clip changes in order to skip clips and states that have no
+    // effect.
     bool needsNewState() const;
     bool needsNewClip() const;
 
     // these functions record a command
     void            recordState();
     void            recordClip();
-    DrawRecord*     recordDraw();
     DrawRecord*     recordDraw(const DrawInfo&);
     StencilPath*    recordStencilPath();
     Clear*          recordClear();
 
-    // call this to invalidate the tracking data that is used to concatenate
-    // multiple draws into a single draw.
-    void resetDrawTracking();
-
     enum {
         kCmdPreallocCnt          = 32,
         kDrawPreallocCnt         = 8,
@@ -199,6 +179,8 @@
         kGeoPoolStatePreAllocCnt = 4,
     };
 
+    SkAutoTUnref<const GrGpu> fGpu;
+
     SkSTArray<kCmdPreallocCnt, uint8_t, true>                          fCmds;
     GrSTAllocator<kDrawPreallocCnt, DrawRecord>                        fDraws;
     GrSTAllocator<kStatePreallocCnt, StencilPath>                      fStencilPaths;
@@ -212,26 +194,18 @@
 
     bool                            fClipSet;
 
+    enum ClipProxyState {
+        kUnknown_ClipProxyState,
+        kValid_ClipProxyState,
+        kInvalid_ClipProxyState
+    };
+    ClipProxyState                  fClipProxyState;
+    SkRect                          fClipProxy;
+
     GrVertexBufferAllocPool&        fVertexPool;
 
     GrIndexBufferAllocPool&         fIndexPool;
 
-    // these are used to attempt to concatenate drawRect calls
-    GrVertexLayout                  fLastRectVertexLayout;
-    const GrIndexBuffer*            fQuadIndexBuffer;
-    int                             fMaxQuads;
-    int                             fCurrQuad;
-
-    // bookkeeping to attempt to concatenate drawIndexedInstances calls
-    struct {
-        int            fVerticesPerInstance;
-        int            fIndicesPerInstance;
-        void reset() {
-            fVerticesPerInstance = 0;
-            fIndicesPerInstance = 0;
-        }
-    } fInstancedDrawTracker;
-
     struct GeometryPoolState {
         const GrVertexBuffer*           fPoolVertexBuffer;
         int                             fPoolStartVertex;