Simplify GrDrawBatch uploads and token usage.

GrVertexBatch subclasses no longer need "initDraw".

Simplifies GrTestBatch

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1835283002

Review URL: https://codereview.chromium.org/1835283002
diff --git a/gm/beziereffects.cpp b/gm/beziereffects.cpp
index f609ba8..84e2a69 100644
--- a/gm/beziereffects.cpp
+++ b/gm/beziereffects.cpp
@@ -32,67 +32,48 @@
 class BezierCubicOrConicTestBatch : public GrTestBatch {
 public:
     DEFINE_BATCH_CLASS_ID
-    struct Geometry : public GrTestBatch::Geometry {
-        SkRect fBounds;
-    };
 
     const char* name() const override { return "BezierCubicOrConicTestBatch"; }
 
-    static GrDrawBatch* Create(const GrGeometryProcessor* gp, const Geometry& geo,
-                               const SkScalar klmEqs[9], SkScalar sign) {
-        return new BezierCubicOrConicTestBatch(gp, geo, klmEqs, sign);
-    }
-
-private:
-    BezierCubicOrConicTestBatch(const GrGeometryProcessor* gp, const Geometry& geo,
-                                const SkScalar klmEqs[9], SkScalar sign)
-        : INHERITED(ClassID(), gp, geo.fBounds) {
+    BezierCubicOrConicTestBatch(const GrGeometryProcessor* gp, const SkRect& bounds,
+                                GrColor color, const SkScalar klmEqs[9], SkScalar sign)
+        : INHERITED(ClassID(), bounds, color)
+        , fGeometryProcessor(SkRef(gp)) {
         for (int i = 0; i < 9; i++) {
             fKlmEqs[i] = klmEqs[i];
         }
-
-        fGeometry = geo;
         fSign = sign;
     }
 
+private:
+
     struct Vertex {
         SkPoint fPosition;
         float   fKLM[4]; // The last value is ignored. The effect expects a vec4f.
     };
 
-    Geometry* geoData(int index) override {
-        SkASSERT(0 == index);
-        return &fGeometry;
-    }
-
-    const Geometry* geoData(int index) const override {
-        SkASSERT(0 == index);
-        return &fGeometry;
-    }
-
-    void generateGeometry(Target* target) const override {
+    void onPrepareDraws(Target* target) const override {
         QuadHelper helper;
-        size_t vertexStride = this->geometryProcessor()->getVertexStride();
+        size_t vertexStride = fGeometryProcessor->getVertexStride();
         SkASSERT(vertexStride == sizeof(Vertex));
         Vertex* verts = reinterpret_cast<Vertex*>(helper.init(target, vertexStride, 1));
         if (!verts) {
             return;
         }
-
-        verts[0].fPosition.setRectFan(fGeometry.fBounds.fLeft, fGeometry.fBounds.fTop,
-                                      fGeometry.fBounds.fRight, fGeometry.fBounds.fBottom,
+        const SkRect& bounds = this->bounds();
+        verts[0].fPosition.setRectFan(bounds.fLeft, bounds.fTop, bounds.fRight, bounds.fBottom,
                                       sizeof(Vertex));
         for (int v = 0; v < 4; ++v) {
             verts[v].fKLM[0] = eval_line(verts[v].fPosition, fKlmEqs + 0, fSign);
             verts[v].fKLM[1] = eval_line(verts[v].fPosition, fKlmEqs + 3, fSign);
             verts[v].fKLM[2] = eval_line(verts[v].fPosition, fKlmEqs + 6, 1.f);
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, fGeometryProcessor);
     }
 
-    Geometry fGeometry;
-    SkScalar fKlmEqs[9];
-    SkScalar fSign;
+    SkScalar                                fKlmEqs[9];
+    SkScalar                                fSign;
+    SkAutoTUnref<const GrGeometryProcessor> fGeometryProcessor;
 
     static const int kVertsPerCubic = 4;
     static const int kIndicesPerCubic = 6;
@@ -218,12 +199,8 @@
                         GrPorterDuffXPFactory::Create(SkXfermode::kSrc_Mode))->unref();
                     pipelineBuilder.setRenderTarget(rt);
 
-                    BezierCubicOrConicTestBatch::Geometry geometry;
-                    geometry.fColor = color;
-                    geometry.fBounds = bounds;
-
                     SkAutoTUnref<GrDrawBatch> batch(
-                            BezierCubicOrConicTestBatch::Create(gp, geometry, klmEqs, klmSigns[c]));
+                        new BezierCubicOrConicTestBatch(gp, bounds, color, klmEqs, klmSigns[c]));
 
                     drawContext->drawContextPriv().testingOnly_drawBatch(pipelineBuilder, batch);
                 }
@@ -359,12 +336,8 @@
                         GrPorterDuffXPFactory::Create(SkXfermode::kSrc_Mode))->unref();
                     pipelineBuilder.setRenderTarget(rt);
 
-                    BezierCubicOrConicTestBatch::Geometry geometry;
-                    geometry.fColor = color;
-                    geometry.fBounds = bounds;
-
                     SkAutoTUnref<GrDrawBatch> batch(
-                            BezierCubicOrConicTestBatch::Create(gp, geometry, klmEqs, 1.f));
+                        new BezierCubicOrConicTestBatch(gp, bounds, color, klmEqs, 1.f));
 
                     drawContext->drawContextPriv().testingOnly_drawBatch(pipelineBuilder, batch);
                 }
@@ -423,57 +396,39 @@
 class BezierQuadTestBatch : public GrTestBatch {
 public:
     DEFINE_BATCH_CLASS_ID
-    struct Geometry : public GrTestBatch::Geometry {
-        SkRect fBounds;
-    };
-
     const char* name() const override { return "BezierQuadTestBatch"; }
 
-    static GrDrawBatch* Create(const GrGeometryProcessor* gp, const Geometry& geo,
-                               const GrPathUtils::QuadUVMatrix& devToUV) {
-        return new BezierQuadTestBatch(gp, geo, devToUV);
+    BezierQuadTestBatch(const GrGeometryProcessor* gp, const SkRect& bounds, GrColor color,
+                        const GrPathUtils::QuadUVMatrix& devToUV)
+        : INHERITED(ClassID(), bounds, color)
+        , fDevToUV(devToUV)
+        , fGeometryProcessor(SkRef(gp)) {
     }
 
 private:
-    BezierQuadTestBatch(const GrGeometryProcessor* gp, const Geometry& geo,
-                        const GrPathUtils::QuadUVMatrix& devToUV)
-        : INHERITED(ClassID(), gp, geo.fBounds)
-        , fGeometry(geo)
-        , fDevToUV(devToUV) {
-    }
 
     struct Vertex {
         SkPoint fPosition;
         float   fKLM[4]; // The last value is ignored. The effect expects a vec4f.
     };
 
-    Geometry* geoData(int index) override {
-        SkASSERT(0 == index);
-        return &fGeometry;
-    }
-
-    const Geometry* geoData(int index) const override {
-        SkASSERT(0 == index);
-        return &fGeometry;
-    }
-
-    void generateGeometry(Target* target) const override {
+    void onPrepareDraws(Target* target) const override {
         QuadHelper helper;
-        size_t vertexStride = this->geometryProcessor()->getVertexStride();
+        size_t vertexStride = fGeometryProcessor->getVertexStride();
         SkASSERT(vertexStride == sizeof(Vertex));
         Vertex* verts = reinterpret_cast<Vertex*>(helper.init(target, vertexStride, 1));
         if (!verts) {
             return;
         }
-        verts[0].fPosition.setRectFan(fGeometry.fBounds.fLeft, fGeometry.fBounds.fTop,
-                                      fGeometry.fBounds.fRight, fGeometry.fBounds.fBottom,
+        const SkRect& bounds = this->bounds();
+        verts[0].fPosition.setRectFan(bounds.fLeft, bounds.fTop, bounds.fRight, bounds.fBottom,
                                       sizeof(Vertex));
         fDevToUV.apply<4, sizeof(Vertex), sizeof(SkPoint)>(verts);
-        helper.recordDraw(target);
+        helper.recordDraw(target, fGeometryProcessor);
     }
 
-    Geometry fGeometry;
-    GrPathUtils::QuadUVMatrix fDevToUV;
+    GrPathUtils::QuadUVMatrix               fDevToUV;
+    SkAutoTUnref<const GrGeometryProcessor> fGeometryProcessor;
 
     static const int kVertsPerCubic = 4;
     static const int kIndicesPerCubic = 6;
@@ -595,12 +550,8 @@
 
                     GrPathUtils::QuadUVMatrix DevToUV(pts);
 
-                    BezierQuadTestBatch::Geometry geometry;
-                    geometry.fColor = color;
-                    geometry.fBounds = bounds;
-
-                    SkAutoTUnref<GrDrawBatch> batch(BezierQuadTestBatch::Create(gp, geometry,
-                                                                                DevToUV));
+                    SkAutoTUnref<GrDrawBatch> batch(
+                        new BezierQuadTestBatch(gp, bounds, color, DevToUV));
 
                     drawContext->drawContextPriv().testingOnly_drawBatch(pipelineBuilder, batch);
                 }
diff --git a/gm/convexpolyeffect.cpp b/gm/convexpolyeffect.cpp
index aec571b..f8d1849 100644
--- a/gm/convexpolyeffect.cpp
+++ b/gm/convexpolyeffect.cpp
@@ -26,45 +26,43 @@
 
 #include "effects/GrConvexPolyEffect.h"
 
-namespace skiagm {
+/** outset rendered rect to visualize anti-aliased poly edges */
+static SkRect outset(const SkRect& unsorted) {
+    SkRect r = unsorted;
+    r.outset(5.f, 5.f);
+    return r;
+}
 
-class ConvexPolyTestBatch : public GrTestBatch {
+/** sorts a rect */
+static SkRect sorted_rect(const SkRect& unsorted) {
+    SkRect r = unsorted;
+    r.sort();
+    return r;
+}
+
+namespace skiagm {
+class PolyBoundsBatch : public GrTestBatch {
 public:
     DEFINE_BATCH_CLASS_ID
-    struct Geometry : public GrTestBatch::Geometry {
-        SkRect fRect;
-        SkRect fBounds; // This will be == fRect, except fBounds must be sorted, whereas fRect can
-                        // be inverted
-    };
 
-    const char* name() const override { return "ConvexPolyTestBatch"; }
+    const char* name() const override { return "PolyBoundsBatch"; }
 
-    static GrDrawBatch* Create(const GrGeometryProcessor* gp, const Geometry& geo) {
-        return new ConvexPolyTestBatch(gp, geo);
+    PolyBoundsBatch(const SkRect& rect, GrColor color)
+        : INHERITED(ClassID(), outset(sorted_rect(rect)), color)
+        , fRect(outset(rect)) {
     }
 
 private:
-    ConvexPolyTestBatch(const GrGeometryProcessor* gp, const Geometry& geo)
-        : INHERITED(ClassID(), gp, geo.fBounds)
-        , fGeometry(geo) {
-        // Make sure any artifacts around the exterior of path are visible by using overly
-        // conservative bounding geometry.
-        fGeometry.fBounds.outset(5.f, 5.f);
-        fGeometry.fRect.outset(5.f, 5.f);
-    }
+    void onPrepareDraws(Target* target) const override {
+        using namespace GrDefaultGeoProcFactory;
 
-    Geometry* geoData(int index) override {
-        SkASSERT(0 == index);
-        return &fGeometry;
-    }
+        Color color(this->color());
+        Coverage coverage(Coverage::kSolid_Type);
+        LocalCoords localCoords(LocalCoords::kUnused_Type);
+        SkAutoTUnref<const GrGeometryProcessor> gp(
+            GrDefaultGeoProcFactory::Create(color, coverage, localCoords, SkMatrix::I()));
 
-    const Geometry* geoData(int index) const override {
-        SkASSERT(0 == index);
-        return &fGeometry;
-    }
-
-    void generateGeometry(Target* target) const override {
-        size_t vertexStride = this->geometryProcessor()->getVertexStride();
+        size_t vertexStride = gp->getVertexStride();
         SkASSERT(vertexStride == sizeof(SkPoint));
         QuadHelper helper;
         SkPoint* verts = reinterpret_cast<SkPoint*>(helper.init(target, vertexStride, 1));
@@ -72,12 +70,12 @@
             return;
         }
 
-        fGeometry.fRect.toQuad(verts);
+        fRect.toQuad(verts);
 
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
-    Geometry fGeometry;
+    SkRect fRect;
 
     typedef GrTestBatch INHERITED;
 };
@@ -156,7 +154,6 @@
     }
 
     void onDraw(SkCanvas* canvas) override {
-        using namespace GrDefaultGeoProcFactory;
         GrRenderTarget* rt = canvas->internal_private_accessTopLayerRenderTarget();
         if (nullptr == rt) {
             skiagm::GM::DrawGpuOnlyMessage(canvas);
@@ -172,12 +169,6 @@
             return;
         }
 
-        Color color(0xff000000);
-        Coverage coverage(Coverage::kSolid_Type);
-        LocalCoords localCoords(LocalCoords::kUnused_Type);
-        SkAutoTUnref<const GrGeometryProcessor> gp(
-                GrDefaultGeoProcFactory::Create(color, coverage, localCoords, SkMatrix::I()));
-
         SkScalar y = 0;
         static const SkScalar kDX = 12.f;
         for (PathList::Iter iter(fPaths, PathList::Iter::kHead_IterStart);
@@ -203,12 +194,7 @@
                 pipelineBuilder.addCoverageFragmentProcessor(fp);
                 pipelineBuilder.setRenderTarget(rt);
 
-                ConvexPolyTestBatch::Geometry geometry;
-                geometry.fColor = color.fColor;
-                geometry.fRect = p.getBounds();
-                geometry.fBounds = p.getBounds();
-
-                SkAutoTUnref<GrDrawBatch> batch(ConvexPolyTestBatch::Create(gp, geometry));
+                SkAutoTUnref<GrDrawBatch> batch(new PolyBoundsBatch(p.getBounds(), 0xff000000));
 
                 drawContext->drawContextPriv().testingOnly_drawBatch(pipelineBuilder, batch);
 
@@ -249,13 +235,7 @@
                 pipelineBuilder.addCoverageFragmentProcessor(fp);
                 pipelineBuilder.setRenderTarget(rt);
 
-                ConvexPolyTestBatch::Geometry geometry;
-                geometry.fColor = color.fColor;
-                geometry.fRect = rect;
-                geometry.fBounds = rect;
-                geometry.fBounds.sort();
-
-                SkAutoTUnref<GrDrawBatch> batch(ConvexPolyTestBatch::Create(gp, geometry));
+                SkAutoTUnref<GrDrawBatch> batch(new PolyBoundsBatch(rect, 0xff000000));
 
                 drawContext->drawContextPriv().testingOnly_drawBatch(pipelineBuilder, batch);
 
diff --git a/src/gpu/GrBatchAtlas.cpp b/src/gpu/GrBatchAtlas.cpp
index 9f3c4dd..40ab0e6 100644
--- a/src/gpu/GrBatchAtlas.cpp
+++ b/src/gpu/GrBatchAtlas.cpp
@@ -14,8 +14,8 @@
 
 GrBatchAtlas::BatchPlot::BatchPlot(int index, uint64_t genID, int offX, int offY, int width,
                                    int height, GrPixelConfig config)
-    : fLastUpload(0)
-    , fLastUse(0)
+    : fLastUpload(GrBatchDrawToken::AlreadyFlushedToken())
+    , fLastUse(GrBatchDrawToken::AlreadyFlushedToken())
     , fIndex(index)
     , fGenID(genID)
     , fID(CreateId(fIndex, fGenID))
@@ -78,7 +78,7 @@
     return true;
 }
 
-void GrBatchAtlas::BatchPlot::uploadToTexture(GrBatchUploader::TextureUploader* uploader,
+void GrBatchAtlas::BatchPlot::uploadToTexture(GrDrawBatch::WritePixelsFn& writePixels,
                                               GrTexture* texture) {
     // We should only be issuing uploads if we are in fact dirty
     SkASSERT(fDirty && fData && texture);
@@ -87,10 +87,8 @@
     const unsigned char* dataPtr = fData;
     dataPtr += rowBytes * fDirtyRect.fTop;
     dataPtr += fBytesPerPixel * fDirtyRect.fLeft;
-    uploader->writeTexturePixels(texture,
-                                 fOffset.fX + fDirtyRect.fLeft, fOffset.fY + fDirtyRect.fTop,
-                                 fDirtyRect.width(), fDirtyRect.height(),
-                                 fConfig, dataPtr, rowBytes);
+    writePixels(texture, fOffset.fX + fDirtyRect.fLeft, fOffset.fY + fDirtyRect.fTop,
+                fDirtyRect.width(), fDirtyRect.height(), fConfig, dataPtr, rowBytes);
     fDirtyRect.setEmpty();
     SkDEBUGCODE(fDirty = false;)
 }
@@ -112,28 +110,6 @@
     SkDEBUGCODE(fDirty = false;)
 }
 
-////////////////////////////////////////////////////////////////////////////////
-
-class GrPlotUploader : public GrBatchUploader {
-public:
-    GrPlotUploader(GrBatchAtlas::BatchPlot* plot, GrTexture* texture)
-        : INHERITED(plot->lastUploadToken())
-        , fPlot(SkRef(plot))
-        , fTexture(texture) {
-        SkASSERT(plot);
-    }
-
-    void upload(TextureUploader* uploader) override {
-        fPlot->uploadToTexture(uploader, fTexture);
-    }
-
-private:
-    SkAutoTUnref<GrBatchAtlas::BatchPlot> fPlot;
-    GrTexture* fTexture;
-
-    typedef GrBatchUploader INHERITED;
-};
-
 ///////////////////////////////////////////////////////////////////////////////
 
 GrBatchAtlas::GrBatchAtlas(GrTexture* texture, int numPlotsX, int numPlotsY)
@@ -185,15 +161,21 @@
     // If our most recent upload has already occurred then we have to insert a new
     // upload. Otherwise, we already have a scheduled upload that hasn't yet ocurred.
     // This new update will piggy back on that previously scheduled update.
-    if (target->hasTokenBeenFlushed(plot->lastUploadToken())) {
-        plot->setLastUploadToken(target->asapToken());
-        SkAutoTUnref<GrPlotUploader> uploader(new GrPlotUploader(plot, fTexture));
-        target->upload(uploader);
+    if (target->hasDrawBeenFlushed(plot->lastUploadToken())) {
+        // With C++14 we could move the sk_sp into the lambda to only ref once.
+        sk_sp<BatchPlot> plotsp(SkRef(plot));
+        GrTexture* texture = fTexture;
+        GrBatchDrawToken lastUploadToken = target->addAsapUpload(
+            [plotsp, texture] (GrDrawBatch::WritePixelsFn& writePixels) {
+               plotsp->uploadToTexture(writePixels, texture);
+            }
+        );
+        plot->setLastUploadToken(lastUploadToken);
     }
     *id = plot->id();
 }
 
-bool GrBatchAtlas::addToAtlas(AtlasID* id, GrDrawBatch::Target* batchTarget,
+bool GrBatchAtlas::addToAtlas(AtlasID* id, GrDrawBatch::Target* target,
                               int width, int height, const void* image, SkIPoint16* loc) {
     // We should already have a texture, TODO clean this up
     SkASSERT(fTexture);
@@ -205,7 +187,7 @@
     while ((plot = plotIter.get())) {
         SkASSERT(GrBytesPerPixel(fTexture->desc().fConfig) == plot->bpp());
         if (plot->addSubImage(width, height, image, loc)) {
-            this->updatePlot(batchTarget, id, plot);
+            this->updatePlot(target, id, plot);
             return true;
         }
         plotIter.next();
@@ -215,30 +197,26 @@
     // gpu
     plot = fPlotList.tail();
     SkASSERT(plot);
-    if (batchTarget->hasTokenBeenFlushed(plot->lastUseToken())) {
+    if (target->hasDrawBeenFlushed(plot->lastUseToken())) {
         this->processEviction(plot->id());
         plot->resetRects();
         SkASSERT(GrBytesPerPixel(fTexture->desc().fConfig) == plot->bpp());
         SkDEBUGCODE(bool verify = )plot->addSubImage(width, height, image, loc);
         SkASSERT(verify);
-        this->updatePlot(batchTarget, id, plot);
+        this->updatePlot(target, id, plot);
         fAtlasGeneration++;
         return true;
     }
 
-    // The least recently used plot hasn't been flushed to the gpu yet, however, if we have flushed
-    // it to the batch target than we can reuse it.  Our last use token is guaranteed to be less
-    // than or equal to the current token.  If its 'less than' the current token, than we can spin
-    // off the plot (ie let the batch target manage it) and create a new plot in its place in our
-    // array.  If it is equal to the currentToken, then the caller has to flush draws to the batch
-    // target so we can spin off the plot
-    if (plot->lastUseToken() == batchTarget->currentToken()) {
+    // If this plot has been used in a draw that is currently being prepared by a batch, then we
+    // have to fail. This gives the batch a chance to enqueue the draw, and call back into this
+    // function. When that draw is enqueued, the draw token advances, and the subsequent call will
+    // continue past this branch and prepare an inline upload that will occur after the enqueued
+    // draw which references the plot's pre-upload content.
+    if (plot->lastUseToken() == target->nextDrawToken()) {
         return false;
     }
 
-    SkASSERT(plot->lastUseToken() < batchTarget->currentToken());
-    SkASSERT(!batchTarget->hasTokenBeenFlushed(batchTarget->currentToken()));
-
     SkASSERT(!plot->unique());  // The GrPlotUpdater should have a ref too
 
     this->processEviction(plot->id());
@@ -253,9 +231,16 @@
 
     // Note that this plot will be uploaded inline with the draws whereas the
     // one it displaced most likely was uploaded asap.
-    newPlot->setLastUploadToken(batchTarget->currentToken());
-    SkAutoTUnref<GrPlotUploader> uploader(new GrPlotUploader(newPlot, fTexture));
-    batchTarget->upload(uploader);
+    // With C++14 we could move the sk_sp into the lambda to only ref once.
+    sk_sp<BatchPlot> plotsp(SkRef(newPlot.get()));
+    GrTexture* texture = fTexture;
+    GrBatchDrawToken lastUploadToken = target->addInlineUpload(
+        [plotsp, texture] (GrDrawBatch::WritePixelsFn& writePixels) {
+            plotsp->uploadToTexture(writePixels, texture);
+        }
+    );
+    newPlot->setLastUploadToken(lastUploadToken);
+
     *id = newPlot->id();
 
     fAtlasGeneration++;
diff --git a/src/gpu/GrBatchAtlas.h b/src/gpu/GrBatchAtlas.h
index 43dd692..707f463 100644
--- a/src/gpu/GrBatchAtlas.h
+++ b/src/gpu/GrBatchAtlas.h
@@ -65,7 +65,7 @@
     }
 
     // To ensure the atlas does not evict a given entry, the client must set the last use token
-    inline void setLastUseToken(AtlasID id, GrBatchToken batchToken) {
+    inline void setLastUseToken(AtlasID id, GrBatchDrawToken batchToken) {
         SkASSERT(this->hasID(id));
         uint32_t index = GetIndexFromID(id);
         SkASSERT(index < fNumPlots);
@@ -124,7 +124,7 @@
         friend class GrBatchAtlas;
     };
 
-    void setLastUseTokenBulk(const BulkUseTokenUpdater& updater, GrBatchToken batchToken) {
+    void setLastUseTokenBulk(const BulkUseTokenUpdater& updater, GrBatchDrawToken batchToken) {
         int count = updater.fPlotsToUpdate.count();
         for (int i = 0; i < count; i++) {
             BatchPlot* plot = fPlotArray[updater.fPlotsToUpdate[i]];
@@ -167,18 +167,12 @@
         // we don't need to issue a new upload even if we update the cpu backing store.  We use
         // lastUse to determine when we can evict a plot from the cache, ie if the last use has
         // already flushed through the gpu then we can reuse the plot.
-        GrBatchToken lastUploadToken() const { return fLastUpload; }
-        GrBatchToken lastUseToken() const { return fLastUse; }
-        void setLastUploadToken(GrBatchToken batchToken) {
-            SkASSERT(batchToken >= fLastUpload);
-            fLastUpload = batchToken;
-        }
-        void setLastUseToken(GrBatchToken batchToken) {
-            SkASSERT(batchToken >= fLastUse);
-            fLastUse = batchToken;
-        }
+        GrBatchDrawToken lastUploadToken() const { return fLastUpload; }
+        GrBatchDrawToken lastUseToken() const { return fLastUse; }
+        void setLastUploadToken(GrBatchDrawToken batchToken) { fLastUpload = batchToken; }
+        void setLastUseToken(GrBatchDrawToken batchToken) { fLastUse = batchToken; }
 
-        void uploadToTexture(GrBatchUploader::TextureUploader* uploader, GrTexture* texture);
+        void uploadToTexture(GrDrawBatch::WritePixelsFn&, GrTexture* texture);
         void resetRects();
 
     private:
@@ -199,8 +193,8 @@
             return generation << 16 | index;
         }
 
-        GrBatchToken          fLastUpload;
-        GrBatchToken          fLastUse;
+        GrBatchDrawToken      fLastUpload;
+        GrBatchDrawToken      fLastUse;
 
         const uint32_t        fIndex;
         uint64_t              fGenID;
@@ -246,8 +240,6 @@
 
     inline void processEviction(AtlasID);
 
-    friend class GrPlotUploader; // to access GrBatchPlot
-
     GrTexture* fTexture;
     SkDEBUGCODE(uint32_t fNumPlots;)
 
diff --git a/src/gpu/GrBatchFlushState.cpp b/src/gpu/GrBatchFlushState.cpp
index f01d888..c4ba264 100644
--- a/src/gpu/GrBatchFlushState.cpp
+++ b/src/gpu/GrBatchFlushState.cpp
@@ -12,11 +12,10 @@
 
 GrBatchFlushState::GrBatchFlushState(GrGpu* gpu, GrResourceProvider* resourceProvider)
     : fGpu(gpu)
-    , fUploader(gpu)
     , fResourceProvider(resourceProvider)
     , fVertexPool(gpu)
     , fIndexPool(gpu)
-    , fCurrentToken(0)
+    , fLastIssuedToken(GrBatchDrawToken::AlreadyFlushedToken())
     , fLastFlushedToken(0) {}
 
 void* GrBatchFlushState::makeVertexSpace(size_t vertexSize, int vertexCount,
diff --git a/src/gpu/GrBatchFlushState.h b/src/gpu/GrBatchFlushState.h
index be9d790..0b2e2bd 100644
--- a/src/gpu/GrBatchFlushState.h
+++ b/src/gpu/GrBatchFlushState.h
@@ -13,34 +13,6 @@
 
 class GrResourceProvider;
 
-/** Simple class that performs the upload on behalf of a GrBatchUploader. */
-class GrBatchUploader::TextureUploader {
-public:
-    TextureUploader(GrGpu* gpu) : fGpu(gpu) { SkASSERT(gpu); }
-
-    /**
-        * Updates the pixels in a rectangle of a texture.
-        *
-        * @param left          left edge of the rectangle to write (inclusive)
-        * @param top           top edge of the rectangle to write (inclusive)
-        * @param width         width of rectangle to write in pixels.
-        * @param height        height of rectangle to write in pixels.
-        * @param config        the pixel config of the source buffer
-        * @param buffer        memory to read pixels from
-        * @param rowBytes      number of bytes between consecutive rows. Zero
-        *                      means rows are tightly packed.
-        */
-    bool writeTexturePixels(GrTexture* texture,
-                            int left, int top, int width, int height,
-                            GrPixelConfig config, const void* buffer,
-                            size_t rowBytes) {
-        return fGpu->writePixels(texture, left, top, width, height, config, buffer, rowBytes);
-    }
-
-private:
-    GrGpu* fGpu;
-};
-
 /** Tracks the state across all the GrBatches in a GrDrawTarget flush. */
 class GrBatchFlushState {
 public:
@@ -48,32 +20,37 @@
 
     ~GrBatchFlushState() { this->reset(); }
 
-    void advanceToken() { ++fCurrentToken; }
-
-    void advanceLastFlushedToken() { ++fLastFlushedToken; }
-
-    /** Inserts an upload to be executred after all batches in the flush prepared their draws
+    /** Inserts an upload to be executed after all batches in the flush prepared their draws
         but before the draws are executed to the backend 3D API. */
-    void addASAPUpload(GrBatchUploader* upload) {
-        fAsapUploads.push_back().reset(SkRef(upload));
+    void addASAPUpload(GrDrawBatch::DeferredUploadFn&& upload) {
+        fAsapUploads.emplace_back(std::move(upload));
     }
 
     const GrCaps& caps() const { return *fGpu->caps(); }
     GrResourceProvider* resourceProvider() const { return fResourceProvider; }
 
     /** Has the token been flushed to the backend 3D API. */
-    bool hasTokenBeenFlushed(GrBatchToken token) const { return fLastFlushedToken >= token; }
+    bool hasDrawBeenFlushed(GrBatchDrawToken token) const {
+        return token.fSequenceNumber <= fLastFlushedToken.fSequenceNumber;
+    }
 
-    /** The current token advances once for every contiguous set of uninterrupted draws prepared
-        by a batch. */
-    GrBatchToken currentToken() const { return fCurrentToken; }
+    /** Issue a token to an operation that is being enqueued. */
+    GrBatchDrawToken issueDrawToken() {
+        return GrBatchDrawToken(++fLastIssuedToken.fSequenceNumber);
+    }
+
+    /** Call every time a draw that was issued a token is flushed */
+    void flushToken() { ++fLastFlushedToken.fSequenceNumber; }
+
+    /** Gets the next draw token that will be issued. */
+    GrBatchDrawToken nextDrawToken() const {
+        return GrBatchDrawToken(fLastIssuedToken.fSequenceNumber + 1);
+    }
 
     /** The last token flushed to all the way to the backend API. */
-    GrBatchToken lastFlushedToken() const { return fLastFlushedToken; }
-
-    /** This is a magic token that can be used to indicate that an upload should occur before
-        any draws for any batch in the current flush execute. */
-    GrBatchToken asapToken() const { return fLastFlushedToken + 1; }
+    GrBatchDrawToken nextTokenToFlush() const {
+        return GrBatchDrawToken(fLastFlushedToken.fSequenceNumber + 1);
+    }
 
     void* makeVertexSpace(size_t vertexSize, int vertexCount,
                           const GrBuffer** buffer, int* startVertex);
@@ -85,18 +62,28 @@
         fVertexPool.unmap();
         fIndexPool.unmap();
         int uploadCount = fAsapUploads.count();
+
         for (int i = 0; i < uploadCount; i++) {
-            fAsapUploads[i]->upload(&fUploader);
+            this->doUpload(fAsapUploads[i]);
         }
         fAsapUploads.reset();
     }
 
+    void doUpload(GrDrawBatch::DeferredUploadFn& upload) {
+        GrDrawBatch::WritePixelsFn wp = [this] (GrSurface* surface,
+                int left, int top, int width, int height,
+                GrPixelConfig config, const void* buffer,
+                size_t rowBytes) -> bool {
+            return this->fGpu->writePixels(surface, left, top, width, height, config, buffer,
+                                           rowBytes);
+        };
+        upload(wp);
+    }
+
     void putBackIndices(size_t indices) { fIndexPool.putBack(indices * sizeof(uint16_t)); }
 
     void putBackVertexSpace(size_t sizeInBytes) { fVertexPool.putBack(sizeInBytes); }
 
-    GrBatchUploader::TextureUploader* uploader() { return &fUploader; }
-
     GrGpu* gpu() { return fGpu; }
 
     void reset() {
@@ -105,22 +92,44 @@
     }
 
 private:
-    GrGpu*                                          fGpu;
-    GrBatchUploader::TextureUploader                fUploader;
 
-    GrResourceProvider*                             fResourceProvider;
+    GrGpu*                                              fGpu;
 
-    GrVertexBufferAllocPool                         fVertexPool;
-    GrIndexBufferAllocPool                          fIndexPool;
+    GrResourceProvider*                                 fResourceProvider;
 
-    SkTArray<SkAutoTUnref<GrBatchUploader>, true>   fAsapUploads;
+    GrVertexBufferAllocPool                             fVertexPool;
+    GrIndexBufferAllocPool                              fIndexPool;
 
-    GrBatchToken                                    fCurrentToken;
+    SkSTArray<4, GrDrawBatch::DeferredUploadFn>         fAsapUploads;
 
-    GrBatchToken                                    fLastFlushedToken;
+    GrBatchDrawToken                                    fLastIssuedToken;
+
+    GrBatchDrawToken                                    fLastFlushedToken;
 };
 
 /**
+ * A word about uploads and tokens: Batches should usually schedule their uploads to occur at the
+ * beginning of a frame whenever possible. These are called ASAP uploads. Of course, this requires
+ * that there are no draws that have yet to be flushed that rely on the old texture contents. In
+ * that case the ASAP upload would happen prior to the previous draw causing the draw to read the
+ * new (wrong) texture data. In that case they should schedule an inline upload.
+ *
+ * Batches, in conjunction with helpers such as GrBatchAtlas, can use the token system to know
+ * what the most recent draw was that referenced a resource (or portion of a resource). Each draw
+ * is assigned a token. A resource (or portion) can be tagged with the most recent draw's
+ * token. The target provides a facility for testing whether the draw corresponding to the token
+ * has been flushed. If it has not been flushed then the batch must perform an inline upload
+ * instead. When scheduling an inline upload the batch provides the token of the draw that the
+ * upload must occur before. The upload will then occur before the draw that requires the new
+ * data but after the draw that requires the old data.
+ *
+ * TODO: Currently the token/upload interface is spread over GrDrawBatch, GrVertexBatch,
+ * GrDrawBatch::Target, and GrVertexBatch::Target. However, the interface at the GrDrawBatch
+ * level is not complete and isn't useful. We should push it down to GrVertexBatch until it
+ * is required at the GrDrawBatch level.
+ */
+ 
+/**
  * GrDrawBatch instances use this object to allocate space for their geometry and to issue the draws
  * that render their batch.
  */
@@ -128,19 +137,28 @@
 public:
     Target(GrBatchFlushState* state, GrDrawBatch* batch) : fState(state), fBatch(batch) {}
 
-    void upload(GrBatchUploader* upload) {
-        if (this->asapToken() == upload->lastUploadToken()) {
-            fState->addASAPUpload(upload);
-        } else {
-            fBatch->fInlineUploads.push_back().reset(SkRef(upload));
-        }
+    /** Returns the token of the draw that this upload will occur before. */
+    GrBatchDrawToken addInlineUpload(DeferredUploadFn&& upload) {
+        fBatch->fInlineUploads.emplace_back(std::move(upload), fState->nextDrawToken());
+        return fBatch->fInlineUploads.back().fUploadBeforeToken;
     }
 
-    bool hasTokenBeenFlushed(GrBatchToken token) const {
-        return fState->hasTokenBeenFlushed(token);
+    /** Returns the token of the draw that this upload will occur before. Since ASAP uploads
+        are done first during a flush, this will be the first token since the most recent
+        flush. */
+    GrBatchDrawToken addAsapUpload(DeferredUploadFn&& upload) {
+        fState->addASAPUpload(std::move(upload));
+        return fState->nextTokenToFlush();
     }
-    GrBatchToken currentToken() const { return fState->currentToken(); }
-    GrBatchToken asapToken() const { return fState->asapToken(); }
+
+    bool hasDrawBeenFlushed(GrBatchDrawToken token) const {
+        return fState->hasDrawBeenFlushed(token);
+    }
+
+    /** Gets the next draw token that will be issued by this target. This can be used by a batch
+        to record that the next draw it issues will use a resource (e.g. texture) while preparing
+        that draw. */
+    GrBatchDrawToken nextDrawToken() const { return fState->nextDrawToken(); }
 
     const GrCaps& caps() const { return fState->caps(); }
 
@@ -161,15 +179,7 @@
 public:
     Target(GrBatchFlushState* state, GrVertexBatch* batch) : INHERITED(state, batch) {}
 
-    void initDraw(const GrPrimitiveProcessor* primProc) {
-        GrVertexBatch::DrawArray* draws = this->vertexBatch()->fDrawArrays.addToTail();
-        draws->fPrimitiveProcessor.reset(primProc);
-        this->state()->advanceToken();
-    }
-
-    void draw(const GrMesh& mesh) {
-        this->vertexBatch()->fDrawArrays.tail()->fDraws.push_back(mesh);
-    }
+    void draw(const GrGeometryProcessor* gp, const GrMesh& mesh);
 
     void* makeVertexSpace(size_t vertexSize, int vertexCount,
                           const GrBuffer** buffer, int* startVertex) {
diff --git a/src/gpu/GrDefaultGeoProcFactory.h b/src/gpu/GrDefaultGeoProcFactory.h
index bf2db40..23bcb45 100644
--- a/src/gpu/GrDefaultGeoProcFactory.h
+++ b/src/gpu/GrDefaultGeoProcFactory.h
@@ -10,8 +10,6 @@
 
 #include "GrGeometryProcessor.h"
 
-class GrDrawState;
-
 /*
  * A factory for creating default Geometry Processors which simply multiply position by the uniform
  * view matrix and wire through color, coverage, UV coords if requested.  Right now this is only
diff --git a/src/gpu/GrDrawingManager.cpp b/src/gpu/GrDrawingManager.cpp
index 6efc286..976fbb6 100644
--- a/src/gpu/GrDrawingManager.cpp
+++ b/src/gpu/GrDrawingManager.cpp
@@ -84,7 +84,7 @@
         fDrawTargets[i]->drawBatches(&fFlushState);
     }
 
-    SkASSERT(fFlushState.lastFlushedToken() == fFlushState.currentToken());
+    SkASSERT(fFlushState.nextDrawToken() == fFlushState.nextTokenToFlush());
 
     for (int i = 0; i < fDrawTargets.count(); ++i) {
         fDrawTargets[i]->reset();
diff --git a/src/gpu/GrOvalRenderer.cpp b/src/gpu/GrOvalRenderer.cpp
index 605ad6e..2accbf6 100644
--- a/src/gpu/GrOvalRenderer.cpp
+++ b/src/gpu/GrOvalRenderer.cpp
@@ -607,8 +607,6 @@
         // Setup geometry processor
         SkAutoTUnref<GrGeometryProcessor> gp(new CircleGeometryProcessor(fStroked, localMatrix));
 
-        target->initDraw(gp);
-
         int instanceCount = fGeoData.count();
         size_t vertexStride = gp->getVertexStride();
         SkASSERT(vertexStride == sizeof(CircleVertex));
@@ -656,7 +654,7 @@
 
             verts += kVerticesPerQuad;
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override {
@@ -793,8 +791,6 @@
         // Setup geometry processor
         SkAutoTUnref<GrGeometryProcessor> gp(new EllipseGeometryProcessor(fStroked, localMatrix));
 
-        target->initDraw(gp);
-
         int instanceCount = fGeoData.count();
         QuadHelper helper;
         size_t vertexStride = gp->getVertexStride();
@@ -847,7 +843,7 @@
 
             verts += kVerticesPerQuad;
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override {
@@ -1011,8 +1007,6 @@
         SkAutoTUnref<GrGeometryProcessor> gp(new DIEllipseGeometryProcessor(this->viewMatrix(),
                                                                             this->style()));
 
-        target->initDraw(gp);
-
         int instanceCount = fGeoData.count();
         size_t vertexStride = gp->getVertexStride();
         SkASSERT(vertexStride == sizeof(DIEllipseVertex));
@@ -1061,7 +1055,7 @@
 
             verts += kVerticesPerQuad;
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     DIEllipseBatch(const Geometry& geometry, const SkRect& bounds) : INHERITED(ClassID()) {
@@ -1278,8 +1272,6 @@
         // Setup geometry processor
         SkAutoTUnref<GrGeometryProcessor> gp(new CircleGeometryProcessor(fStroked, localMatrix));
 
-        target->initDraw(gp);
-
         int instanceCount = fGeoData.count();
         size_t vertexStride = gp->getVertexStride();
         SkASSERT(vertexStride == sizeof(CircleVertex));
@@ -1347,7 +1339,7 @@
             }
         }
 
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override {
@@ -1426,8 +1418,6 @@
         // Setup geometry processor
         SkAutoTUnref<GrGeometryProcessor> gp(new EllipseGeometryProcessor(fStroked, localMatrix));
 
-        target->initDraw(gp);
-
         int instanceCount = fGeoData.count();
         size_t vertexStride = gp->getVertexStride();
         SkASSERT(vertexStride == sizeof(EllipseVertex));
@@ -1506,7 +1496,7 @@
                 verts++;
             }
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override {
diff --git a/src/gpu/batches/GrAAConvexPathRenderer.cpp b/src/gpu/batches/GrAAConvexPathRenderer.cpp
index 51c1c55..8c55de7 100644
--- a/src/gpu/batches/GrAAConvexPathRenderer.cpp
+++ b/src/gpu/batches/GrAAConvexPathRenderer.cpp
@@ -792,8 +792,6 @@
             return;
         }
 
-        target->initDraw(gp);
-
         size_t vertexStride = gp->getVertexStride();
 
         SkASSERT(canTweakAlphaForCoverage ?
@@ -839,7 +837,7 @@
                              vertexBuffer, indexBuffer,
                              firstVertex, firstIndex,
                              tess.numPts(), tess.numIndices());
-            target->draw(mesh);
+            target->draw(gp, mesh);
         }
     }
 
@@ -863,8 +861,6 @@
         SkAutoTUnref<GrGeometryProcessor> quadProcessor(
                 QuadEdgeEffect::Create(this->color(), invert, this->usesLocalCoords()));
 
-        target->initDraw(quadProcessor);
-
         // TODO generate all segments for all paths and use one vertex buffer
         for (int i = 0; i < instanceCount; i++) {
             const Geometry& args = fGeoData[i];
@@ -929,7 +925,7 @@
                 const Draw& draw = draws[j];
                 mesh.initIndexed(kTriangles_GrPrimitiveType, vertexBuffer, indexBuffer,
                                  firstVertex, firstIndex, draw.fVertexCnt, draw.fIndexCnt);
-                target->draw(mesh);
+                target->draw(quadProcessor, mesh);
                 firstVertex += draw.fVertexCnt;
                 firstIndex += draw.fIndexCnt;
             }
diff --git a/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp b/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp
index f10d3b8..35d1f61 100644
--- a/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp
+++ b/src/gpu/batches/GrAADistanceFieldPathRenderer.cpp
@@ -176,8 +176,9 @@
     }
 
     struct FlushInfo {
-        SkAutoTUnref<const GrBuffer> fVertexBuffer;
-        SkAutoTUnref<const GrBuffer> fIndexBuffer;
+        SkAutoTUnref<const GrBuffer>            fVertexBuffer;
+        SkAutoTUnref<const GrBuffer>            fIndexBuffer;
+        SkAutoTUnref<const GrGeometryProcessor> fGeometryProcessor;
         int fVertexOffset;
         int fInstancesToFlush;
     };
@@ -198,9 +199,11 @@
 
         GrTextureParams params(SkShader::kRepeat_TileMode, GrTextureParams::kBilerp_FilterMode);
 
+        FlushInfo flushInfo;
+
         // Setup GrGeometryProcessor
         GrBatchAtlas* atlas = fAtlas;
-        SkAutoTUnref<GrGeometryProcessor> dfProcessor(
+        flushInfo.fGeometryProcessor.reset(
                 GrDistanceFieldPathGeoProc::Create(this->color(),
                                                    this->viewMatrix(),
                                                    atlas->getTexture(),
@@ -208,12 +211,8 @@
                                                    flags,
                                                    this->usesLocalCoords()));
 
-        target->initDraw(dfProcessor);
-
-        FlushInfo flushInfo;
-
         // allocate vertices
-        size_t vertexStride = dfProcessor->getVertexStride();
+        size_t vertexStride = flushInfo.fGeometryProcessor->getVertexStride();
         SkASSERT(vertexStride == 2 * sizeof(SkPoint) + sizeof(GrColor));
 
         const GrBuffer* vertexBuffer;
@@ -259,8 +258,6 @@
                 SkScalar scale = desiredDimension/maxDim;
                 pathData = new PathData;
                 if (!this->addPathToAtlas(target,
-                                          dfProcessor,
-                                          this->pipeline(),
                                           &flushInfo,
                                           atlas,
                                           pathData,
@@ -275,15 +272,13 @@
                 }
             }
 
-            atlas->setLastUseToken(pathData->fID, target->currentToken());
+            atlas->setLastUseToken(pathData->fID, target->nextDrawToken());
 
             // Now set vertices
             intptr_t offset = reinterpret_cast<intptr_t>(vertices);
             offset += i * kVerticesPerQuad * vertexStride;
             this->writePathVertices(target,
                                     atlas,
-                                    this->pipeline(),
-                                    dfProcessor,
                                     offset,
                                     args.fColor,
                                     vertexStride,
@@ -316,8 +311,6 @@
     }
 
     bool addPathToAtlas(GrVertexBatch::Target* target,
-                        const GrGeometryProcessor* dfProcessor,
-                        const GrPipeline* pipeline,
                         FlushInfo* flushInfo,
                         GrBatchAtlas* atlas,
                         PathData* pathData,
@@ -406,7 +399,6 @@
                                          &atlasLocation);
         if (!success) {
             this->flush(target, flushInfo);
-            target->initDraw(dfProcessor);
 
             SkDEBUGCODE(success =) atlas->addToAtlas(&id, target, width, height,
                                                      dfStorage.get(), &atlasLocation);
@@ -443,8 +435,6 @@
 
     void writePathVertices(GrDrawBatch::Target* target,
                            GrBatchAtlas* atlas,
-                           const GrPipeline* pipeline,
-                           const GrGeometryProcessor* gp,
                            intptr_t offset,
                            GrColor color,
                            size_t vertexStride,
@@ -496,7 +486,7 @@
         mesh.initInstanced(kTriangles_GrPrimitiveType, flushInfo->fVertexBuffer,
             flushInfo->fIndexBuffer, flushInfo->fVertexOffset, kVerticesPerQuad,
             kIndicesPerQuad, flushInfo->fInstancesToFlush, maxInstancesPerDraw);
-        target->draw(mesh);
+        target->draw(flushInfo->fGeometryProcessor, mesh);
         flushInfo->fVertexOffset += kVerticesPerQuad * flushInfo->fInstancesToFlush;
         flushInfo->fInstancesToFlush = 0;
     }
diff --git a/src/gpu/batches/GrAAHairLinePathRenderer.cpp b/src/gpu/batches/GrAAHairLinePathRenderer.cpp
index 9056e3c..effd8c3 100644
--- a/src/gpu/batches/GrAAHairLinePathRenderer.cpp
+++ b/src/gpu/batches/GrAAHairLinePathRenderer.cpp
@@ -806,37 +806,6 @@
         toSrc = &invert;
     }
 
-    SkAutoTUnref<const GrGeometryProcessor> lineGP;
-    {
-        using namespace GrDefaultGeoProcFactory;
-
-        Color color(this->color());
-        Coverage coverage(Coverage::kAttribute_Type);
-        LocalCoords localCoords(this->usesLocalCoords() ? LocalCoords::kUsePosition_Type :
-                                                          LocalCoords::kUnused_Type);
-        localCoords.fMatrix = geometryProcessorLocalM;
-        lineGP.reset(GrDefaultGeoProcFactory::Create(color, coverage, localCoords,
-                                                     *geometryProcessorViewM));
-    }
-
-    SkAutoTUnref<const GrGeometryProcessor> quadGP(
-            GrQuadEffect::Create(this->color(),
-                                 *geometryProcessorViewM,
-                                 kHairlineAA_GrProcessorEdgeType,
-                                 target->caps(),
-                                 *geometryProcessorLocalM,
-                                 this->usesLocalCoords(),
-                                 this->coverage()));
-
-    SkAutoTUnref<const GrGeometryProcessor> conicGP(
-            GrConicEffect::Create(this->color(),
-                                  *geometryProcessorViewM,
-                                  kHairlineAA_GrProcessorEdgeType,
-                                  target->caps(),
-                                  *geometryProcessorLocalM,
-                                  this->usesLocalCoords(),
-                                  this->coverage()));
-
     // This is hand inlined for maximum performance.
     PREALLOC_PTARRAY(128) lines;
     PREALLOC_PTARRAY(128) quads;
@@ -857,9 +826,21 @@
 
     // do lines first
     if (lineCount) {
+        SkAutoTUnref<const GrGeometryProcessor> lineGP;
+        {
+            using namespace GrDefaultGeoProcFactory;
+
+            Color color(this->color());
+            Coverage coverage(Coverage::kAttribute_Type);
+            LocalCoords localCoords(this->usesLocalCoords() ? LocalCoords::kUsePosition_Type :
+                                    LocalCoords::kUnused_Type);
+            localCoords.fMatrix = geometryProcessorLocalM;
+            lineGP.reset(GrDefaultGeoProcFactory::Create(color, coverage, localCoords,
+                                                         *geometryProcessorViewM));
+        }
+
         SkAutoTUnref<const GrBuffer> linesIndexBuffer(
             ref_lines_index_buffer(target->resourceProvider()));
-        target->initDraw(lineGP);
 
         const GrBuffer* vertexBuffer;
         int firstVertex;
@@ -880,16 +861,32 @@
             add_line(&lines[2*i], toSrc, this->coverage(), &verts);
         }
 
-        {
-            GrMesh mesh;
-            mesh.initInstanced(kTriangles_GrPrimitiveType, vertexBuffer, linesIndexBuffer,
-                               firstVertex, kLineSegNumVertices, kIdxsPerLineSeg, lineCount,
-                               kLineSegsNumInIdxBuffer);
-            target->draw(mesh);
-        }
+        GrMesh mesh;
+        mesh.initInstanced(kTriangles_GrPrimitiveType, vertexBuffer, linesIndexBuffer,
+                           firstVertex, kLineSegNumVertices, kIdxsPerLineSeg, lineCount,
+                           kLineSegsNumInIdxBuffer);
+        target->draw(lineGP, mesh);
     }
 
     if (quadCount || conicCount) {
+        SkAutoTUnref<const GrGeometryProcessor> quadGP(
+            GrQuadEffect::Create(this->color(),
+                                 *geometryProcessorViewM,
+                                 kHairlineAA_GrProcessorEdgeType,
+                                 target->caps(),
+                                 *geometryProcessorLocalM,
+                                 this->usesLocalCoords(),
+                                 this->coverage()));
+
+        SkAutoTUnref<const GrGeometryProcessor> conicGP(
+            GrConicEffect::Create(this->color(),
+                                  *geometryProcessorViewM,
+                                  kHairlineAA_GrProcessorEdgeType,
+                                  target->caps(),
+                                  *geometryProcessorLocalM,
+                                  this->usesLocalCoords(),
+                                  this->coverage()));
+
         const GrBuffer* vertexBuffer;
         int firstVertex;
 
@@ -921,28 +918,20 @@
         }
 
         if (quadCount > 0) {
-            target->initDraw(quadGP);
-
-            {
-                GrMesh mesh;
-                mesh.initInstanced(kTriangles_GrPrimitiveType, vertexBuffer, quadsIndexBuffer,
-                                   firstVertex, kQuadNumVertices, kIdxsPerQuad, quadCount,
-                                   kQuadsNumInIdxBuffer);
-                target->draw(mesh);
-                firstVertex += quadCount * kQuadNumVertices;
-           }
+            GrMesh mesh;
+            mesh.initInstanced(kTriangles_GrPrimitiveType, vertexBuffer, quadsIndexBuffer,
+                               firstVertex, kQuadNumVertices, kIdxsPerQuad, quadCount,
+                               kQuadsNumInIdxBuffer);
+            target->draw(quadGP, mesh);
+            firstVertex += quadCount * kQuadNumVertices;
         }
 
         if (conicCount > 0) {
-            target->initDraw(conicGP);
-
-            {
-                GrMesh mesh;
-                mesh.initInstanced(kTriangles_GrPrimitiveType, vertexBuffer, quadsIndexBuffer,
-                                   firstVertex, kQuadNumVertices, kIdxsPerQuad, conicCount,
-                                   kQuadsNumInIdxBuffer);
-                target->draw(mesh);
-            }
+            GrMesh mesh;
+            mesh.initInstanced(kTriangles_GrPrimitiveType, vertexBuffer, quadsIndexBuffer,
+                               firstVertex, kQuadNumVertices, kIdxsPerQuad, conicCount,
+                               kQuadsNumInIdxBuffer);
+            target->draw(conicGP, mesh);
         }
     }
 }
diff --git a/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp b/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp
index bd4353e..446f67f 100644
--- a/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp
+++ b/src/gpu/batches/GrAALinearizingConvexPathRenderer.cpp
@@ -158,7 +158,7 @@
         fBatch.fCanTweakAlphaForCoverage = overrides.canTweakAlphaForCoverage();
     }
 
-    void draw(GrVertexBatch::Target* target, const GrPipeline* pipeline, int vertexCount,
+    void draw(GrVertexBatch::Target* target, const GrGeometryProcessor* gp, int vertexCount,
               size_t vertexStride, void* vertices, int indexCount, uint16_t* indices) const {
         if (vertexCount == 0 || indexCount == 0) {
             return;
@@ -184,7 +184,7 @@
         memcpy(idxs, indices, indexCount * sizeof(uint16_t));
         mesh.initIndexed(kTriangles_GrPrimitiveType, vertexBuffer, indexBuffer, firstVertex,
                          firstIndex, vertexCount, indexCount);
-        target->draw(mesh);
+        target->draw(gp, mesh);
     }
 
     void onPrepareDraws(Target* target) const override {
@@ -200,8 +200,6 @@
             return;
         }
 
-        target->initDraw(gp);
-
         size_t vertexStride = gp->getVertexStride();
 
         SkASSERT(canTweakAlphaForCoverage ?
@@ -229,8 +227,7 @@
             if (indexCount + currentIndices > UINT16_MAX) {
                 // if we added the current instance, we would overflow the indices we can store in a
                 // uint16_t. Draw what we've got so far and reset.
-                this->draw(target, this->pipeline(), vertexCount, vertexStride, vertices,
-                           indexCount, indices);
+                this->draw(target, gp, vertexCount, vertexStride, vertices, indexCount, indices);
                 vertexCount = 0;
                 indexCount = 0;
             }
@@ -249,8 +246,7 @@
             vertexCount += currentVertices;
             indexCount += currentIndices;
         }
-        this->draw(target, this->pipeline(), vertexCount, vertexStride, vertices, indexCount,
-                   indices);
+        this->draw(target, gp, vertexCount, vertexStride, vertices, indexCount, indices);
         sk_free(vertices);
         sk_free(indices);
     }
diff --git a/src/gpu/batches/GrAAStrokeRectBatch.cpp b/src/gpu/batches/GrAAStrokeRectBatch.cpp
index 90f8165..e4c4062 100644
--- a/src/gpu/batches/GrAAStrokeRectBatch.cpp
+++ b/src/gpu/batches/GrAAStrokeRectBatch.cpp
@@ -192,8 +192,6 @@
         return;
     }
 
-    target->initDraw(gp);
-
     size_t vertexStride = gp->getVertexStride();
 
     SkASSERT(canTweakAlphaForCoverage ?
@@ -231,7 +229,7 @@
                                            args.fDegenerate,
                                            canTweakAlphaForCoverage);
     }
-    helper.recordDraw(target);
+    helper.recordDraw(target, gp);
 }
 
 const GrBuffer* AAStrokeRectBatch::GetIndexBuffer(GrResourceProvider* resourceProvider,
diff --git a/src/gpu/batches/GrAtlasTextBatch.cpp b/src/gpu/batches/GrAtlasTextBatch.cpp
index 0bbf8c0..ca6b99e 100644
--- a/src/gpu/batches/GrAtlasTextBatch.cpp
+++ b/src/gpu/batches/GrAtlasTextBatch.cpp
@@ -97,27 +97,25 @@
 
     GrMaskFormat maskFormat = this->maskFormat();
 
-    SkAutoTUnref<const GrGeometryProcessor> gp;
+    FlushInfo flushInfo;
     if (this->usesDistanceFields()) {
-        gp.reset(this->setupDfProcessor(this->viewMatrix(), fFilteredColor, this->color(),
-                                        texture));
+        flushInfo.fGeometryProcessor.reset(
+            this->setupDfProcessor(this->viewMatrix(), fFilteredColor, this->color(), texture));
     } else {
         GrTextureParams params(SkShader::kClamp_TileMode, GrTextureParams::kNone_FilterMode);
-        gp.reset(GrBitmapTextGeoProc::Create(this->color(),
-                                             texture,
-                                             params,
-                                             maskFormat,
-                                             localMatrix,
-                                             this->usesLocalCoords()));
+        flushInfo.fGeometryProcessor.reset(
+            GrBitmapTextGeoProc::Create(this->color(),
+                                        texture,
+                                        params,
+                                        maskFormat,
+                                        localMatrix,
+                                        this->usesLocalCoords()));
     }
 
-    FlushInfo flushInfo;
     flushInfo.fGlyphsToFlush = 0;
-    size_t vertexStride = gp->getVertexStride();
+    size_t vertexStride = flushInfo.fGeometryProcessor->getVertexStride();
     SkASSERT(vertexStride == GrAtlasTextBlob::GetVertexStride(maskFormat));
 
-    target->initDraw(gp);
-
     int glyphCount = this->numGlyphs();
     const GrBuffer* vertexBuffer;
 
@@ -141,7 +139,7 @@
     GrFontScaler* scaler = nullptr;
     SkTypeface* typeface = nullptr;
 
-    GrBlobRegenHelper helper(this, target, &flushInfo, gp);
+    GrBlobRegenHelper helper(this, target, &flushInfo);
 
     for (int i = 0; i < fGeoCount; i++) {
         const Geometry& args = fGeoData[i];
@@ -187,7 +185,7 @@
                        flushInfo->fIndexBuffer, flushInfo->fVertexOffset,
                        kVerticesPerGlyph, kIndicesPerGlyph, flushInfo->fGlyphsToFlush,
                        maxGlyphsPerDraw);
-    target->draw(mesh);
+    target->draw(flushInfo->fGeometryProcessor, mesh);
     flushInfo->fVertexOffset += kVerticesPerGlyph * flushInfo->fGlyphsToFlush;
     flushInfo->fGlyphsToFlush = 0;
 }
@@ -314,5 +312,4 @@
 
 void GrBlobRegenHelper::flush() {
     fBatch->flush(fTarget, fFlushInfo);
-    fTarget->initDraw(fGP);
 }
diff --git a/src/gpu/batches/GrAtlasTextBatch.h b/src/gpu/batches/GrAtlasTextBatch.h
index 82d3115..e883fa1 100644
--- a/src/gpu/batches/GrAtlasTextBatch.h
+++ b/src/gpu/batches/GrAtlasTextBatch.h
@@ -99,10 +99,11 @@
     void initBatchTracker(const GrXPOverridesForBatch& overrides) override;
 
     struct FlushInfo {
-        SkAutoTUnref<const GrBuffer> fVertexBuffer;
-        SkAutoTUnref<const GrBuffer> fIndexBuffer;
-        int fGlyphsToFlush;
-        int fVertexOffset;
+        SkAutoTUnref<const GrBuffer>            fVertexBuffer;
+        SkAutoTUnref<const GrBuffer>            fIndexBuffer;
+        SkAutoTUnref<const GrGeometryProcessor> fGeometryProcessor;
+        int                                     fGlyphsToFlush;
+        int                                     fVertexOffset;
     };
 
     void onPrepareDraws(Target* target) const override;
@@ -195,12 +196,10 @@
 public:
     GrBlobRegenHelper(const GrAtlasTextBatch* batch,
                       GrVertexBatch::Target* target,
-                      GrAtlasTextBatch::FlushInfo* flushInfo,
-                      const GrGeometryProcessor* gp)
+                      GrAtlasTextBatch::FlushInfo* flushInfo)
         : fBatch(batch)
         , fTarget(target)
-        , fFlushInfo(flushInfo)
-        , fGP(gp) {}
+        , fFlushInfo(flushInfo) {}
 
     void flush();
 
@@ -212,7 +211,6 @@
     const GrAtlasTextBatch* fBatch;
     GrVertexBatch::Target* fTarget;
     GrAtlasTextBatch::FlushInfo* fFlushInfo;
-    const GrGeometryProcessor* fGP;
 };
 
 #endif
diff --git a/src/gpu/batches/GrDefaultPathRenderer.cpp b/src/gpu/batches/GrDefaultPathRenderer.cpp
index cda522e..94508b1 100644
--- a/src/gpu/batches/GrDefaultPathRenderer.cpp
+++ b/src/gpu/batches/GrDefaultPathRenderer.cpp
@@ -269,8 +269,6 @@
         size_t vertexStride = gp->getVertexStride();
         SkASSERT(vertexStride == sizeof(SkPoint));
 
-        target->initDraw(gp);
-
         int instanceCount = fGeoData.count();
 
         // compute number of vertices
@@ -369,7 +367,7 @@
         } else {
             mesh.init(primitiveType, vertexBuffer, firstVertex, vertexOffset);
         }
-        target->draw(mesh);
+        target->draw(gp, mesh);
 
         // put back reserves
         target->putBackIndices((size_t)(maxIndices - indexOffset));
diff --git a/src/gpu/batches/GrDrawAtlasBatch.cpp b/src/gpu/batches/GrDrawAtlasBatch.cpp
index 3884ef2..58ce137 100644
--- a/src/gpu/batches/GrDrawAtlasBatch.cpp
+++ b/src/gpu/batches/GrDrawAtlasBatch.cpp
@@ -58,8 +58,6 @@
                                                                      this->viewMatrix(),
                                                                      this->coverageIgnored()));
 
-    target->initDraw(gp);
-
     int instanceCount = fGeoData.count();
     size_t vertexStride = gp->getVertexStride();
     SkASSERT(vertexStride == sizeof(SkPoint) + sizeof(SkPoint)
@@ -81,7 +79,7 @@
         memcpy(vertPtr, args.fVerts.begin(), allocSize);
         vertPtr += allocSize;
     }
-    helper.recordDraw(target);
+    helper.recordDraw(target, gp);
 }
 
 GrDrawAtlasBatch::GrDrawAtlasBatch(const Geometry& geometry, const SkMatrix& viewMatrix,
diff --git a/src/gpu/batches/GrDrawBatch.h b/src/gpu/batches/GrDrawBatch.h
index d1083b3..bf93cf5 100644
--- a/src/gpu/batches/GrDrawBatch.h
+++ b/src/gpu/batches/GrDrawBatch.h
@@ -8,6 +8,7 @@
 #ifndef GrDrawBatch_DEFINED
 #define GrDrawBatch_DEFINED
 
+#include <functional>
 #include "GrBatch.h"
 #include "GrPipeline.h"
 
@@ -19,18 +20,25 @@
  * to sequence the uploads relative to each other and to draws.
  **/
 
-typedef uint64_t GrBatchToken;
-
-class GrBatchUploader : public SkRefCnt {
+class GrBatchDrawToken {
 public:
-    class TextureUploader;
+    static GrBatchDrawToken AlreadyFlushedToken() { return GrBatchDrawToken(0); }
 
-    GrBatchUploader(GrBatchToken lastUploadToken) : fLastUploadToken(lastUploadToken) {}
-    GrBatchToken lastUploadToken() const { return fLastUploadToken; }
-    virtual void upload(TextureUploader*)=0;
+    GrBatchDrawToken(const GrBatchDrawToken& that) : fSequenceNumber(that.fSequenceNumber) {}
+    GrBatchDrawToken& operator =(const GrBatchDrawToken& that) {
+        fSequenceNumber = that.fSequenceNumber;
+        return *this;
+    }
+    bool operator==(const GrBatchDrawToken& that) const {
+        return fSequenceNumber == that.fSequenceNumber;
+    }
+    bool operator!=(const GrBatchDrawToken& that) const { return !(*this == that); }
 
 private:
-    GrBatchToken fLastUploadToken;
+    GrBatchDrawToken();
+    explicit GrBatchDrawToken(uint64_t sequenceNumber) : fSequenceNumber(sequenceNumber) {}
+    friend class GrBatchFlushState;
+    uint64_t fSequenceNumber;
 };
 
 /**
@@ -38,6 +46,14 @@
  */
 class GrDrawBatch : public GrBatch {
 public:
+    /** Method that performs an upload on behalf of a DeferredUploadFn. */
+    using WritePixelsFn = std::function<bool(GrSurface* texture,
+                                             int left, int top, int width, int height,
+                                             GrPixelConfig config, const void* buffer,
+                                             size_t rowBytes)>;
+    /** See comments before GrDrawBatch::Target definition on how deferred uploaders work. */
+    using DeferredUploadFn = std::function<void(WritePixelsFn&)>;
+
     class Target;
 
     GrDrawBatch(uint32_t classID);
@@ -100,7 +116,14 @@
     virtual void initBatchTracker(const GrXPOverridesForBatch&) = 0;
 
 protected:
-    SkTArray<SkAutoTUnref<GrBatchUploader>, true>   fInlineUploads;
+    struct QueuedUpload {
+        QueuedUpload(DeferredUploadFn&& upload, GrBatchDrawToken token)
+            : fUpload(std::move(upload))
+            , fUploadBeforeToken(token) {}
+        DeferredUploadFn    fUpload;
+        GrBatchDrawToken    fUploadBeforeToken;
+    };
+    SkTArray<QueuedUpload>   fInlineUploads;
 
 private:
     SkAlignedSTStorage<1, GrPipeline>               fPipelineStorage;
diff --git a/src/gpu/batches/GrDrawVerticesBatch.cpp b/src/gpu/batches/GrDrawVerticesBatch.cpp
index d3c9f5e..12ea05a 100644
--- a/src/gpu/batches/GrDrawVerticesBatch.cpp
+++ b/src/gpu/batches/GrDrawVerticesBatch.cpp
@@ -97,8 +97,6 @@
     SkAutoTUnref<const GrGeometryProcessor> gp(
         set_vertex_attributes(hasLocalCoords, &colorOffset, &texOffset, fViewMatrix,
                               fCoverageIgnored));
-    target->initDraw(gp);
-
     size_t vertexStride = gp->getVertexStride();
 
     SkASSERT(vertexStride == sizeof(SkPoint) + (hasLocalCoords ? sizeof(SkPoint) : 0)
@@ -164,7 +162,7 @@
     } else {
         mesh.init(this->primitiveType(), vertexBuffer, firstVertex, fVertexCount);
     }
-    target->draw(mesh);
+    target->draw(gp, mesh);
 }
 
 bool GrDrawVerticesBatch::onCombineIfPossible(GrBatch* t, const GrCaps& caps) {
diff --git a/src/gpu/batches/GrNinePatch.cpp b/src/gpu/batches/GrNinePatch.cpp
index 45c33d7..24e3dd6 100644
--- a/src/gpu/batches/GrNinePatch.cpp
+++ b/src/gpu/batches/GrNinePatch.cpp
@@ -91,8 +91,6 @@
             return;
         }
 
-        target->initDraw(gp);
-
         size_t vertexStride = gp->getVertexStride();
         int instanceCount = fGeoData.count();
 
@@ -138,7 +136,7 @@
                 verts += kVertsPerRect * vertexStride;
             }
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     void initBatchTracker(const GrXPOverridesForBatch& overrides) override {
diff --git a/src/gpu/batches/GrNonAAStrokeRectBatch.cpp b/src/gpu/batches/GrNonAAStrokeRectBatch.cpp
index 8c32c83..36092ea 100644
--- a/src/gpu/batches/GrNonAAStrokeRectBatch.cpp
+++ b/src/gpu/batches/GrNonAAStrokeRectBatch.cpp
@@ -117,8 +117,6 @@
                                                      this->viewMatrix()));
         }
 
-        target->initDraw(gp);
-
         size_t vertexStride = gp->getVertexStride();
 
         SkASSERT(vertexStride == sizeof(GrDefaultGeoProcFactory::PositionAttr));
@@ -159,7 +157,7 @@
 
         GrMesh mesh;
         mesh.init(primType, vertexBuffer, firstVertex, vertexCount);
-        target->draw(mesh);
+        target->draw(gp, mesh);
     }
 
     void initBatchTracker(const GrXPOverridesForBatch& overrides) override {
diff --git a/src/gpu/batches/GrPLSPathRenderer.cpp b/src/gpu/batches/GrPLSPathRenderer.cpp
index ee0316a..46993c7 100644
--- a/src/gpu/batches/GrPLSPathRenderer.cpp
+++ b/src/gpu/batches/GrPLSPathRenderer.cpp
@@ -887,8 +887,7 @@
                 }
                 mesh.init(kTriangles_GrPrimitiveType, triVertexBuffer, firstTriVertex,
                           triVertices.count());
-                target->initDraw(triangleProcessor);
-                target->draw(mesh);
+                target->draw(triangleProcessor, mesh);
             }
 
             if (quadVertices.count()) {
@@ -906,8 +905,7 @@
                 }
                 mesh.init(kTriangles_GrPrimitiveType, quadVertexBuffer, firstQuadVertex,
                           quadVertices.count());
-                target->initDraw(quadProcessor);
-                target->draw(mesh);
+                target->draw(quadProcessor, mesh);
             }
 
             SkAutoTUnref<GrGeometryProcessor> finishProcessor(
@@ -935,8 +933,7 @@
 
             mesh.init(kTriangles_GrPrimitiveType, rectVertexBuffer, firstRectVertex,
                       kRectVertexCount);
-            target->initDraw(finishProcessor);
-            target->draw(mesh);
+            target->draw(finishProcessor, mesh);
         }
     }
 
diff --git a/src/gpu/batches/GrTInstanceBatch.h b/src/gpu/batches/GrTInstanceBatch.h
index 726903e..b85b3aa 100644
--- a/src/gpu/batches/GrTInstanceBatch.h
+++ b/src/gpu/batches/GrTInstanceBatch.h
@@ -96,8 +96,6 @@
             return;
         }
 
-        target->initDraw(gp);
-
         size_t vertexStride = gp->getVertexStride();
         int instanceCount = fGeoData.count();
 
@@ -117,7 +115,7 @@
                              i * Impl::kVertsPerInstance * vertexStride;
             Impl::Tesselate(verts, vertexStride, fGeoData[i], fOverrides);
         }
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     const Geometry& seedGeometry() const { return fGeoData[0]; }
diff --git a/src/gpu/batches/GrTessellatingPathRenderer.cpp b/src/gpu/batches/GrTessellatingPathRenderer.cpp
index 8185a85..1c17134 100644
--- a/src/gpu/batches/GrTessellatingPathRenderer.cpp
+++ b/src/gpu/batches/GrTessellatingPathRenderer.cpp
@@ -231,11 +231,9 @@
 
         GrPrimitiveType primitiveType = TESSELLATOR_WIREFRAME ? kLines_GrPrimitiveType
                                                               : kTriangles_GrPrimitiveType;
-        target->initDraw(gp);
-
         GrMesh mesh;
         mesh.init(primitiveType, vb, firstVertex, count);
-        target->draw(mesh);
+        target->draw(gp, mesh);
     }
 
     bool onCombineIfPossible(GrBatch*, const GrCaps&) override { return false; }
diff --git a/src/gpu/batches/GrTestBatch.h b/src/gpu/batches/GrTestBatch.h
index 5da65f8..273baae 100644
--- a/src/gpu/batches/GrTestBatch.h
+++ b/src/gpu/batches/GrTestBatch.h
@@ -14,73 +14,52 @@
 #include "batches/GrVertexBatch.h"
 
 /*
- * A simple batch only for testing purposes which actually doesn't batch at all, but can fit into
- * the batch pipeline and generate arbitrary geometry
+ * A simple solid color batch only for testing purposes which actually doesn't batch at all. It
+ * saves having to fill out some boilerplate methods.
  */
 class GrTestBatch : public GrVertexBatch {
 public:
-    struct Geometry {
-        GrColor fColor;
-    };
-
     virtual const char* name() const override = 0;
 
     void computePipelineOptimizations(GrInitInvariantOutput* color,
                                       GrInitInvariantOutput* coverage,
                                       GrBatchToXPOverrides* overrides) const override {
         // When this is called on a batch, there is only one geometry bundle
-        color->setKnownFourComponents(this->geoData(0)->fColor);
+        color->setKnownFourComponents(fColor);
         coverage->setUnknownSingleComponent();
     }
 
     void initBatchTracker(const GrXPOverridesForBatch& overrides) override {
-        // Handle any color overrides
-        if (!overrides.readsColor()) {
-            this->geoData(0)->fColor = GrColor_ILLEGAL;
-        }
-        overrides.getOverrideColorIfSet(&this->geoData(0)->fColor);
+        overrides.getOverrideColorIfSet(&fColor);
 
-        // setup batch properties
-        fBatch.fColorIgnored = !overrides.readsColor();
-        fBatch.fColor = this->geoData(0)->fColor;
-        fBatch.fUsesLocalCoords = overrides.readsLocalCoords();
-        fBatch.fCoverageIgnored = !overrides.readsCoverage();
+        fOptimizations.fColorIgnored = !overrides.readsColor();
+        fOptimizations.fUsesLocalCoords = overrides.readsLocalCoords();
+        fOptimizations.fCoverageIgnored = !overrides.readsCoverage();
     }
 
 protected:
-    GrTestBatch(uint32_t classID, const GrGeometryProcessor* gp, const SkRect& bounds)
-        : INHERITED(classID) {
-        fGeometryProcessor.reset(SkRef(gp));
-
+    GrTestBatch(uint32_t classID, const SkRect& bounds, GrColor color)
+        : INHERITED(classID)
+        , fColor(color) {
         this->setBounds(bounds);
     }
 
-    const GrGeometryProcessor* geometryProcessor() const { return fGeometryProcessor; }
+    struct Optimizations {
+        bool fColorIgnored = false;
+        bool fUsesLocalCoords = false;
+        bool fCoverageIgnored = false;
+    };
+
+    GrColor color() const { return fColor; }
+    const Optimizations optimizations() const { return fOptimizations; }
 
 private:
-    void onPrepareDraws(Target* target) const override {
-        target->initDraw(fGeometryProcessor);
-        this->generateGeometry(target);
-    }
-
-    virtual Geometry* geoData(int index) = 0;
-    virtual const Geometry* geoData(int index) const = 0;
-
     bool onCombineIfPossible(GrBatch* t, const GrCaps&) override {
         return false;
     }
 
-    virtual void generateGeometry(Target*) const = 0;
-
-    struct BatchTracker {
-        GrColor fColor;
-        bool fUsesLocalCoords;
-        bool fColorIgnored;
-        bool fCoverageIgnored;
-    };
-
-    SkAutoTUnref<const GrGeometryProcessor> fGeometryProcessor;
-    BatchTracker fBatch;
+    GrColor       fColor;
+    Optimizations fOptimizations;
 
     typedef GrVertexBatch INHERITED;
 };
diff --git a/src/gpu/batches/GrVertexBatch.cpp b/src/gpu/batches/GrVertexBatch.cpp
index fc7a1e4..8a5dd62 100644
--- a/src/gpu/batches/GrVertexBatch.cpp
+++ b/src/gpu/batches/GrVertexBatch.cpp
@@ -9,7 +9,10 @@
 #include "GrBatchFlushState.h"
 #include "GrResourceProvider.h"
 
-GrVertexBatch::GrVertexBatch(uint32_t classID) : INHERITED(classID) {}
+GrVertexBatch::GrVertexBatch(uint32_t classID)
+    : INHERITED(classID)
+    , fBaseDrawToken(GrBatchDrawToken::AlreadyFlushedToken()) {
+}
 
 void GrVertexBatch::onPrepare(GrBatchFlushState* state) {
     Target target(state, this);
@@ -42,9 +45,9 @@
     return vertices;
 }
 
-void GrVertexBatch::InstancedHelper::recordDraw(Target* target) {
+void GrVertexBatch::InstancedHelper::recordDraw(Target* target, const GrGeometryProcessor* gp) {
     SkASSERT(fMesh.instanceCount());
-    target->draw(fMesh);
+    target->draw(gp, fMesh);
 }
 
 void* GrVertexBatch::QuadHelper::init(Target* target, size_t vertexStride,
@@ -60,22 +63,50 @@
 }
 
 void GrVertexBatch::onDraw(GrBatchFlushState* state) {
-    int uploadCnt = fInlineUploads.count();
-    int currUpload = 0;
+    int currUploadIdx = 0;
+    int currMeshIdx = 0;
 
-    // Iterate of all the drawArrays. Before issuing the draws in each array, perform any inline
-    // uploads.
-    for (DrawArrayList::Iter da(fDrawArrays); da.get(); da.next()) {
-        state->advanceLastFlushedToken();
-        while (currUpload < uploadCnt &&
-               fInlineUploads[currUpload]->lastUploadToken() <= state->lastFlushedToken()) {
-            fInlineUploads[currUpload++]->upload(state->uploader());
+    SkASSERT(fQueuedDraws.empty() || fBaseDrawToken == state->nextTokenToFlush());
+
+    for (int currDrawIdx = 0; currDrawIdx < fQueuedDraws.count(); ++currDrawIdx) {
+        GrBatchDrawToken drawToken = state->nextTokenToFlush();
+        while (currUploadIdx < fInlineUploads.count() &&
+               fInlineUploads[currUploadIdx].fUploadBeforeToken == drawToken) {
+            state->doUpload(fInlineUploads[currUploadIdx++].fUpload);
         }
-        const GrVertexBatch::DrawArray& drawArray = *da.get();
+        const QueuedDraw &draw = fQueuedDraws[currDrawIdx];
+        state->gpu()->draw(*this->pipeline(), *draw.fGeometryProcessor.get(),
+                           fMeshes.begin() + currMeshIdx, draw.fMeshCnt);
+        currMeshIdx += draw.fMeshCnt;
+        state->flushToken();
+    }
+    SkASSERT(currUploadIdx == fInlineUploads.count());
+    SkASSERT(currMeshIdx == fMeshes.count());
+    fQueuedDraws.reset();
+    fInlineUploads.reset();
+}
 
-        state->gpu()->draw(*this->pipeline(),
-                           *drawArray.fPrimitiveProcessor.get(),
-                           drawArray.fDraws.begin(),
-                           drawArray.fDraws.count());
+//////////////////////////////////////////////////////////////////////////////
+
+void GrVertexBatch::Target::draw(const GrGeometryProcessor* gp, const GrMesh& mesh) {
+    GrVertexBatch* batch = this->vertexBatch();
+    batch->fMeshes.push_back(mesh);
+    if (!batch->fQueuedDraws.empty()) {
+        // If the last draw shares a geometry processor and there are no intervening uploads,
+        // add this mesh to it.
+        GrVertexBatch::QueuedDraw& lastDraw = this->vertexBatch()->fQueuedDraws.back();
+        if (lastDraw.fGeometryProcessor == gp &&
+            (batch->fInlineUploads.empty() ||
+             batch->fInlineUploads.back().fUploadBeforeToken != this->nextDrawToken())) {
+            ++lastDraw.fMeshCnt;
+            return;
+        }
+    }
+    GrVertexBatch::QueuedDraw& draw = this->vertexBatch()->fQueuedDraws.push_back();
+    GrBatchDrawToken token = this->state()->issueDrawToken();
+    draw.fGeometryProcessor.reset(gp);
+    draw.fMeshCnt = 1;
+    if (batch->fQueuedDraws.count() == 1) {
+        batch->fBaseDrawToken = token;
     }
 }
diff --git a/src/gpu/batches/GrVertexBatch.h b/src/gpu/batches/GrVertexBatch.h
index 2af4dd1..19475a7 100644
--- a/src/gpu/batches/GrVertexBatch.h
+++ b/src/gpu/batches/GrVertexBatch.h
@@ -9,8 +9,8 @@
 #define GrVertexBatch_DEFINED
 
 #include "GrDrawBatch.h"
+#include "GrGeometryProcessor.h"
 #include "GrMesh.h"
-#include "GrPrimitiveProcessor.h"
 #include "GrPendingProgramElement.h"
 
 #include "SkTLList.h"
@@ -32,16 +32,16 @@
    class InstancedHelper {
    public:
         InstancedHelper() {}
-        /** Returns the allocated storage for the vertices. The caller should populate the before
-            vertices before calling issueDraws(). */
+        /** Returns the allocated storage for the vertices. The caller should populate the vertices
+            before calling recordDraw(). */
         void* init(Target*, GrPrimitiveType, size_t vertexStride,
                    const GrBuffer*, int verticesPerInstance, int indicesPerInstance,
                    int instancesToDraw);
 
         /** Call after init() to issue draws to the batch target.*/
-        void recordDraw(Target* target);
+        void recordDraw(Target*, const GrGeometryProcessor*);
     private:
-        GrMesh  fMesh;
+        GrMesh fMesh;
     };
 
     static const int kVerticesPerQuad = 4;
@@ -52,9 +52,9 @@
     public:
         QuadHelper() : INHERITED() {}
         /** Finds the cached quad index buffer and reserves vertex space. Returns nullptr on failure
-            and on sucess a pointer to the vertex data that the caller should populate before
-            calling issueDraws(). */
-        void* init(Target* batchTarget, size_t vertexStride, int quadsToDraw);
+            and on success a pointer to the vertex data that the caller should populate before
+            calling recordDraw(). */
+        void* init(Target*, size_t vertexStride, int quadsToDraw);
 
         using InstancedHelper::recordDraw;
     private:
@@ -67,18 +67,23 @@
 
     virtual void onPrepareDraws(Target*) const = 0;
 
-    // A set of contiguous draws with no inline uploads between them that all use the same
-    // primitive processor. All the draws in a DrawArray share a primitive processor and use the
-    // the batch's GrPipeline.
-    struct DrawArray {
-        SkSTArray<1, GrMesh, true>                          fDraws;
-        GrPendingProgramElement<const GrPrimitiveProcessor> fPrimitiveProcessor;
+    // A set of contiguous draws that share a draw token and geometry processor. The draws all use
+    // the batch's pipeline. The meshes for the draw are stored in the fMeshes array and each
+    // QueuedDraw uses fMeshCnt meshes from the fMeshes array. The reason for coalescing meshes
+    // that share a geometry processor into a QueuedDraw is that it allows the Gpu object to set up
+    // the shared state once and then issue draws for each mesh.
+    struct QueuedDraw {
+        int fMeshCnt = 0;
+        GrPendingProgramElement<const GrGeometryProcessor> fGeometryProcessor;
     };
 
-    // Array of DrawArray. There may be inline uploads between each DrawArray and each DrawArray
-    // may use a different primitive processor.
-    typedef SkTLList<DrawArray, 4> DrawArrayList;
-    DrawArrayList fDrawArrays;
+    // All draws in all the vertex batches have implicit tokens based on the order they are
+    // enqueued globally across all batches. This is the offset of the first entry in fQueuedDraws.
+    // fQueuedDraws[i]'s token is fBaseDrawToken + i.
+    GrBatchDrawToken fBaseDrawToken;
+
+    SkSTArray<4, GrMesh>           fMeshes;
+    SkSTArray<4, QueuedDraw, true> fQueuedDraws;
 
     typedef GrDrawBatch INHERITED;
 };
diff --git a/src/gpu/effects/GrDashingEffect.cpp b/src/gpu/effects/GrDashingEffect.cpp
index 344b93f..f1e8016 100644
--- a/src/gpu/effects/GrDashingEffect.cpp
+++ b/src/gpu/effects/GrDashingEffect.cpp
@@ -358,8 +358,6 @@
             return;
         }
 
-        target->initDraw(gp);
-
         // useAA here means Edge AA or MSAA
         bool useAA = this->aaMode() != kBW_DashAAMode;
         bool fullDash = this->fullDash();
@@ -627,7 +625,7 @@
             rectIndex++;
         }
         SkASSERT(0 == (curVIdx % 4) && (curVIdx / 4) == totalRectCount);
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override {
diff --git a/src/gpu/text/GrAtlasTextBlob_regenInBatch.cpp b/src/gpu/text/GrAtlasTextBlob_regenInBatch.cpp
index c2f3f22..7283e6d 100644
--- a/src/gpu/text/GrAtlasTextBlob_regenInBatch.cpp
+++ b/src/gpu/text/GrAtlasTextBlob_regenInBatch.cpp
@@ -206,7 +206,7 @@
                 SkASSERT(success);
             }
             fontCache->addGlyphToBulkAndSetUseToken(info->bulkUseToken(), glyph,
-                                                    target->currentToken());
+                                                    target->nextDrawToken());
             log2Width = fontCache->log2Width(info->maskFormat());
             log2Height = fontCache->log2Height(info->maskFormat());
         }
@@ -311,7 +311,7 @@
 
             // set use tokens for all of the glyphs in our subrun.  This is only valid if we
             // have a valid atlas generation
-            fontCache->setUseTokenBulk(*info.bulkUseToken(), target->currentToken(),
+            fontCache->setUseTokenBulk(*info.bulkUseToken(), target->nextDrawToken(),
                                         info.maskFormat());
             break;
     }
diff --git a/src/gpu/text/GrBatchFontCache.h b/src/gpu/text/GrBatchFontCache.h
index 7ff1b9e..8e420cd 100644
--- a/src/gpu/text/GrBatchFontCache.h
+++ b/src/gpu/text/GrBatchFontCache.h
@@ -143,14 +143,14 @@
     // For convenience, this function will also set the use token for the current glyph if required
     // NOTE: the bulk uploader is only valid if the subrun has a valid atlasGeneration
     void addGlyphToBulkAndSetUseToken(GrBatchAtlas::BulkUseTokenUpdater* updater,
-                                      GrGlyph* glyph, GrBatchToken token) {
+                                      GrGlyph* glyph, GrBatchDrawToken token) {
         SkASSERT(glyph);
         updater->add(glyph->fID);
         this->getAtlas(glyph->fMaskFormat)->setLastUseToken(glyph->fID, token);
     }
 
     void setUseTokenBulk(const GrBatchAtlas::BulkUseTokenUpdater& updater,
-                         GrBatchToken token,
+                         GrBatchDrawToken token,
                          GrMaskFormat format) {
         this->getAtlas(format)->setLastUseTokenBulk(updater, token);
     }
diff --git a/tests/PrimitiveProcessorTest.cpp b/tests/PrimitiveProcessorTest.cpp
index 52210f2..a6f2484 100644
--- a/tests/PrimitiveProcessorTest.cpp
+++ b/tests/PrimitiveProcessorTest.cpp
@@ -88,12 +88,11 @@
             SkTArray<SkString> fAttribNames;
         };
         SkAutoTUnref<GrGeometryProcessor> gp(new GP(fNumAttribs));
-        target->initDraw(gp);
         QuadHelper helper;
         size_t vertexStride = gp->getVertexStride();
         SkPoint* vertices = reinterpret_cast<SkPoint*>(helper.init(target, vertexStride, 1));
         vertices->setRectFan(0.f, 0.f, 1.f, 1.f, vertexStride);
-        helper.recordDraw(target);
+        helper.recordDraw(target, gp);
     }
 
     int fNumAttribs;