Refactor GrVkGpuCommandBuffer to handle multiple GrVkCommandBuffers.

This CL is a preparatory change for fixing some issues where a GpuCB will
need to internally hold multiple command buffers that it submits. Because
of this, I need to move the bounds calculations down into the VkGpuCB,
since we need to know the bounds for each sub command buffer and not
just for the entire set of commands. In part this is good, since GL never
actually needed the calculations, so it saves some work there.

BUG=skia:

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=3142

Change-Id: Ied918765da3630aa6d87e29ccce6c883b96c4ead
Reviewed-on: https://skia-review.googlesource.com/3142
Commit-Queue: Greg Daniel <egdaniel@google.com>
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/gpu/GrDrawTarget.cpp b/src/gpu/GrDrawTarget.cpp
index 0117d0e..5b2530f 100644
--- a/src/gpu/GrDrawTarget.cpp
+++ b/src/gpu/GrDrawTarget.cpp
@@ -229,7 +229,6 @@
     SkRandom random;
     GrRenderTarget* currentRT = nullptr;
     SkAutoTDelete<GrGpuCommandBuffer> commandBuffer;
-    SkRect bounds = SkRect::MakeEmpty();
     for (int i = 0; i < fRecordedBatches.count(); ++i) {
         if (!fRecordedBatches[i].fBatch) {
             continue;
@@ -237,16 +236,9 @@
         if (fRecordedBatches[i].fBatch->renderTarget() != currentRT) {
             if (commandBuffer) {
                 commandBuffer->end();
-                if (bounds.intersect(0, 0,
-                                     SkIntToScalar(currentRT->width()),
-                                     SkIntToScalar(currentRT->height()))) {
-                    SkIRect iBounds;
-                    bounds.roundOut(&iBounds);
-                    commandBuffer->submit(iBounds);
-                }
+                commandBuffer->submit();
                 commandBuffer.reset();
             }
-            bounds.setEmpty();
             currentRT = fRecordedBatches[i].fBatch->renderTarget();
             if (currentRT) {
                 static const GrGpuCommandBuffer::LoadAndStoreInfo kBasicLoadStoreInfo
@@ -258,9 +250,6 @@
             }
             flushState->setCommandBuffer(commandBuffer);
         }
-        if (commandBuffer) {
-            bounds.join(fRecordedBatches[i].fClippedBounds);
-        }
         if (fDrawBatchBounds) {
             const SkRect& bounds = fRecordedBatches[i].fClippedBounds;
             SkIRect ibounds;
@@ -271,17 +260,11 @@
                 fGpu->drawDebugWireRect(rt, ibounds, 0xFF000000 | random.nextU());
             }
         }
-        fRecordedBatches[i].fBatch->draw(flushState);
+        fRecordedBatches[i].fBatch->draw(flushState, fRecordedBatches[i].fClippedBounds);
     }
     if (commandBuffer) {
         commandBuffer->end();
-        if (bounds.intersect(0, 0,
-                             SkIntToScalar(currentRT->width()),
-                             SkIntToScalar(currentRT->height()))) {
-            SkIRect iBounds;
-            bounds.roundOut(&iBounds);
-            commandBuffer->submit(iBounds);
-        }
+        commandBuffer->submit();
         flushState->setCommandBuffer(nullptr);
     }
 
diff --git a/src/gpu/GrGpuCommandBuffer.cpp b/src/gpu/GrGpuCommandBuffer.cpp
index d30844c..d2a4e6e 100644
--- a/src/gpu/GrGpuCommandBuffer.cpp
+++ b/src/gpu/GrGpuCommandBuffer.cpp
@@ -14,9 +14,9 @@
 #include "GrRenderTarget.h"
 #include "SkRect.h"
 
-void GrGpuCommandBuffer::submit(const SkIRect& bounds) {
+void GrGpuCommandBuffer::submit() {
     this->gpu()->handleDirtyContext();
-    this->onSubmit(bounds);
+    this->onSubmit();
 }
 
 void GrGpuCommandBuffer::clear(const GrFixedClip& clip, GrColor color) {
@@ -38,12 +38,13 @@
 bool GrGpuCommandBuffer::draw(const GrPipeline& pipeline,
                               const GrPrimitiveProcessor& primProc,
                               const GrMesh* mesh,
-                              int meshCount) {
+                              int meshCount,
+                              const SkRect& bounds) {
     if (primProc.numAttribs() > this->gpu()->caps()->maxVertexAttributes()) {
         this->gpu()->stats()->incNumFailedDraws();
         return false;
     }
-    this->onDraw(pipeline, primProc, mesh, meshCount);
+    this->onDraw(pipeline, primProc, mesh, meshCount, bounds);
     return true;
 }
 
diff --git a/src/gpu/GrGpuCommandBuffer.h b/src/gpu/GrGpuCommandBuffer.h
index 0d5220a..841d311 100644
--- a/src/gpu/GrGpuCommandBuffer.h
+++ b/src/gpu/GrGpuCommandBuffer.h
@@ -17,6 +17,7 @@
 class GrPrimitiveProcessor;
 class GrRenderTarget;
 struct SkIRect;
+struct SkRect;
 
 /**
  * The GrGpuCommandBuffer is a series of commands (draws, clears, and discards), which all target
@@ -51,8 +52,7 @@
 
     // Sends the command buffer off to the GPU object to execute the commands built up in the
     // buffer. The gpu object is allowed to defer execution of the commands until it is flushed.
-    // The bounds should represent the bounds of all the draws put into the command buffer.
-    void submit(const SkIRect& bounds);
+    void submit();
 
     // We pass in an array of meshCount GrMesh to the draw. The backend should loop over each
     // GrMesh object and emit a draw for it. Each draw will use the same GrPipeline and
@@ -61,7 +61,8 @@
     bool draw(const GrPipeline&,
               const GrPrimitiveProcessor&,
               const GrMesh*,
-              int meshCount);
+              int meshCount,
+              const SkRect& bounds);
 
     /**
     * Clear the passed in render target. Ignores the draw state and clip.
@@ -69,6 +70,7 @@
     void clear(const GrFixedClip&, GrColor);
 
     void clearStencilClip(const GrFixedClip&, bool insideStencilMask);
+
     /**
     * Discards the contents render target. nullptr indicates that the current render target should
     * be discarded.
@@ -80,13 +82,14 @@
     virtual GrGpu* gpu() = 0;
     virtual GrRenderTarget* renderTarget() = 0;
 
-    virtual void onSubmit(const SkIRect& bounds) = 0;
+    virtual void onSubmit() = 0;
 
     // overridden by backend-specific derived class to perform the draw call.
     virtual void onDraw(const GrPipeline&,
                         const GrPrimitiveProcessor&,
                         const GrMesh*,
-                        int meshCount) = 0;
+                        int meshCount,
+                        const SkRect& bounds) = 0;
 
     // overridden by backend-specific derived class to perform the clear.
     virtual void onClear(const GrFixedClip&, GrColor) = 0;
diff --git a/src/gpu/batches/GrBatch.h b/src/gpu/batches/GrBatch.h
index 8dafe9f..bef01b7 100644
--- a/src/gpu/batches/GrBatch.h
+++ b/src/gpu/batches/GrBatch.h
@@ -125,7 +125,7 @@
     void prepare(GrBatchFlushState* state) { this->onPrepare(state); }
 
     /** Issues the batches commands to GrGpu. */
-    void draw(GrBatchFlushState* state) { this->onDraw(state); }
+    void draw(GrBatchFlushState* state, const SkRect& bounds) { this->onDraw(state, bounds); }
 
     /** Used to block batching across render target changes. Remove this once we store
         GrBatches for different RTs in different targets. */
@@ -191,7 +191,7 @@
     virtual bool onCombineIfPossible(GrBatch*, const GrCaps& caps) = 0;
 
     virtual void onPrepare(GrBatchFlushState*) = 0;
-    virtual void onDraw(GrBatchFlushState*) = 0;
+    virtual void onDraw(GrBatchFlushState*, const SkRect& bounds) = 0;
 
     static uint32_t GenID(int32_t* idCounter) {
         // The atomic inc returns the old value not the incremented value. So we add
diff --git a/src/gpu/batches/GrClearBatch.h b/src/gpu/batches/GrClearBatch.h
index c4bbadc..f11d485 100644
--- a/src/gpu/batches/GrClearBatch.h
+++ b/src/gpu/batches/GrClearBatch.h
@@ -94,7 +94,7 @@
 
     void onPrepare(GrBatchFlushState*) override {}
 
-    void onDraw(GrBatchFlushState* state) override {
+    void onDraw(GrBatchFlushState* state, const SkRect& /*bounds*/) override {
         state->commandBuffer()->clear(fClip, fColor);
     }
 
diff --git a/src/gpu/batches/GrClearStencilClipBatch.h b/src/gpu/batches/GrClearStencilClipBatch.h
index d9d5b2e..42d7c44 100644
--- a/src/gpu/batches/GrClearStencilClipBatch.h
+++ b/src/gpu/batches/GrClearStencilClipBatch.h
@@ -50,7 +50,7 @@
 
     void onPrepare(GrBatchFlushState*) override {}
 
-    void onDraw(GrBatchFlushState* state) override {
+    void onDraw(GrBatchFlushState* state, const SkRect& /*bounds*/) override {
         state->commandBuffer()->clearStencilClip(fClip, fInsideStencilMask);
     }
 
diff --git a/src/gpu/batches/GrCopySurfaceBatch.h b/src/gpu/batches/GrCopySurfaceBatch.h
index fea8aae..c987d0d 100644
--- a/src/gpu/batches/GrCopySurfaceBatch.h
+++ b/src/gpu/batches/GrCopySurfaceBatch.h
@@ -66,7 +66,7 @@
 
     void onPrepare(GrBatchFlushState*) override {}
 
-    void onDraw(GrBatchFlushState* state) override {
+    void onDraw(GrBatchFlushState* state, const SkRect& /*bounds*/) override {
         if (!state->commandBuffer()) {
             state->gpu()->copySurface(fDst.get(), fSrc.get(), fSrcRect, fDstPoint);
         } else {
diff --git a/src/gpu/batches/GrDiscardBatch.h b/src/gpu/batches/GrDiscardBatch.h
index c9aa7d6..8a3be0e 100644
--- a/src/gpu/batches/GrDiscardBatch.h
+++ b/src/gpu/batches/GrDiscardBatch.h
@@ -43,7 +43,7 @@
 
     void onPrepare(GrBatchFlushState*) override {}
 
-    void onDraw(GrBatchFlushState* state) override {
+    void onDraw(GrBatchFlushState* state, const SkRect& /*bounds*/) override {
         state->commandBuffer()->discard();
     }
 
diff --git a/src/gpu/batches/GrDrawPathBatch.cpp b/src/gpu/batches/GrDrawPathBatch.cpp
index 815fe74..4080d2f 100644
--- a/src/gpu/batches/GrDrawPathBatch.cpp
+++ b/src/gpu/batches/GrDrawPathBatch.cpp
@@ -26,7 +26,7 @@
     return string;
 }
 
-void GrDrawPathBatch::onDraw(GrBatchFlushState* state) {
+void GrDrawPathBatch::onDraw(GrBatchFlushState* state, const SkRect& bounds) {
     GrProgramDesc  desc;
 
     SkAutoTUnref<GrPathProcessor> pathProc(GrPathProcessor::Create(this->color(),
@@ -116,7 +116,7 @@
     return true;
 }
 
-void GrDrawPathRangeBatch::onDraw(GrBatchFlushState* state) {
+void GrDrawPathRangeBatch::onDraw(GrBatchFlushState* state, const SkRect& bounds) {
     const Draw& head = *fDraws.head();
 
     SkMatrix drawMatrix(this->viewMatrix());
diff --git a/src/gpu/batches/GrDrawPathBatch.h b/src/gpu/batches/GrDrawPathBatch.h
index 33bf678..6adc943 100644
--- a/src/gpu/batches/GrDrawPathBatch.h
+++ b/src/gpu/batches/GrDrawPathBatch.h
@@ -81,7 +81,7 @@
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override { return false; }
 
-    void onDraw(GrBatchFlushState* state) override;
+    void onDraw(GrBatchFlushState* state, const SkRect& bounds) override;
 
     GrPendingIOResource<const GrPath, kRead_GrIOType> fPath;
 
@@ -174,7 +174,7 @@
 
     bool onCombineIfPossible(GrBatch* t, const GrCaps& caps) override;
 
-    void onDraw(GrBatchFlushState* state) override;
+    void onDraw(GrBatchFlushState* state, const SkRect& bounds) override;
 
     struct Draw {
         void set(const InstanceData* instanceData, SkScalar x, SkScalar y) {
diff --git a/src/gpu/batches/GrStencilPathBatch.h b/src/gpu/batches/GrStencilPathBatch.h
index f505a53..73bec19 100644
--- a/src/gpu/batches/GrStencilPathBatch.h
+++ b/src/gpu/batches/GrStencilPathBatch.h
@@ -66,7 +66,7 @@
 
     void onPrepare(GrBatchFlushState*) override {}
 
-    void onDraw(GrBatchFlushState* state) override {
+    void onDraw(GrBatchFlushState* state, const SkRect& bounds) override {
         GrPathRendering::StencilPathArgs args(fUseHWAA, fRenderTarget.get(), &fViewMatrix,
                                               &fScissor, &fStencil);
         state->gpu()->pathRendering()->stencilPath(args, fPath.get());
diff --git a/src/gpu/batches/GrVertexBatch.cpp b/src/gpu/batches/GrVertexBatch.cpp
index af3a186..32413b8 100644
--- a/src/gpu/batches/GrVertexBatch.cpp
+++ b/src/gpu/batches/GrVertexBatch.cpp
@@ -62,7 +62,7 @@
                                  quadIndexBuffer, kVerticesPerQuad, kIndicesPerQuad, quadsToDraw);
 }
 
-void GrVertexBatch::onDraw(GrBatchFlushState* state) {
+void GrVertexBatch::onDraw(GrBatchFlushState* state, const SkRect& bounds) {
     int currUploadIdx = 0;
     int currMeshIdx = 0;
 
@@ -76,7 +76,7 @@
         }
         const QueuedDraw &draw = fQueuedDraws[currDrawIdx];
         state->commandBuffer()->draw(*this->pipeline(), *draw.fGeometryProcessor.get(),
-                                     fMeshes.begin() + currMeshIdx, draw.fMeshCnt);
+                                     fMeshes.begin() + currMeshIdx, draw.fMeshCnt, bounds);
         currMeshIdx += draw.fMeshCnt;
         state->flushToken();
     }
diff --git a/src/gpu/batches/GrVertexBatch.h b/src/gpu/batches/GrVertexBatch.h
index 19475a7..1159e5e 100644
--- a/src/gpu/batches/GrVertexBatch.h
+++ b/src/gpu/batches/GrVertexBatch.h
@@ -63,7 +63,7 @@
 
 private:
     void onPrepare(GrBatchFlushState* state) final;
-    void onDraw(GrBatchFlushState* state) final;
+    void onDraw(GrBatchFlushState* state, const SkRect& bounds) final;
 
     virtual void onPrepareDraws(Target*) const = 0;
 
diff --git a/src/gpu/gl/GrGLGpuCommandBuffer.h b/src/gpu/gl/GrGLGpuCommandBuffer.h
index 0c9ddc2..65ad543 100644
--- a/src/gpu/gl/GrGLGpuCommandBuffer.h
+++ b/src/gpu/gl/GrGLGpuCommandBuffer.h
@@ -33,12 +33,13 @@
     GrGpu* gpu() override { return fGpu; }
     GrRenderTarget* renderTarget() override { return fRenderTarget; }
 
-    void onSubmit(const SkIRect& bounds) override {}
+    void onSubmit() override {}
 
     void onDraw(const GrPipeline& pipeline,
                 const GrPrimitiveProcessor& primProc,
                 const GrMesh* mesh,
-                int meshCount) override {
+                int meshCount,
+                const SkRect& bounds) override {
         fGpu->draw(pipeline, primProc, mesh, meshCount);
     }
 
diff --git a/src/gpu/instanced/InstancedRendering.cpp b/src/gpu/instanced/InstancedRendering.cpp
index 66e53dd..5564c63 100644
--- a/src/gpu/instanced/InstancedRendering.cpp
+++ b/src/gpu/instanced/InstancedRendering.cpp
@@ -462,7 +462,7 @@
     this->onBeginFlush(rp);
 }
 
-void InstancedRendering::Batch::onDraw(GrBatchFlushState* state) {
+void InstancedRendering::Batch::onDraw(GrBatchFlushState* state, const SkRect& bounds) {
     SkASSERT(State::kFlushing == fInstancedRendering->fState);
     SkASSERT(state->gpu() == fInstancedRendering->gpu());
 
diff --git a/src/gpu/instanced/InstancedRendering.h b/src/gpu/instanced/InstancedRendering.h
index b2c360b..77dc07e 100644
--- a/src/gpu/instanced/InstancedRendering.h
+++ b/src/gpu/instanced/InstancedRendering.h
@@ -124,7 +124,7 @@
                                           GrBatchToXPOverrides*) const override;
 
         void onPrepare(GrBatchFlushState*) override {}
-        void onDraw(GrBatchFlushState*) override;
+        void onDraw(GrBatchFlushState*, const SkRect& bounds) override;
 
         InstancedRendering* const         fInstancedRendering;
         BatchInfo                         fInfo;
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
index f4ac7c8..1a9ea1f 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
@@ -20,6 +20,7 @@
 #include "GrVkRenderTarget.h"
 #include "GrVkResourceProvider.h"
 #include "GrVkTexture.h"
+#include "SkRect.h"
 
 void get_vk_load_store_ops(const GrGpuCommandBuffer::LoadAndStoreInfo& info,
                            VkAttachmentLoadOp* loadOp, VkAttachmentStoreOp* storeOp) {
@@ -68,36 +69,44 @@
     get_vk_load_store_ops(stencilInfo, &vkLoadOp, &vkStoreOp);
     GrVkRenderPass::LoadStoreOps vkStencilOps(vkLoadOp, vkStoreOp);
 
+    CommandBufferInfo& cbInfo = fCommandBufferInfos.push_back();
+    fCurrentCmdBuffer = 0;
+
     const GrVkResourceProvider::CompatibleRPHandle& rpHandle = target->compatibleRenderPassHandle();
     if (rpHandle.isValid()) {
-        fRenderPass = fGpu->resourceProvider().findRenderPass(rpHandle,
-                                                              vkColorOps,
-                                                              vkStencilOps);
+        cbInfo.fRenderPass = fGpu->resourceProvider().findRenderPass(rpHandle,
+                                                                     vkColorOps,
+                                                                     vkStencilOps);
     } else {
-        fRenderPass = fGpu->resourceProvider().findRenderPass(*target,
-                                                              vkColorOps,
-                                                              vkStencilOps);
+        cbInfo.fRenderPass = fGpu->resourceProvider().findRenderPass(*target,
+                                                                     vkColorOps,
+                                                                     vkStencilOps);
     }
 
-    GrColorToRGBAFloat(colorInfo.fClearColor, fColorClearValue.color.float32);
+    GrColorToRGBAFloat(colorInfo.fClearColor, cbInfo.fColorClearValue.color.float32);
 
-    fCommandBuffer = gpu->resourceProvider().findOrCreateSecondaryCommandBuffer();
-    fCommandBuffer->begin(gpu, target->framebuffer(), fRenderPass);
+    cbInfo.fBounds.setEmpty();
+
+    cbInfo.fCommandBuffer = gpu->resourceProvider().findOrCreateSecondaryCommandBuffer();
+    cbInfo.fCommandBuffer->begin(gpu, target->framebuffer(), cbInfo.fRenderPass);
 }
 
 GrVkGpuCommandBuffer::~GrVkGpuCommandBuffer() {
-    fCommandBuffer->unref(fGpu);
-    fRenderPass->unref(fGpu);
+    for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
+        CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
+        cbInfo.fCommandBuffer->unref(fGpu);
+        cbInfo.fRenderPass->unref(fGpu);
+    }
 }
 
 GrGpu* GrVkGpuCommandBuffer::gpu() { return fGpu; }
 GrRenderTarget* GrVkGpuCommandBuffer::renderTarget() { return fRenderTarget; }
 
 void GrVkGpuCommandBuffer::end() {
-    fCommandBuffer->end(fGpu);
+    fCommandBufferInfos[fCurrentCmdBuffer].fCommandBuffer->end(fGpu);
 }
 
-void GrVkGpuCommandBuffer::onSubmit(const SkIRect& bounds) {
+void GrVkGpuCommandBuffer::onSubmit() {
     // TODO: We can't add this optimization yet since many things create a scratch texture which
     // adds the discard immediately, but then don't draw to it right away. This causes the discard
     // to be ignored and we get yelled at for loading uninitialized data. However, once MDP lands,
@@ -133,33 +142,45 @@
                                   false);
     }
 
-    fGpu->submitSecondaryCommandBuffer(fCommandBuffer, fRenderPass, &fColorClearValue,
-                                       fRenderTarget, bounds);
+    for (int i = 0; i < fCommandBufferInfos.count(); ++i) {
+        CommandBufferInfo& cbInfo = fCommandBufferInfos[i];
+
+        if (cbInfo.fBounds.intersect(0, 0,
+                                     SkIntToScalar(fRenderTarget->width()),
+                                     SkIntToScalar(fRenderTarget->height()))) {
+            SkIRect iBounds;
+            cbInfo.fBounds.roundOut(&iBounds);
+
+            fGpu->submitSecondaryCommandBuffer(cbInfo.fCommandBuffer, cbInfo.fRenderPass,
+                                               &cbInfo.fColorClearValue, fRenderTarget, iBounds);
+        }
+    }
 }
 
 void GrVkGpuCommandBuffer::discard() {
     if (fIsEmpty) {
+        CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
         // We will change the render pass to do a clear load instead
         GrVkRenderPass::LoadStoreOps vkColorOps(VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                                                 VK_ATTACHMENT_STORE_OP_STORE);
         GrVkRenderPass::LoadStoreOps vkStencilOps(VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                                                   VK_ATTACHMENT_STORE_OP_STORE);
 
-        const GrVkRenderPass* oldRP = fRenderPass;
+        const GrVkRenderPass* oldRP = cbInfo.fRenderPass;
 
         const GrVkResourceProvider::CompatibleRPHandle& rpHandle =
             fRenderTarget->compatibleRenderPassHandle();
         if (rpHandle.isValid()) {
-            fRenderPass = fGpu->resourceProvider().findRenderPass(rpHandle,
-                                                                  vkColorOps,
-                                                                  vkStencilOps);
+            cbInfo.fRenderPass = fGpu->resourceProvider().findRenderPass(rpHandle,
+                                                                         vkColorOps,
+                                                                         vkStencilOps);
         } else {
-            fRenderPass = fGpu->resourceProvider().findRenderPass(*fRenderTarget,
-                                                                  vkColorOps,
-                                                                  vkStencilOps);
+            cbInfo.fRenderPass = fGpu->resourceProvider().findRenderPass(*fRenderTarget,
+                                                                         vkColorOps,
+                                                                         vkStencilOps);
         }
 
-        SkASSERT(fRenderPass->isCompatible(*oldRP));
+        SkASSERT(cbInfo.fRenderPass->isCompatible(*oldRP));
         oldRP->unref(fGpu);
         fStartsWithClear = false;
     }
@@ -169,6 +190,8 @@
                                               bool insideStencilMask) {
     SkASSERT(!clip.hasWindowRectangles());
 
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+
     GrStencilAttachment* sb = fRenderTarget->renderTargetPriv().getStencilAttachment();
     // this should only be called internally when we know we have a
     // stencil buffer.
@@ -206,21 +229,30 @@
     clearRect.layerCount = 1;
 
     uint32_t stencilIndex;
-    SkAssertResult(fRenderPass->stencilAttachmentIndex(&stencilIndex));
+    SkAssertResult(cbInfo.fRenderPass->stencilAttachmentIndex(&stencilIndex));
 
     VkClearAttachment attachment;
     attachment.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
     attachment.colorAttachment = 0; // this value shouldn't matter
     attachment.clearValue.depthStencil = vkStencilColor;
 
-    fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
+    cbInfo.fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
     fIsEmpty = false;
+
+    // Update command buffer bounds
+    if (!clip.scissorEnabled()) {
+        cbInfo.fBounds.join(fRenderTarget->getBoundsRect());
+    } else {
+        cbInfo.fBounds.join(SkRect::Make(clip.scissorRect()));
+    }
 }
 
 void GrVkGpuCommandBuffer::onClear(const GrFixedClip& clip, GrColor color) {
     // parent class should never let us get here with no RT
     SkASSERT(!clip.hasWindowRectangles());
 
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+
     VkClearColorValue vkColor;
     GrColorToRGBAFloat(color, vkColor.float32);
 
@@ -231,25 +263,28 @@
         GrVkRenderPass::LoadStoreOps vkStencilOps(VK_ATTACHMENT_LOAD_OP_LOAD,
                                                   VK_ATTACHMENT_STORE_OP_STORE);
 
-        const GrVkRenderPass* oldRP = fRenderPass;
+        const GrVkRenderPass* oldRP = cbInfo.fRenderPass;
 
         const GrVkResourceProvider::CompatibleRPHandle& rpHandle =
             fRenderTarget->compatibleRenderPassHandle();
         if (rpHandle.isValid()) {
-            fRenderPass = fGpu->resourceProvider().findRenderPass(rpHandle,
-                                                                  vkColorOps,
-                                                                  vkStencilOps);
+            cbInfo.fRenderPass = fGpu->resourceProvider().findRenderPass(rpHandle,
+                                                                         vkColorOps,
+                                                                         vkStencilOps);
         } else {
-            fRenderPass = fGpu->resourceProvider().findRenderPass(*fRenderTarget,
-                                                                  vkColorOps,
-                                                                  vkStencilOps);
+            cbInfo.fRenderPass = fGpu->resourceProvider().findRenderPass(*fRenderTarget,
+                                                                         vkColorOps,
+                                                                         vkStencilOps);
         }
 
-        SkASSERT(fRenderPass->isCompatible(*oldRP));
+        SkASSERT(cbInfo.fRenderPass->isCompatible(*oldRP));
         oldRP->unref(fGpu);
 
-        GrColorToRGBAFloat(color, fColorClearValue.color.float32);
+        GrColorToRGBAFloat(color, cbInfo.fColorClearValue.color.float32);
         fStartsWithClear = true;
+
+        // Update command buffer bounds
+        cbInfo.fBounds.join(fRenderTarget->getBoundsRect());
         return;
     }
 
@@ -272,15 +307,22 @@
     clearRect.layerCount = 1;
 
     uint32_t colorIndex;
-    SkAssertResult(fRenderPass->colorAttachmentIndex(&colorIndex));
+    SkAssertResult(cbInfo.fRenderPass->colorAttachmentIndex(&colorIndex));
 
     VkClearAttachment attachment;
     attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
     attachment.colorAttachment = colorIndex;
     attachment.clearValue.color = vkColor;
 
-    fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
+    cbInfo.fCommandBuffer->clearAttachments(fGpu, 1, &attachment, 1, &clearRect);
     fIsEmpty = false;
+
+    // Update command buffer bounds
+    if (!clip.scissorEnabled()) {
+        cbInfo.fBounds.join(fRenderTarget->getBoundsRect());
+    } else {
+        cbInfo.fBounds.join(SkRect::Make(clip.scissorRect()));
+    }
     return;
 }
 
@@ -288,6 +330,7 @@
 
 void GrVkGpuCommandBuffer::bindGeometry(const GrPrimitiveProcessor& primProc,
                                         const GrNonInstancedMesh& mesh) {
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
     // There is no need to put any memory barriers to make sure host writes have finished here.
     // When a command buffer is submitted to a queue, there is an implicit memory barrier that
     // occurs for all host writes. Additionally, BufferMemoryBarriers are not allowed inside of
@@ -298,7 +341,7 @@
     SkASSERT(vbuf);
     SkASSERT(!vbuf->isMapped());
 
-    fCommandBuffer->bindVertexBuffer(fGpu, vbuf);
+    cbInfo.fCommandBuffer->bindVertexBuffer(fGpu, vbuf);
 
     if (mesh.isIndexed()) {
         SkASSERT(!mesh.indexBuffer()->isCPUBacked());
@@ -306,29 +349,30 @@
         SkASSERT(ibuf);
         SkASSERT(!ibuf->isMapped());
 
-        fCommandBuffer->bindIndexBuffer(fGpu, ibuf);
+        cbInfo.fCommandBuffer->bindIndexBuffer(fGpu, ibuf);
     }
 }
 
 sk_sp<GrVkPipelineState> GrVkGpuCommandBuffer::prepareDrawState(
                                                                const GrPipeline& pipeline,
                                                                const GrPrimitiveProcessor& primProc,
-                                                               GrPrimitiveType primitiveType,
-                                                               const GrVkRenderPass& renderPass) {
+                                                               GrPrimitiveType primitiveType) {
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+
     sk_sp<GrVkPipelineState> pipelineState =
         fGpu->resourceProvider().findOrCreateCompatiblePipelineState(pipeline,
                                                                      primProc,
                                                                      primitiveType,
-                                                                     renderPass);
+                                                                     *cbInfo.fRenderPass);
     if (!pipelineState) {
         return pipelineState;
     }
 
     pipelineState->setData(fGpu, primProc, pipeline);
 
-    pipelineState->bind(fGpu, fCommandBuffer);
+    pipelineState->bind(fGpu, cbInfo.fCommandBuffer);
 
-    GrVkPipeline::SetDynamicState(fGpu, fCommandBuffer, pipeline);
+    GrVkPipeline::SetDynamicState(fGpu, cbInfo.fCommandBuffer, pipeline);
 
     return pipelineState;
 }
@@ -368,12 +412,13 @@
 void GrVkGpuCommandBuffer::onDraw(const GrPipeline& pipeline,
                                   const GrPrimitiveProcessor& primProc,
                                   const GrMesh* meshes,
-                                  int meshCount) {
+                                  int meshCount,
+                                  const SkRect& bounds) {
     if (!meshCount) {
         return;
     }
-    const GrVkRenderPass* renderPass = fRenderTarget->simpleRenderPass();
-    SkASSERT(renderPass);
+    CommandBufferInfo& cbInfo = fCommandBufferInfos[fCurrentCmdBuffer];
+    SkASSERT(cbInfo.fRenderPass);
 
     prepare_sampled_images(primProc, fGpu);
     GrFragmentProcessor::Iter iter(pipeline);
@@ -385,8 +430,7 @@
     GrPrimitiveType primitiveType = meshes[0].primitiveType();
     sk_sp<GrVkPipelineState> pipelineState = this->prepareDrawState(pipeline,
                                                                     primProc,
-                                                                    primitiveType,
-                                                                    *renderPass);
+                                                                    primitiveType);
     if (!pipelineState) {
         return;
     }
@@ -405,8 +449,7 @@
                 primitiveType = nonIdxMesh->primitiveType();
                 pipelineState = this->prepareDrawState(pipeline,
                                                        primProc,
-                                                       primitiveType,
-                                                       *renderPass);
+                                                       primitiveType);
                 if (!pipelineState) {
                     return;
                 }
@@ -415,18 +458,18 @@
             this->bindGeometry(primProc, *nonIdxMesh);
 
             if (nonIdxMesh->isIndexed()) {
-                fCommandBuffer->drawIndexed(fGpu,
-                                            nonIdxMesh->indexCount(),
+                cbInfo.fCommandBuffer->drawIndexed(fGpu,
+                                                   nonIdxMesh->indexCount(),
+                                                   1,
+                                                   nonIdxMesh->startIndex(),
+                                                   nonIdxMesh->startVertex(),
+                                                   0);
+            } else {
+                cbInfo.fCommandBuffer->draw(fGpu,
+                                            nonIdxMesh->vertexCount(),
                                             1,
-                                            nonIdxMesh->startIndex(),
                                             nonIdxMesh->startVertex(),
                                             0);
-            } else {
-                fCommandBuffer->draw(fGpu,
-                                     nonIdxMesh->vertexCount(),
-                                     1,
-                                     nonIdxMesh->startVertex(),
-                                     0);
             }
             fIsEmpty = false;
 
@@ -434,6 +477,9 @@
         } while ((nonIdxMesh = iter.next()));
     }
 
+    // Update command buffer bounds
+    cbInfo.fBounds.join(bounds);
+
     // Technically we don't have to call this here (since there is a safety check in
     // pipelineState:setData but this will allow for quicker freeing of resources if the
     // pipelineState sits in a cache for a while.
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h
index cc73eb4..527a1b2 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.h
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.h
@@ -38,30 +38,37 @@
     GrGpu* gpu() override;
     GrRenderTarget* renderTarget() override;
 
-    void onSubmit(const SkIRect& bounds) override;
+    void onSubmit() override;
 
     // Bind vertex and index buffers
     void bindGeometry(const GrPrimitiveProcessor&, const GrNonInstancedMesh&);
 
     sk_sp<GrVkPipelineState> prepareDrawState(const GrPipeline&,
                                               const GrPrimitiveProcessor&,
-                                              GrPrimitiveType,
-                                              const GrVkRenderPass&);
+                                              GrPrimitiveType);
 
     void onDraw(const GrPipeline& pipeline,
                 const GrPrimitiveProcessor& primProc,
                 const GrMesh* mesh,
-                int meshCount) override;
+                int meshCount,
+                const SkRect& bounds) override;
 
     void onClear(const GrFixedClip&, GrColor color) override;
 
     void onClearStencilClip(const GrFixedClip&, bool insideStencilMask) override;
 
-    const GrVkRenderPass*       fRenderPass;
-    GrVkSecondaryCommandBuffer* fCommandBuffer;
+    struct CommandBufferInfo {
+        const GrVkRenderPass*       fRenderPass;
+        GrVkSecondaryCommandBuffer* fCommandBuffer;
+        VkClearValue                fColorClearValue;
+        SkRect                      fBounds;
+    };
+
+    SkTArray<CommandBufferInfo> fCommandBufferInfos;
+    int                         fCurrentCmdBuffer;
+
     GrVkGpu*                    fGpu;
     GrVkRenderTarget*           fRenderTarget;
-    VkClearValue                fColorClearValue;
 
     bool                        fIsEmpty;
     bool                        fStartsWithClear;