Add explicit clear of stencil buffer before opLists that use it

Change-Id: I9e2468e1331c6593dbc6da3ad510f08d1c589e8d
Reviewed-on: https://skia-review.googlesource.com/32041
Commit-Queue: Stan Iliev <stani@google.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 1da8d0e..fa5f372 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -371,8 +371,9 @@
     // awkward workaround that goes away after MDB is complete and the render target is known from
     // the GrRenderTargetOpList.
     virtual GrGpuCommandBuffer* createCommandBuffer(
-            const GrGpuCommandBuffer::LoadAndStoreInfo& colorInfo,
-            const GrGpuCommandBuffer::LoadAndStoreInfo& stencilInfo) = 0;
+            GrRenderTarget*, GrSurfaceOrigin,
+            const GrGpuCommandBuffer::LoadAndStoreInfo&,
+            const GrGpuCommandBuffer::StencilLoadAndStoreInfo&) = 0;
 
     // Called by GrDrawingManager when flushing.
     // Provides a hook for post-flush actions (e.g. Vulkan command buffer submits). This will also
@@ -479,7 +480,7 @@
                                                                         int width,
                                                                         int height) = 0;
-    // clears target's entire stencil buffer to 0
+    // clears target's entire stencil buffer to clearValue
-    virtual void clearStencil(GrRenderTarget* target) = 0;
+    virtual void clearStencil(GrRenderTarget* target, int clearValue) = 0;
 
     // Determines whether a texture will need to be rescaled in order to be used with the
     // GrSamplerParams. This variation is called when the caller will create a new texture using the
diff --git a/src/gpu/GrGpuCommandBuffer.h b/src/gpu/GrGpuCommandBuffer.h
index a50901a..1be361f 100644
--- a/src/gpu/GrGpuCommandBuffer.h
+++ b/src/gpu/GrGpuCommandBuffer.h
@@ -53,9 +53,17 @@
         GrColor fClearColor;
     };
 
+    // Load-time clears of the stencil buffer are always to 0 so we don't store
+    // an 'fStencilClearValue'
+    struct StencilLoadAndStoreInfo {
+        LoadOp fLoadOp;
+        StoreOp fStoreOp;
+    };
+
     GrGpuCommandBuffer() {}
     virtual ~GrGpuCommandBuffer() {}
 
+    virtual void begin() = 0;
     // Signals the end of recording to the command buffer and that it can now be submitted.
     virtual void end() = 0;
 
diff --git a/src/gpu/GrOpList.h b/src/gpu/GrOpList.h
index 2d80386..00dc344 100644
--- a/src/gpu/GrOpList.h
+++ b/src/gpu/GrOpList.h
@@ -82,6 +82,9 @@
 
     int32_t uniqueID() const { return fUniqueID; }
 
+    void setRequiresStencil() { this->setFlag(kClearStencilBuffer_Flag); }
+    bool requiresStencil() { return this->isSetFlag(kClearStencilBuffer_Flag); }
+
     /*
      * Dump out the GrOpList dependency DAG
      */
@@ -104,6 +107,8 @@
 
         kWasOutput_Flag = 0x02,   //!< Flag for topological sorting
         kTempMark_Flag  = 0x04,   //!< Flag for topological sorting
+
+        kClearStencilBuffer_Flag = 0x08 //!< Clear the SB before executing the ops
     };
 
     void setFlag(uint32_t flag) {
diff --git a/src/gpu/GrRenderTargetContext.cpp b/src/gpu/GrRenderTargetContext.cpp
index dfea8f9..e76b5ff 100644
--- a/src/gpu/GrRenderTargetContext.cpp
+++ b/src/gpu/GrRenderTargetContext.cpp
@@ -1761,6 +1761,8 @@
 
     if (fixedFunctionFlags & GrDrawOp::FixedFunctionFlags::kUsesStencil ||
         appliedClip.hasStencilClip()) {
+        this->getOpList()->setRequiresStencil();
+
         // This forces instantiation of the render target.
         GrRenderTarget* rt = this->accessRenderTarget();
         if (!rt) {
diff --git a/src/gpu/GrRenderTargetOpList.cpp b/src/gpu/GrRenderTargetOpList.cpp
index f0c97e4..ffa1e43 100644
--- a/src/gpu/GrRenderTargetOpList.cpp
+++ b/src/gpu/GrRenderTargetOpList.cpp
@@ -84,16 +84,30 @@
     }
 }
 
-static std::unique_ptr<GrGpuCommandBuffer> create_command_buffer(GrGpu* gpu) {
+static std::unique_ptr<GrGpuCommandBuffer> create_command_buffer(GrGpu* gpu,
+                                                                 GrRenderTarget* rt,
+                                                                 GrSurfaceOrigin origin,
+                                                                 bool clearSB) {
     static const GrGpuCommandBuffer::LoadAndStoreInfo kBasicLoadStoreInfo {
         GrGpuCommandBuffer::LoadOp::kLoad,
         GrGpuCommandBuffer::StoreOp::kStore,
         GrColor_ILLEGAL
     };
 
+    // TODO:
+    // We would like to (at this level) only ever clear & discard. We would need
+    // to stop splitting up higher level opLists for copyOps to achieve that.
+    // Note: we would still need SB loads and stores but they would happen at a
+    // lower level (inside the VK command buffer).
+    const GrGpuCommandBuffer::StencilLoadAndStoreInfo stencilLoadAndStoreInfo {
+        clearSB ? GrGpuCommandBuffer::LoadOp::kClear : GrGpuCommandBuffer::LoadOp::kLoad,
+        GrGpuCommandBuffer::StoreOp::kStore,
+    };
+
     std::unique_ptr<GrGpuCommandBuffer> buffer(
-                            gpu->createCommandBuffer(kBasicLoadStoreInfo,   // Color
-                                                     kBasicLoadStoreInfo)); // Stencil
+                            gpu->createCommandBuffer(rt, origin,
+                                                     kBasicLoadStoreInfo,       // Color
+                                                     stencilLoadAndStoreInfo)); // Stencil
     return buffer;
 }
 
@@ -116,8 +130,13 @@
 
     SkASSERT(fTarget.get()->priv().peekRenderTarget());
 
-    std::unique_ptr<GrGpuCommandBuffer> commandBuffer = create_command_buffer(flushState->gpu());
+    std::unique_ptr<GrGpuCommandBuffer> commandBuffer = create_command_buffer(
+                                                    flushState->gpu(),
+                                                    fTarget.get()->priv().peekRenderTarget(),
+                                                    fTarget.get()->origin(),
+                                                    this->requiresStencil());
     flushState->setCommandBuffer(commandBuffer.get());
+    commandBuffer->begin();
 
     // Draw all the generated geometry.
     for (int i = 0; i < fRecordedOps.count(); ++i) {
@@ -133,8 +152,12 @@
             commandBuffer.reset();
             flushState->setCommandBuffer(commandBuffer.get());
         } else if (!commandBuffer) {
-            commandBuffer = create_command_buffer(flushState->gpu());
+            commandBuffer = create_command_buffer(flushState->gpu(),
+                                                  fTarget.get()->priv().peekRenderTarget(),
+                                                  fTarget.get()->origin(),
+                                                  false);
             flushState->setCommandBuffer(commandBuffer.get());
+            commandBuffer->begin();
         }
 
         GrOpFlushState::DrawOpArgs opArgs {
diff --git a/src/gpu/GrResourceProvider.cpp b/src/gpu/GrResourceProvider.cpp
index 6061b33..1979570 100644
--- a/src/gpu/GrResourceProvider.cpp
+++ b/src/gpu/GrResourceProvider.cpp
@@ -450,7 +450,6 @@
             height = SkNextPow2(height);
         }
 #endif
-        bool newStencil = false;
         GrStencilAttachment::ComputeSharedStencilAttachmentKey(width, height,
                                                                rt->numStencilSamples(), &sbKey);
         GrStencilAttachment* stencil = static_cast<GrStencilAttachment*>(
@@ -460,21 +459,14 @@
             stencil = this->gpu()->createStencilAttachmentForRenderTarget(rt, width, height);
             if (stencil) {
                 this->assignUniqueKeyToResource(sbKey, stencil);
-                newStencil = true;
             }
         }
         if (rt->renderTargetPriv().attachStencilAttachment(stencil)) {
-            if (newStencil) {
-                // Right now we're clearing the stencil attachment here after it is
-                // attached to a RT for the first time. When we start matching
-                // stencil buffers with smaller color targets this will no longer
-                // be correct because it won't be guaranteed to clear the entire
-                // sb.
-                // We used to clear down in the GL subclass using a special purpose
-                // FBO. But iOS doesn't allow a stencil-only FBO. It reports unsupported
-                // FBO status.
-                this->gpu()->clearStencil(rt);
-            }
+#ifdef SK_DEBUG
+            // Fill the SB with an inappropriate value. opLists that use the
+            // SB should clear it properly.
+            this->gpu()->clearStencil(rt, 0xFFFF);
+#endif
         }
     }
     return rt->renderTargetPriv().getStencilAttachment();
diff --git a/src/gpu/gl/GrGLGpu.cpp b/src/gpu/gl/GrGLGpu.cpp
index bfa8ee4..7eb4952 100644
--- a/src/gpu/gl/GrGLGpu.cpp
+++ b/src/gpu/gl/GrGLGpu.cpp
@@ -2004,8 +2004,8 @@
     GL_CALL(Clear(GR_GL_COLOR_BUFFER_BIT));
 }
 
-void GrGLGpu::clearStencil(GrRenderTarget* target) {
-    if (nullptr == target) {
+void GrGLGpu::clearStencil(GrRenderTarget* target, int clearValue) {
+    if (!target) {
         return;
     }
     GrGLRenderTarget* glRT = static_cast<GrGLRenderTarget*>(target);
@@ -2015,7 +2015,7 @@
     this->disableWindowRectangles();
 
     GL_CALL(StencilMask(0xffffffff));
-    GL_CALL(ClearStencil(0));
+    GL_CALL(ClearStencil(clearValue));
     GL_CALL(Clear(GR_GL_STENCIL_BUFFER_BIT));
     fHWStencilSettings.invalidate();
 }
@@ -2448,9 +2448,10 @@
 }
 
 GrGpuCommandBuffer* GrGLGpu::createCommandBuffer(
-        const GrGpuCommandBuffer::LoadAndStoreInfo& colorInfo,
-        const GrGpuCommandBuffer::LoadAndStoreInfo& stencilInfo) {
-    return new GrGLGpuCommandBuffer(this);
+        GrRenderTarget* rt, GrSurfaceOrigin,
+        const GrGpuCommandBuffer::LoadAndStoreInfo&,
+        const GrGpuCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
+    return new GrGLGpuCommandBuffer(this, rt, stencilInfo);
 }
 
 void GrGLGpu::flushRenderTarget(GrGLRenderTarget* target, const SkIRect* bounds, bool disableSRGB) {
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index 1d362fa..5b6cda7 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -142,11 +142,12 @@
         return &this->glContext();
     }
 
-    void clearStencil(GrRenderTarget*) override;
+    void clearStencil(GrRenderTarget*, int clearValue) override;
 
     GrGpuCommandBuffer* createCommandBuffer(
-            const GrGpuCommandBuffer::LoadAndStoreInfo& colorInfo,
-            const GrGpuCommandBuffer::LoadAndStoreInfo& stencilInfo) override;
+            GrRenderTarget*, GrSurfaceOrigin,
+            const GrGpuCommandBuffer::LoadAndStoreInfo&,
+            const GrGpuCommandBuffer::StencilLoadAndStoreInfo&) override;
 
     void invalidateBoundRenderTarget() {
         fHWBoundRenderTargetUniqueID.makeInvalid();
diff --git a/src/gpu/gl/GrGLGpuCommandBuffer.h b/src/gpu/gl/GrGLGpuCommandBuffer.h
index 531251b..85dfbe8 100644
--- a/src/gpu/gl/GrGLGpuCommandBuffer.h
+++ b/src/gpu/gl/GrGLGpuCommandBuffer.h
@@ -24,10 +24,20 @@
  * pass through functions to corresponding calls in the GrGLGpu class.
  */
 public:
-    GrGLGpuCommandBuffer(GrGLGpu* gpu) : fGpu(gpu), fRenderTarget(nullptr) {}
+    GrGLGpuCommandBuffer(GrGLGpu* gpu, GrRenderTarget* rt,
+                         const GrGpuCommandBuffer::StencilLoadAndStoreInfo& stencilInfo)
+            : fGpu(gpu)
+            , fRenderTarget(static_cast<GrGLRenderTarget*>(rt)) {
+        fClearSB = LoadOp::kClear == stencilInfo.fLoadOp;
+    }
 
     ~GrGLGpuCommandBuffer() override {}
 
+    void begin() override {
+        if (fClearSB) {
+            fGpu->clearStencil(fRenderTarget, 0x0);
+        }
+    }
     void end() override {}
 
     void discard(GrRenderTargetProxy* proxy) override {
@@ -94,6 +104,7 @@
 
     GrGLGpu*                    fGpu;
     GrGLRenderTarget*           fRenderTarget;
+    bool                        fClearSB;
 
     typedef GrGpuCommandBuffer INHERITED;
 };
diff --git a/src/gpu/mock/GrMockGpu.cpp b/src/gpu/mock/GrMockGpu.cpp
index 614e627..d97c789 100644
--- a/src/gpu/mock/GrMockGpu.cpp
+++ b/src/gpu/mock/GrMockGpu.cpp
@@ -39,8 +39,10 @@
 }
 
 
-GrGpuCommandBuffer* GrMockGpu::createCommandBuffer(const GrGpuCommandBuffer::LoadAndStoreInfo&,
-                                                   const GrGpuCommandBuffer::LoadAndStoreInfo&) {
+GrGpuCommandBuffer* GrMockGpu::createCommandBuffer(
+                                            GrRenderTarget*, GrSurfaceOrigin,
+                                            const GrGpuCommandBuffer::LoadAndStoreInfo&,
+                                            const GrGpuCommandBuffer::StencilLoadAndStoreInfo&) {
     return new GrMockGpuCommandBuffer(this);
 }
 
diff --git a/src/gpu/mock/GrMockGpu.h b/src/gpu/mock/GrMockGpu.h
index f5a187e..6d72399 100644
--- a/src/gpu/mock/GrMockGpu.h
+++ b/src/gpu/mock/GrMockGpu.h
@@ -43,8 +43,10 @@
         *effectiveSampleCnt = rt->numStencilSamples();
     }
 
-    GrGpuCommandBuffer* createCommandBuffer(const GrGpuCommandBuffer::LoadAndStoreInfo&,
-                                            const GrGpuCommandBuffer::LoadAndStoreInfo&) override;
+    GrGpuCommandBuffer* createCommandBuffer(
+                                    GrRenderTarget*, GrSurfaceOrigin,
+                                    const GrGpuCommandBuffer::LoadAndStoreInfo&,
+                                    const GrGpuCommandBuffer::StencilLoadAndStoreInfo&) override;
 
     GrFence SK_WARN_UNUSED_RESULT insertFence() override { return 0; }
     bool waitFence(GrFence, uint64_t) override { return true; }
@@ -129,7 +131,7 @@
     GrStencilAttachment* createStencilAttachmentForRenderTarget(const GrRenderTarget*,
                                                                 int width,
                                                                 int height) override;
-    void clearStencil(GrRenderTarget* target) override  {}
+    void clearStencil(GrRenderTarget*, int clearValue) override  {}
 
     GrBackendObject createTestingOnlyBackendTexture(void* pixels, int w, int h, GrPixelConfig,
                                                     bool isRT) override;
diff --git a/src/gpu/mock/GrMockGpuCommandBuffer.h b/src/gpu/mock/GrMockGpuCommandBuffer.h
index 0213cb9..d2f6baf 100644
--- a/src/gpu/mock/GrMockGpuCommandBuffer.h
+++ b/src/gpu/mock/GrMockGpuCommandBuffer.h
@@ -20,6 +20,7 @@
                       GrRenderTargetProxy*) override {}
     void discard(GrRenderTargetProxy*) override {}
     void insertEventMarker(GrRenderTargetProxy*, const char*) override {}
+    void begin() override {}
     void end() override {}
 
     int numDraws() const { return fNumDraws; }
diff --git a/src/gpu/mtl/GrMtlGpu.h b/src/gpu/mtl/GrMtlGpu.h
index 06c2b2f..e6f2cca 100644
--- a/src/gpu/mtl/GrMtlGpu.h
+++ b/src/gpu/mtl/GrMtlGpu.h
@@ -47,8 +47,10 @@
     void onQueryMultisampleSpecs(GrRenderTarget* rt, const GrStencilSettings&,
                                  int* effectiveSampleCnt, SamplePattern*) override {}
 
-    GrGpuCommandBuffer* createCommandBuffer(const GrGpuCommandBuffer::LoadAndStoreInfo&,
-                                            const GrGpuCommandBuffer::LoadAndStoreInfo&) override {
+    GrGpuCommandBuffer* createCommandBuffer(
+                                    GrRenderTarget*, GrSurfaceOrigin,
+                                    const GrGpuCommandBuffer::LoadAndStoreInfo&,
+                                    const GrGpuCommandBuffer::StencilLoadAndStoreInfo&) override {
         return nullptr;
     }
 
@@ -138,7 +140,7 @@
         return nullptr;
     }
 
-    void clearStencil(GrRenderTarget* target) override  {}
+    void clearStencil(GrRenderTarget* target, int clearValue) override  {}
 
     GrBackendObject createTestingOnlyBackendTexture(void* pixels, int w, int h,
                                                     GrPixelConfig config, bool isRT) override {
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index d4fe80c..3713bfe 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -384,7 +384,7 @@
 
 void GrVkPrimaryCommandBuffer::beginRenderPass(const GrVkGpu* gpu,
                                                const GrVkRenderPass* renderPass,
-                                               const VkClearValue* clearValues,
+                                               const VkClearValue clearValues[],
                                                const GrVkRenderTarget& target,
                                                const SkIRect& bounds,
                                                bool forSecondaryCB) {
@@ -403,7 +403,15 @@
     beginInfo.renderPass = renderPass->vkRenderPass();
     beginInfo.framebuffer = target.framebuffer()->framebuffer();
     beginInfo.renderArea = renderArea;
-    beginInfo.clearValueCount = renderPass->clearValueCount();
+
+    // TODO: have clearValueCount return the index of the last attachment that
+    // requires a clear instead of the number of total clears.
+    uint32_t stencilIndex;
+    if (renderPass->stencilAttachmentIndex(&stencilIndex)) {
+        beginInfo.clearValueCount = renderPass->clearValueCount() ? 2 : 0;
+    } else {
+        beginInfo.clearValueCount = renderPass->clearValueCount();
+    }
     beginInfo.pClearValues = clearValues;
 
     VkSubpassContents contents = forSecondaryCB ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index e602e4d..548efad 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -185,7 +185,7 @@
     // in the render pass.
     void beginRenderPass(const GrVkGpu* gpu,
                          const GrVkRenderPass* renderPass,
-                         const VkClearValue* clearValues,
+                         const VkClearValue clearValues[],
                          const GrVkRenderTarget& target,
                          const SkIRect& bounds,
                          bool forSecondaryCB);
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 6da40e5..f152582 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -258,9 +258,10 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 GrGpuCommandBuffer* GrVkGpu::createCommandBuffer(
+            GrRenderTarget* rt, GrSurfaceOrigin origin,
             const GrGpuCommandBuffer::LoadAndStoreInfo& colorInfo,
-            const GrGpuCommandBuffer::LoadAndStoreInfo& stencilInfo) {
-    return new GrVkGpuCommandBuffer(this, colorInfo, stencilInfo);
+            const GrGpuCommandBuffer::StencilLoadAndStoreInfo& stencilInfo) {
+    return new GrVkGpuCommandBuffer(this, rt, origin, colorInfo, stencilInfo);
 }
 
 void GrVkGpu::submitCommandBuffer(SyncQueue sync) {
@@ -1474,8 +1475,8 @@
     this->submitCommandBuffer(kSkip_SyncQueue);
 }
 
-void GrVkGpu::clearStencil(GrRenderTarget* target) {
-    if (nullptr == target) {
+void GrVkGpu::clearStencil(GrRenderTarget* target, int clearValue) {
+    if (!target) {
         return;
     }
     GrStencilAttachment* stencil = target->renderTargetPriv().getStencilAttachment();
@@ -1483,7 +1484,8 @@
 
 
     VkClearDepthStencilValue vkStencilColor;
-    memset(&vkStencilColor, 0, sizeof(VkClearDepthStencilValue));
+    vkStencilColor.depth = 0.0f;
+    vkStencilColor.stencil = clearValue;
 
     vkStencil->setImageLayout(this,
                               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -1971,6 +1973,7 @@
 void GrVkGpu::submitSecondaryCommandBuffer(const SkTArray<GrVkSecondaryCommandBuffer*>& buffers,
                                            const GrVkRenderPass* renderPass,
                                            const VkClearValue* colorClear,
+                                           const VkClearValue* stencilClear,
                                            GrVkRenderTarget* target,
                                            const SkIRect& bounds) {
     const SkIRect* pBounds = &bounds;
@@ -1993,7 +1996,20 @@
         pBounds = &adjustedBounds;
     }
 
-    fCurrentCmdBuffer->beginRenderPass(this, renderPass, colorClear, *target, *pBounds, true);
+#ifdef SK_DEBUG
+    uint32_t index;
+    bool result = renderPass->colorAttachmentIndex(&index);
+    SkASSERT(result && 0 == index);
+    result = renderPass->stencilAttachmentIndex(&index);
+    if (result) {
+        SkASSERT(1 == index);
+    }
+#endif
+    VkClearValue clears[2];
+    clears[0].color = colorClear->color;
+    clears[1].depthStencil = stencilClear->depthStencil;
+
+    fCurrentCmdBuffer->beginRenderPass(this, renderPass, clears, *target, *pBounds, true);
     for (int i = 0; i < buffers.count(); ++i) {
         fCurrentCmdBuffer->executeCommands(this, buffers[i]);
     }
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index 2a44785..976c95a 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -94,11 +94,12 @@
                                                                 int width,
                                                                 int height) override;
 
-    void clearStencil(GrRenderTarget* target) override;
+    void clearStencil(GrRenderTarget* target, int clearValue) override;
 
     GrGpuCommandBuffer* createCommandBuffer(
-            const GrGpuCommandBuffer::LoadAndStoreInfo& colorInfo,
-            const GrGpuCommandBuffer::LoadAndStoreInfo& stencilInfo) override;
+            GrRenderTarget*, GrSurfaceOrigin,
+            const GrGpuCommandBuffer::LoadAndStoreInfo&,
+            const GrGpuCommandBuffer::StencilLoadAndStoreInfo&) override;
 
     void addMemoryBarrier(VkPipelineStageFlags srcStageMask,
                           VkPipelineStageFlags dstStageMask,
@@ -123,7 +124,7 @@
 
     void submitSecondaryCommandBuffer(const SkTArray<GrVkSecondaryCommandBuffer*>&,
                                       const GrVkRenderPass*,
-                                      const VkClearValue*,
+                                      const VkClearValue*, const VkClearValue*,
                                       GrVkRenderTarget*,
                                       const SkIRect& bounds);
 
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.cpp b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
index 6a01d3f..2528fce 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.cpp
@@ -22,9 +22,10 @@
 #include "GrVkTexture.h"
 #include "SkRect.h"
 
-void get_vk_load_store_ops(const GrGpuCommandBuffer::LoadAndStoreInfo& info,
+void get_vk_load_store_ops(GrGpuCommandBuffer::LoadOp loadOpIn,
+                           GrGpuCommandBuffer::StoreOp storeOpIn,
                            VkAttachmentLoadOp* loadOp, VkAttachmentStoreOp* storeOp) {
-    switch (info.fLoadOp) {
+    switch (loadOpIn) {
         case GrGpuCommandBuffer::LoadOp::kLoad:
             *loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
             break;
@@ -39,7 +40,7 @@
             *loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
     }
 
-    switch (info.fStoreOp) {
+    switch (storeOpIn) {
         case GrGpuCommandBuffer::StoreOp::kStore:
             *storeOp = VK_ATTACHMENT_STORE_OP_STORE;
             break;
@@ -53,19 +54,22 @@
 }
 
 GrVkGpuCommandBuffer::GrVkGpuCommandBuffer(GrVkGpu* gpu,
+                                           GrRenderTarget* rt, GrSurfaceOrigin origin,
                                            const LoadAndStoreInfo& colorInfo,
-                                           const LoadAndStoreInfo& stencilInfo)
+                                           const StencilLoadAndStoreInfo& stencilInfo)
     : fGpu(gpu)
     , fRenderTarget(nullptr)
     , fOrigin(kTopLeft_GrSurfaceOrigin)
     , fClearColor(GrColor4f::FromGrColor(colorInfo.fClearColor))
     , fLastPipelineState(nullptr) {
+    get_vk_load_store_ops(colorInfo.fLoadOp, colorInfo.fStoreOp,
+                          &fVkColorLoadOp, &fVkColorStoreOp);
 
-    get_vk_load_store_ops(colorInfo, &fVkColorLoadOp, &fVkColorStoreOp);
-
-    get_vk_load_store_ops(stencilInfo, &fVkStencilLoadOp, &fVkStencilStoreOp);
-
+    get_vk_load_store_ops(stencilInfo.fLoadOp, stencilInfo.fStoreOp,
+                          &fVkStencilLoadOp, &fVkStencilStoreOp);
     fCurrentCmdInfo = -1;
+
+    this->init(static_cast<GrVkRenderTarget*>(rt), origin);
 }
 
 void GrVkGpuCommandBuffer::init(GrVkRenderTarget* target, GrSurfaceOrigin origin) {
@@ -96,6 +100,9 @@
     cbInfo.fColorClearValue.color.float32[2] = fClearColor.fRGBA[2];
     cbInfo.fColorClearValue.color.float32[3] = fClearColor.fRGBA[3];
 
+    cbInfo.fStencilClearValue.depthStencil.depth = 0;
+    cbInfo.fStencilClearValue.depthStencil.stencil = 0;
+
     cbInfo.fBounds.setEmpty();
     cbInfo.fIsEmpty = true;
     cbInfo.fStartsWithClear = false;
@@ -118,6 +125,13 @@
 GrGpu* GrVkGpuCommandBuffer::gpu() { return fGpu; }
 GrRenderTarget* GrVkGpuCommandBuffer::renderTarget() { return fRenderTarget; }
 
+void GrVkGpuCommandBuffer::begin() {
+    // TODO: remove this - see skbug.com/6936
+    if (VK_ATTACHMENT_LOAD_OP_CLEAR == fVkStencilLoadOp) {
+        fGpu->clearStencil(fRenderTarget, 0x0);
+    }
+}
+
 void GrVkGpuCommandBuffer::end() {
     if (fCurrentCmdInfo >= 0) {
         fCommandBufferInfos[fCurrentCmdInfo].currentCmdBuf()->end(fGpu);
@@ -178,7 +192,9 @@
             cbInfo.fBounds.roundOut(&iBounds);
 
             fGpu->submitSecondaryCommandBuffer(cbInfo.fCommandBuffers, cbInfo.fRenderPass,
-                                               &cbInfo.fColorClearValue, fRenderTarget, iBounds);
+                                               &cbInfo.fColorClearValue,
+                                               &cbInfo.fStencilClearValue,
+                                               fRenderTarget, iBounds);
         }
     }
 }
@@ -410,6 +426,7 @@
     // It shouldn't matter what we set the clear color to here since we will assume loading of the
     // attachment.
     memset(&cbInfo.fColorClearValue, 0, sizeof(VkClearValue));
+    memset(&cbInfo.fStencilClearValue, 0, sizeof(VkClearValue));
     cbInfo.fBounds.setEmpty();
     cbInfo.fIsEmpty = true;
     cbInfo.fStartsWithClear = false;
diff --git a/src/gpu/vk/GrVkGpuCommandBuffer.h b/src/gpu/vk/GrVkGpuCommandBuffer.h
index 6031788..cafb015 100644
--- a/src/gpu/vk/GrVkGpuCommandBuffer.h
+++ b/src/gpu/vk/GrVkGpuCommandBuffer.h
@@ -23,12 +23,13 @@
 
 class GrVkGpuCommandBuffer : public GrGpuCommandBuffer, private GrMesh::SendToGpuImpl {
 public:
-    GrVkGpuCommandBuffer(GrVkGpu* gpu,
-                         const LoadAndStoreInfo& colorInfo,
-                         const LoadAndStoreInfo& stencilInfo);
+    GrVkGpuCommandBuffer(GrVkGpu*, GrRenderTarget*, GrSurfaceOrigin,
+                         const LoadAndStoreInfo&,
+                         const StencilLoadAndStoreInfo&);
 
     ~GrVkGpuCommandBuffer() override;
 
+    void begin() override;
     void end() override;
 
     void discard(GrRenderTargetProxy*) override;
@@ -111,6 +112,7 @@
         const GrVkRenderPass*                  fRenderPass;
         SkTArray<GrVkSecondaryCommandBuffer*>  fCommandBuffers;
         VkClearValue                           fColorClearValue;
+        VkClearValue                           fStencilClearValue;
         SkRect                                 fBounds;
         bool                                   fIsEmpty;
         bool                                   fStartsWithClear;