Update the minimum Vulkan version to 1.0.8 (SDK 1.0.8.0) and fix various bugs

Updating the SDK also caused the debug layers to surface several bugs; those are
fixed in this CL as well. The fixes include:

1. Correctly tracking the number of descriptors allocated from the descriptor pools
(we previously counted descriptor sets rather than individual descriptors; see the
first sketch below).

2. No longer issuing memory barriers inside an active render pass.

3. Setting the stencil image layout whenever a render pass with a stencil attachment
is used.

4. Setting the correct depth/stencil aspect mask in image barriers (see the second
sketch below).
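
For fix 1, a minimal sketch of the corrected accounting, mirroring the
DescriptorPoolManager change in GrVkPipelineState below (the GPU and
resource-provider plumbing is elided; the pool-creation and set-allocation calls
are only hinted at in comments):

    // Track how many individual descriptors have been handed out of the current
    // pool; a pool sized for N descriptors is exhausted after N / descCountPerSet
    // sets, not after N sets.
    #include <cstdint>

    struct DescriptorPoolManager {
        uint32_t fDescCountPerSet;         // descriptors consumed by one descriptor set
        uint32_t fMaxDescriptors;          // capacity of the current pool
        uint32_t fCurrentDescriptorCount;  // descriptors already allocated from it
        static constexpr uint32_t kMaxDescLimit = 1 << 10;

        void getNewPool() {
            // Double the pool capacity up to the cap, then (re)create the pool.
            fMaxDescriptors = (fMaxDescriptors < (kMaxDescLimit >> 1)) ? fMaxDescriptors << 1
                                                                       : kMaxDescLimit;
            // ... findOrCreateCompatibleDescriptorPool(fMaxDescriptors) ...
        }

        void getNewDescriptorSet() {
            if (fCurrentDescriptorCount == fMaxDescriptors) {
                this->getNewPool();
                fCurrentDescriptorCount = 0;
            }
            // Charge the pool for every descriptor in the set, not just one.
            fCurrentDescriptorCount += fDescCountPerSet;
            // ... vkAllocateDescriptorSets(...) ...
        }
    };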

TBR=bsalomon@google.com

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1906623002

Review URL: https://codereview.chromium.org/1906623002
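
For fix 4, a minimal sketch of deriving a barrier's aspect mask from the image
format, mirroring the vk_format_to_aspect_flags helper added to GrVkImage.cpp
below (the SkASSERT on unrecognized formats is dropped here; everything that is
not one of the handled depth/stencil formats is treated as a color format):

    #include <vulkan/vulkan.h>

    static VkImageAspectFlags vk_format_to_aspect_flags(VkFormat format) {
        switch (format) {
            case VK_FORMAT_S8_UINT:
                return VK_IMAGE_ASPECT_STENCIL_BIT;  // stencil-only format
            case VK_FORMAT_D24_UNORM_S8_UINT:        // fallthrough
            case VK_FORMAT_D32_SFLOAT_S8_UINT:
                return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
            default:
                return VK_IMAGE_ASPECT_COLOR_BIT;    // color formats
        }
    }

    // The result feeds VkImageMemoryBarrier::subresourceRange.aspectMask instead
    // of hard-coding VK_IMAGE_ASPECT_COLOR_BIT for depth/stencil images.
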
diff --git a/include/gpu/vk/GrVkTypes.h b/include/gpu/vk/GrVkTypes.h
index 833f6d3..ac33bf8 100755
--- a/include/gpu/vk/GrVkTypes.h
+++ b/include/gpu/vk/GrVkTypes.h
@@ -35,6 +35,7 @@
     VkDeviceMemory fAlloc;    // this may be null iff the texture is an RT and uses borrow semantics
     VkImageTiling  fImageTiling;
     VkImageLayout  fImageLayout;
+    VkFormat       fFormat;
 };
 
 GR_STATIC_ASSERT(sizeof(GrBackendObject) >= sizeof(const GrVkTextureInfo*));
diff --git a/src/gpu/vk/GrVkBackendContext.cpp b/src/gpu/vk/GrVkBackendContext.cpp
index d189840..30e7669 100644
--- a/src/gpu/vk/GrVkBackendContext.cpp
+++ b/src/gpu/vk/GrVkBackendContext.cpp
@@ -16,15 +16,14 @@
 #ifdef ENABLE_VK_LAYERS
 const char* kDebugLayerNames[] = {
     // elements of VK_LAYER_LUNARG_standard_validation
-    "VK_LAYER_LUNARG_threading",
-    "VK_LAYER_LUNARG_param_checker",
+    "VK_LAYER_GOOGLE_threading",
+    "VK_LAYER_LUNARG_parameter_validation",
     "VK_LAYER_LUNARG_device_limits",
     "VK_LAYER_LUNARG_object_tracker",
     "VK_LAYER_LUNARG_image",
-    "VK_LAYER_LUNARG_mem_tracker",
-    "VK_LAYER_LUNARG_draw_state",
+    "VK_LAYER_LUNARG_core_validation",
     "VK_LAYER_LUNARG_swapchain",
-    //"VK_LAYER_GOOGLE_unique_objects",
+    "VK_LAYER_GOOGLE_unique_objects",
     // not included in standard_validation
     //"VK_LAYER_LUNARG_api_dump",
     //"VK_LAYER_LUNARG_vktrace",
@@ -33,7 +32,7 @@
 #endif
 
 // the minimum version of Vulkan supported
-const uint32_t kGrVkMinimumVersion = VK_MAKE_VERSION(1, 0, 3);
+const uint32_t kGrVkMinimumVersion = VK_MAKE_VERSION(1, 0, 8);
 
 // Create the base Vulkan objects needed by the GrVkGpu object
 const GrVkBackendContext* GrVkBackendContext::Create() {
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index 2868d8e..94d4c9d 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -141,6 +141,7 @@
     submitInfo.pNext = nullptr;
     submitInfo.waitSemaphoreCount = 0;
     submitInfo.pWaitSemaphores = nullptr;
+    submitInfo.pWaitDstStageMask = 0;
     submitInfo.commandBufferCount = 1;
     submitInfo.pCommandBuffers = &fCmdBuffer;
     submitInfo.signalSemaphoreCount = 0;
@@ -195,6 +196,11 @@
                                         BarrierType barrierType,
                                         void* barrier) const {
     SkASSERT(fIsActive);
+    // For images we can have barriers inside of render passes but they require us to add more
+    // support in subpasses which need self dependencies to have barriers inside them. Also, we can
+    // never have buffer barriers inside of a render pass. For now we will just assert that we are
+    // not in a render pass.
+    SkASSERT(!fActiveRenderPass);
     VkDependencyFlags dependencyFlags = byRegion ? VK_DEPENDENCY_BY_REGION_BIT : 0;
 
     switch (barrierType) {
@@ -390,7 +396,6 @@
 
 void GrVkCommandBuffer::bindPipeline(const GrVkGpu* gpu, const GrVkPipeline* pipeline) {
     SkASSERT(fIsActive);
-    SkASSERT(fActiveRenderPass);
     GR_VK_CALL(gpu->vkInterface(), CmdBindPipeline(fCmdBuffer,
                                                    VK_PIPELINE_BIND_POINT_GRAPHICS,
                                                    pipeline->pipeline()));
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index e9e3d76..1b27c55 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -82,6 +82,8 @@
         }
     }
 
+    void bindPipeline(const GrVkGpu* gpu, const GrVkPipeline* pipeline);
+
     void bindDescriptorSets(const GrVkGpu* gpu,
                             GrVkPipelineState*,
                             VkPipelineLayout layout,
@@ -154,9 +156,6 @@
                           int numRects,
                           const VkClearRect* clearRects) const;
 
-    void bindPipeline(const GrVkGpu* gpu, const GrVkPipeline* pipeline);
-
-
     void drawIndexed(const GrVkGpu* gpu,
                      uint32_t indexCount,
                      uint32_t instanceCount,
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 5fba475..040912e 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -619,18 +619,15 @@
 
 void GrVkGpu::bindGeometry(const GrPrimitiveProcessor& primProc,
                            const GrNonInstancedMesh& mesh) {
+    // There is no need to put any memory barriers to make sure host writes have finished here.
+    // When a command buffer is submitted to a queue, there is an implicit memory barrier that
+    // occurs for all host writes. Additionally, BufferMemoryBarriers are not allowed inside of
+    // an active RenderPass.
     GrVkVertexBuffer* vbuf;
     vbuf = (GrVkVertexBuffer*)mesh.vertexBuffer();
     SkASSERT(vbuf);
     SkASSERT(!vbuf->isMapped());
 
-    vbuf->addMemoryBarrier(this,
-                           VK_ACCESS_HOST_WRITE_BIT,
-                           VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
-                           VK_PIPELINE_STAGE_HOST_BIT,
-                           VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                           false);
-
     fCurrentCmdBuffer->bindVertexBuffer(this, vbuf);
 
     if (mesh.isIndexed()) {
@@ -638,13 +635,6 @@
         SkASSERT(ibuf);
         SkASSERT(!ibuf->isMapped());
 
-        ibuf->addMemoryBarrier(this,
-                               VK_ACCESS_HOST_WRITE_BIT,
-                               VK_ACCESS_INDEX_READ_BIT,
-                               VK_PIPELINE_STAGE_HOST_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                               false);
-
         fCurrentCmdBuffer->bindIndexBuffer(this, ibuf);
     }
 }
@@ -783,6 +773,7 @@
     info->fAlloc = alloc;
     info->fImageTiling = imageTiling;
     info->fImageLayout = initialLayout;
+    info->fFormat = pixelFormat;
 
     return (GrBackendObject)info;
 }
@@ -933,8 +924,7 @@
     VkImageLayout origDstLayout = vkStencil->currentLayout();
     VkAccessFlags srcAccessMask = GrVkMemory::LayoutToSrcAccessMask(origDstLayout);
     VkAccessFlags dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-    VkPipelineStageFlags srcStageMask =
-        GrVkMemory::LayoutToPipelineStageFlags(origDstLayout);
+    VkPipelineStageFlags srcStageMask = GrVkMemory::LayoutToPipelineStageFlags(origDstLayout);
     VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
     vkStencil->setImageLayout(this,
                               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
@@ -944,6 +934,21 @@
                               dstStageMask,
                               false);
 
+    // Change layout of our render target so it can be used as the color attachment. This is what
+    // the render pass expects when it begins.
+    VkImageLayout layout = vkRT->currentLayout();
+    srcStageMask = GrVkMemory::LayoutToPipelineStageFlags(layout);
+    dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    srcAccessMask = GrVkMemory::LayoutToSrcAccessMask(layout);
+    dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    vkRT->setImageLayout(this,
+                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                         srcAccessMask,
+                         dstAccessMask,
+                         srcStageMask,
+                         dstStageMask,
+                         false);
+
     VkClearRect clearRect;
     // Flip rect if necessary
     SkIRect vkRect = rect;
@@ -990,8 +995,7 @@
     if (rect.width() != target->width() || rect.height() != target->height()) {
         VkAccessFlags srcAccessMask = GrVkMemory::LayoutToSrcAccessMask(origDstLayout);
         VkAccessFlags dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
-        VkPipelineStageFlags srcStageMask =
-            GrVkMemory::LayoutToPipelineStageFlags(origDstLayout);
+        VkPipelineStageFlags srcStageMask = GrVkMemory::LayoutToPipelineStageFlags(origDstLayout);
         VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
         vkRT->setImageLayout(this,
                              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
@@ -1001,6 +1005,25 @@
                              dstStageMask,
                              false);
 
+        // If we are using a stencil attachment we also need to change its layout to what the render
+        // pass is expecting.
+        if (GrStencilAttachment* stencil = vkRT->renderTargetPriv().getStencilAttachment()) {
+            GrVkStencilAttachment* vkStencil = (GrVkStencilAttachment*)stencil;
+            origDstLayout = vkStencil->currentLayout();
+            srcAccessMask = GrVkMemory::LayoutToSrcAccessMask(origDstLayout);
+            dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+            srcStageMask = GrVkMemory::LayoutToPipelineStageFlags(origDstLayout);
+            dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+            vkStencil->setImageLayout(this,
+                                      VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+                                      srcAccessMask,
+                                      dstAccessMask,
+                                      srcStageMask,
+                                      dstStageMask,
+                                      false);
+        }
+
         VkClearRect clearRect;
         // Flip rect if necessary
         SkIRect vkRect = rect;
@@ -1483,7 +1506,6 @@
     const GrVkRenderPass* renderPass = vkRT->simpleRenderPass();
     SkASSERT(renderPass);
 
-    fCurrentCmdBuffer->beginRenderPass(this, renderPass, *vkRT);
 
     GrPrimitiveType primitiveType = meshes[0].primitiveType();
     sk_sp<GrVkPipelineState> pipelineState = this->prepareDrawState(pipeline,
@@ -1496,8 +1518,6 @@
 
     // Change layout of our render target so it can be used as the color attachment
     VkImageLayout layout = vkRT->currentLayout();
-    // Our color attachment is purely a destination and won't be read so don't need to flush or
-    // invalidate any caches
     VkPipelineStageFlags srcStageMask = GrVkMemory::LayoutToPipelineStageFlags(layout);
     VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
     VkAccessFlags srcAccessMask = GrVkMemory::LayoutToSrcAccessMask(layout);
@@ -1511,8 +1531,7 @@
                          false);
 
     // If we are using a stencil attachment we also need to update its layout
-    if (!pipeline.getStencil().isDisabled()) {
-        GrStencilAttachment* stencil = vkRT->renderTargetPriv().getStencilAttachment();
+    if (GrStencilAttachment* stencil = vkRT->renderTargetPriv().getStencilAttachment()) {
         GrVkStencilAttachment* vkStencil = (GrVkStencilAttachment*)stencil;
         VkImageLayout origDstLayout = vkStencil->currentLayout();
         VkAccessFlags srcAccessMask = GrVkMemory::LayoutToSrcAccessMask(origDstLayout);
@@ -1530,12 +1549,9 @@
                                   false);
     }
 
+    fCurrentCmdBuffer->beginRenderPass(this, renderPass, *vkRT);
 
     for (int i = 0; i < meshCount; ++i) {
-        if (GrXferBarrierType barrierType = pipeline.xferBarrierType(*this->caps())) {
-            this->xferBarrier(pipeline.getRenderTarget(), barrierType);
-        }
-
         const GrMesh& mesh = meshes[i];
         GrMesh::Iterator iter;
         const GrNonInstancedMesh* nonIdxMesh = iter.init(mesh);
@@ -1547,6 +1563,10 @@
                 pipelineState->freeTempResources(this);
                 SkDEBUGCODE(pipelineState = nullptr);
                 primitiveType = nonIdxMesh->primitiveType();
+                // It is illegal for us to have the necessary memory barriers for when we write and
+                // update the uniform buffers in prepareDrawState while in an active render pass.
+                // Thus we must end the current one and then start it up again.
+                fCurrentCmdBuffer->endRenderPass(this);
                 pipelineState = this->prepareDrawState(pipeline,
                                                        primProc,
                                                        primitiveType,
@@ -1554,6 +1574,7 @@
                 if (!pipelineState) {
                     return;
                 }
+                fCurrentCmdBuffer->beginRenderPass(this, renderPass, *vkRT);
             }
             SkASSERT(pipelineState);
             this->bindGeometry(primProc, *nonIdxMesh);
diff --git a/src/gpu/vk/GrVkImage.cpp b/src/gpu/vk/GrVkImage.cpp
index 7413630..5102124 100644
--- a/src/gpu/vk/GrVkImage.cpp
+++ b/src/gpu/vk/GrVkImage.cpp
@@ -12,6 +12,19 @@
 
 #define VK_CALL(GPU, X) GR_VK_CALL(GPU->vkInterface(), X)
 
+VkImageAspectFlags vk_format_to_aspect_flags(VkFormat format) {
+    switch (format) {
+        case VK_FORMAT_S8_UINT:
+            return VK_IMAGE_ASPECT_STENCIL_BIT;
+        case VK_FORMAT_D24_UNORM_S8_UINT: // fallthrough
+        case VK_FORMAT_D32_SFLOAT_S8_UINT:
+            return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+        default:
+            SkASSERT(GrVkFormatToPixelConfig(format, nullptr));
+            return VK_IMAGE_ASPECT_COLOR_BIT;
+    }
+}
+
 void GrVkImage::setImageLayout(const GrVkGpu* gpu, VkImageLayout newLayout,
                                VkAccessFlags srcAccessMask,
                                VkAccessFlags dstAccessMask,
@@ -24,7 +37,7 @@
     if (newLayout == fCurrentLayout) {
         return;
     }
-
+    VkImageAspectFlags aspectFlags = vk_format_to_aspect_flags(fResource->fFormat);
     VkImageMemoryBarrier imageMemoryBarrier = {
         VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,   // sType
         NULL,                                     // pNext
@@ -35,7 +48,7 @@
         VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
         VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
         fResource->fImage,                        // image
-        { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 } // subresourceRange
+        { aspectFlags, 0, 1, 0, 1 }               // subresourceRange
     };
 
     // TODO: restrict to area of image we're interested in
@@ -91,7 +104,7 @@
         (VK_IMAGE_TILING_LINEAR == imageDesc.fImageTiling) ? Resource::kLinearTiling_Flag
                                                            : Resource::kNo_Flags;
 
-    return (new GrVkImage::Resource(image, alloc, flags));
+    return (new GrVkImage::Resource(image, alloc, flags, imageDesc.fFormat));
 }
 
 GrVkImage::~GrVkImage() {
diff --git a/src/gpu/vk/GrVkImage.h b/src/gpu/vk/GrVkImage.h
index 13c6e36..5b3b527 100644
--- a/src/gpu/vk/GrVkImage.h
+++ b/src/gpu/vk/GrVkImage.h
@@ -28,11 +28,17 @@
         VkImage        fImage;
         VkDeviceMemory fAlloc;
         Flags          fFlags;
+        VkFormat       fFormat;
 
-        Resource() : INHERITED(), fImage(VK_NULL_HANDLE), fAlloc(VK_NULL_HANDLE), fFlags(kNo_Flags) {}
+        Resource()
+            : INHERITED()
+            , fImage(VK_NULL_HANDLE)
+            , fAlloc(VK_NULL_HANDLE)
+            , fFlags(kNo_Flags)
+            , fFormat(VK_FORMAT_UNDEFINED) {}
 
-        Resource(VkImage image, VkDeviceMemory alloc, Flags flags)
-            : fImage(image), fAlloc(alloc), fFlags(flags) {}
+        Resource(VkImage image, VkDeviceMemory alloc, Flags flags, VkFormat format)
+            : fImage(image), fAlloc(alloc), fFlags(flags), fFormat(format) {}
 
         ~Resource() override {}
     private:
@@ -44,8 +50,8 @@
     // for wrapped textures
     class BorrowedResource : public Resource {
     public:
-        BorrowedResource(VkImage image, VkDeviceMemory alloc, Flags flags)
-            : Resource(image, alloc, flags) {}
+        BorrowedResource(VkImage image, VkDeviceMemory alloc, Flags flags, VkFormat format)
+            : Resource(image, alloc, flags, format) {}
     private:
         void freeGPUData(const GrVkGpu* gpu) const override;
     };
@@ -70,7 +76,8 @@
 
     VkImageLayout currentLayout() const { return fCurrentLayout; }
 
-    void setImageLayout(const GrVkGpu* gpu, VkImageLayout newLayout,
+    void setImageLayout(const GrVkGpu* gpu,
+                        VkImageLayout newLayout,
                         VkAccessFlags srcAccessMask,
                         VkAccessFlags dstAccessMask,
                         VkPipelineStageFlags srcStageMask,
diff --git a/src/gpu/vk/GrVkPipelineState.cpp b/src/gpu/vk/GrVkPipelineState.cpp
index e464848..0194e29 100644
--- a/src/gpu/vk/GrVkPipelineState.cpp
+++ b/src/gpu/vk/GrVkPipelineState.cpp
@@ -408,30 +408,29 @@
 void GrVkPipelineState::DescriptorPoolManager::getNewPool(GrVkGpu* gpu) {
     if (fPool) {
         fPool->unref(gpu);
-        SkASSERT(fMaxDescriptorSets < (SK_MaxU32 >> 1));
-        if (fMaxDescriptorSets < kMaxDescSetLimit >> 1) {
-            fMaxDescriptorSets = fMaxDescriptorSets << 1;
+        if (fMaxDescriptors < kMaxDescLimit >> 1) {
+            fMaxDescriptors = fMaxDescriptors << 1;
         } else {
-            fMaxDescriptorSets = kMaxDescSetLimit;
+            fMaxDescriptors = kMaxDescLimit;
         }
 
     }
-    if (fMaxDescriptorSets) {
+    if (fMaxDescriptors) {
         fPool = gpu->resourceProvider().findOrCreateCompatibleDescriptorPool(fDescType,
-                                                                             fMaxDescriptorSets);
+                                                                             fMaxDescriptors);
     }
-    SkASSERT(fPool || !fMaxDescriptorSets);
+    SkASSERT(fPool || !fMaxDescriptors);
 }
 
 void GrVkPipelineState::DescriptorPoolManager::getNewDescriptorSet(GrVkGpu* gpu, VkDescriptorSet* ds) {
-    if (!fMaxDescriptorSets) {
+    if (!fMaxDescriptors) {
         return;
     }
-    if (fCurrentDescriptorSet == fMaxDescriptorSets) {
+    if (fCurrentDescriptorCount == fMaxDescriptors) {
         this->getNewPool(gpu);
-        fCurrentDescriptorSet = 0;
+        fCurrentDescriptorCount = 0;
     }
-    fCurrentDescriptorSet++;
+    fCurrentDescriptorCount += fDescCountPerSet;
 
     VkDescriptorSetAllocateInfo dsAllocateInfo;
     memset(&dsAllocateInfo, 0, sizeof(VkDescriptorSetAllocateInfo));
diff --git a/src/gpu/vk/GrVkPipelineState.h b/src/gpu/vk/GrVkPipelineState.h
index 2bfc16c..ad3afa7 100644
--- a/src/gpu/vk/GrVkPipelineState.h
+++ b/src/gpu/vk/GrVkPipelineState.h
@@ -166,10 +166,11 @@
                               uint32_t descCount, GrVkGpu* gpu)
             : fDescLayout(layout)
             , fDescType(type)
-            , fCurrentDescriptorSet(0)
+            , fDescCountPerSet(descCount)
+            , fCurrentDescriptorCount(0)
             , fPool(nullptr) {
-            SkASSERT(descCount < (kMaxDescSetLimit >> 2));
-            fMaxDescriptorSets = descCount << 2;
+            SkASSERT(descCount < kMaxDescLimit >> 2);
+            fMaxDescriptors = fDescCountPerSet << 2;
             this->getNewPool(gpu);
         }
 
@@ -185,12 +186,13 @@
 
         VkDescriptorSetLayout  fDescLayout;
         VkDescriptorType       fDescType;
-        uint32_t               fMaxDescriptorSets;
-        uint32_t               fCurrentDescriptorSet;
+        uint32_t               fDescCountPerSet;
+        uint32_t               fMaxDescriptors;
+        uint32_t               fCurrentDescriptorCount;
         GrVkDescriptorPool*    fPool;
 
     private:
-        static const uint32_t kMaxDescSetLimit = 1 << 10;
+        static const uint32_t kMaxDescLimit = 1 << 10;
 
         void getNewPool(GrVkGpu* gpu);
     };
diff --git a/src/gpu/vk/GrVkRenderTarget.cpp b/src/gpu/vk/GrVkRenderTarget.cpp
index 486f2f5..4d84455 100644
--- a/src/gpu/vk/GrVkRenderTarget.cpp
+++ b/src/gpu/vk/GrVkRenderTarget.cpp
@@ -216,9 +216,12 @@
 
     const GrVkImage::Resource* imageResource;
     if (kBorrowed_LifeCycle == lifeCycle) {
-        imageResource = new GrVkImage::BorrowedResource(info->fImage, info->fAlloc, flags);
+        imageResource = new GrVkImage::BorrowedResource(info->fImage,
+                                                        info->fAlloc,
+                                                        flags,
+                                                        info->fFormat);
     } else {
-        imageResource = new GrVkImage::Resource(info->fImage, info->fAlloc, flags);
+        imageResource = new GrVkImage::Resource(info->fImage, info->fAlloc, flags, info->fFormat);
     }
     if (!imageResource) {
         return nullptr;
diff --git a/src/gpu/vk/GrVkStencilAttachment.cpp b/src/gpu/vk/GrVkStencilAttachment.cpp
index 97597fc..a3d3af3 100644
--- a/src/gpu/vk/GrVkStencilAttachment.cpp
+++ b/src/gpu/vk/GrVkStencilAttachment.cpp
@@ -42,7 +42,8 @@
     imageDesc.fLevels = 1;
     imageDesc.fSamples = sampleCnt;
     imageDesc.fImageTiling = VK_IMAGE_TILING_OPTIMAL;
-    imageDesc.fUsageFlags = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+    imageDesc.fUsageFlags = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
+                            VK_IMAGE_USAGE_TRANSFER_DST_BIT;
     imageDesc.fMemProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 
     const GrVkImage::Resource* imageResource = GrVkImage::CreateResource(gpu, imageDesc);
diff --git a/src/gpu/vk/GrVkTexture.cpp b/src/gpu/vk/GrVkTexture.cpp
index b297b60..8dcc368 100644
--- a/src/gpu/vk/GrVkTexture.cpp
+++ b/src/gpu/vk/GrVkTexture.cpp
@@ -87,9 +87,12 @@
 
     const GrVkImage::Resource* imageResource;
     if (kBorrowed_LifeCycle == lifeCycle) {
-        imageResource = new GrVkImage::BorrowedResource(info->fImage, info->fAlloc, flags);
+        imageResource = new GrVkImage::BorrowedResource(info->fImage,
+                                                        info->fAlloc,
+                                                        flags,
+                                                        info->fFormat);
     } else {
-        imageResource = new GrVkImage::Resource(info->fImage, info->fAlloc, flags);
+        imageResource = new GrVkImage::Resource(info->fImage, info->fAlloc, flags, info->fFormat);
     }
     if (!imageResource) {
         return nullptr;
diff --git a/src/gpu/vk/GrVkTextureRenderTarget.cpp b/src/gpu/vk/GrVkTextureRenderTarget.cpp
index bd028d2..7c085a3 100644
--- a/src/gpu/vk/GrVkTextureRenderTarget.cpp
+++ b/src/gpu/vk/GrVkTextureRenderTarget.cpp
@@ -152,9 +152,12 @@
 
     const GrVkImage::Resource* imageResource;
     if (kBorrowed_LifeCycle == lifeCycle) {
-        imageResource = new GrVkImage::BorrowedResource(info->fImage, info->fAlloc, flags);
+        imageResource = new GrVkImage::BorrowedResource(info->fImage,
+                                                        info->fAlloc,
+                                                        flags,
+                                                        info->fFormat);
     } else {
-        imageResource = new GrVkImage::Resource(info->fImage, info->fAlloc, flags);
+        imageResource = new GrVkImage::Resource(info->fImage, info->fAlloc, flags, info->fFormat);
     }
     if (!imageResource) {
         return nullptr;
diff --git a/tools/vulkan/VulkanTestContext.cpp b/tools/vulkan/VulkanTestContext.cpp
index b940656..03b7407 100644
--- a/tools/vulkan/VulkanTestContext.cpp
+++ b/tools/vulkan/VulkanTestContext.cpp
@@ -240,14 +240,14 @@
                                                                    nullptr));
     }
 
-    GrVkFormatToPixelConfig(swapchainCreateInfo.imageFormat, &fPixelConfig);
-
-    this->createBuffers();
+    this->createBuffers(swapchainCreateInfo.imageFormat);
 
     return true;
 }
 
-void VulkanTestContext::createBuffers() {
+void VulkanTestContext::createBuffers(VkFormat format) {
+    GrVkFormatToPixelConfig(format, &fPixelConfig);
+
     GR_VK_CALL_ERRCHECK(fBackendContext->fInterface, GetSwapchainImagesKHR(fBackendContext->fDevice,
                                                                            fSwapchain,
                                                                            &fImageCount,
@@ -271,6 +271,7 @@
         info.fAlloc = nullptr;
         info.fImageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
         info.fImageTiling = VK_IMAGE_TILING_OPTIMAL;
+        info.fFormat = format;
         desc.fWidth = fWidth;
         desc.fHeight = fHeight;
         desc.fConfig = fPixelConfig;
@@ -511,13 +512,14 @@
     GR_VK_CALL_ERRCHECK(fBackendContext->fInterface,
                         EndCommandBuffer(backbuffer->fTransitionCmdBuffers[0]));  
 
+    VkPipelineStageFlags waitDstStageFlags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
     // insert the layout transfer into the queue and wait on the acquire
     VkSubmitInfo submitInfo;
     memset(&submitInfo, 0, sizeof(VkSubmitInfo));
     submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     submitInfo.waitSemaphoreCount = 1;
     submitInfo.pWaitSemaphores = &backbuffer->fAcquireSemaphore;
-    submitInfo.pWaitDstStageMask = 0;
+    submitInfo.pWaitDstStageMask = &waitDstStageFlags;
     submitInfo.commandBufferCount = 1;
     submitInfo.pCommandBuffers = &backbuffer->fTransitionCmdBuffers[0];
     submitInfo.signalSemaphoreCount = 0;
diff --git a/tools/vulkan/VulkanTestContext.h b/tools/vulkan/VulkanTestContext.h
index b300a57..66171af 100644
--- a/tools/vulkan/VulkanTestContext.h
+++ b/tools/vulkan/VulkanTestContext.h
@@ -64,7 +64,7 @@
 
     BackbufferInfo* getAvailableBackbuffer();
     bool createSwapchain(uint32_t width, uint32_t height);
-    void createBuffers();
+    void createBuffers(VkFormat format);
     void destroyBuffers();
 
     SkAutoTUnref<const GrVkBackendContext> fBackendContext;